├── .binder ├── postBuild └── requirements.txt ├── .circleci ├── artifact_path └── config.yml ├── .codecov.yml ├── .coveragerc ├── .git-blame-ignore-revs ├── .gitattributes ├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug_report.yml │ ├── config.yml │ ├── doc_improvement.yml │ └── feature_request.yml ├── PULL_REQUEST_TEMPLATE.md ├── labeler-file-extensions.yml ├── labeler-module.yml ├── scripts │ └── label_title_regex.py └── workflows │ ├── assign.yml │ ├── check-changelog.yml │ ├── check-manifest.yml │ ├── label-blank-issue.yml │ ├── labeler-module.yml │ ├── labeler-title-regex.yml │ ├── publish_pypi.yml │ ├── twitter.yml │ ├── unassign.yml │ ├── update_tracking_issue.yml │ └── wheels.yml ├── .gitignore ├── .mailmap ├── .pre-commit-config.yaml ├── .travis.yml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── COPYING ├── MANIFEST.in ├── Makefile ├── README.rst ├── SECURITY.md ├── asv_benchmarks ├── .gitignore ├── asv.conf.json └── benchmarks │ ├── __init__.py │ ├── cluster.py │ ├── common.py │ ├── config.json │ ├── datasets.py │ ├── decomposition.py │ ├── ensemble.py │ ├── linear_model.py │ ├── manifold.py │ ├── metrics.py │ ├── model_selection.py │ ├── neighbors.py │ ├── svm.py │ └── utils.py ├── azure-pipelines.yml ├── benchmarks ├── .gitignore ├── bench_20newsgroups.py ├── bench_covertype.py ├── bench_feature_expansions.py ├── bench_glm.py ├── bench_glmnet.py ├── bench_hist_gradient_boosting.py ├── bench_hist_gradient_boosting_adult.py ├── bench_hist_gradient_boosting_categorical_only.py ├── bench_hist_gradient_boosting_higgsboson.py ├── bench_hist_gradient_boosting_threading.py ├── bench_isolation_forest.py ├── bench_isotonic.py ├── bench_kernel_pca_solvers_time_vs_n_components.py ├── bench_kernel_pca_solvers_time_vs_n_samples.py ├── bench_lasso.py ├── bench_lof.py ├── bench_mnist.py ├── bench_multilabel_metrics.py ├── bench_online_ocsvm.py ├── bench_plot_fastkmeans.py ├── bench_plot_hierarchical.py ├── bench_plot_incremental_pca.py ├── 
bench_plot_lasso_path.py ├── bench_plot_neighbors.py ├── bench_plot_nmf.py ├── bench_plot_omp_lars.py ├── bench_plot_parallel_pairwise.py ├── bench_plot_polynomial_kernel_approximation.py ├── bench_plot_randomized_svd.py ├── bench_plot_svd.py ├── bench_plot_ward.py ├── bench_random_projections.py ├── bench_rcv1_logreg_convergence.py ├── bench_saga.py ├── bench_sample_without_replacement.py ├── bench_sgd_regression.py ├── bench_sparsify.py ├── bench_text_vectorizers.py ├── bench_tree.py ├── bench_tsne_mnist.py └── plot_tsne_mnist.py ├── build_tools ├── Makefile ├── azure │ ├── install.sh │ ├── install_win.sh │ ├── posix-docker.yml │ ├── posix.yml │ ├── test_docs.sh │ ├── test_pytest_soft_dependency.sh │ ├── test_script.sh │ ├── upload_codecov.sh │ └── windows.yml ├── circle │ ├── build_doc.sh │ ├── build_test_arm.sh │ ├── build_test_pypy.sh │ ├── checkout_merge_commit.sh │ ├── linting.sh │ ├── list_versions.py │ └── push_doc.sh ├── codespell_ignore_words.txt ├── generate_authors_table.py ├── github │ ├── Windows │ ├── build_minimal_windows_image.sh │ ├── build_source.sh │ ├── build_wheels.sh │ ├── check_build_trigger.sh │ ├── check_wheels.py │ ├── repair_windows_wheels.sh │ ├── test_source.sh │ ├── test_wheels.sh │ ├── test_windows_wheels.sh │ ├── upload_anaconda.sh │ └── vendor.py ├── shared.sh └── travis │ ├── after_success.sh │ ├── install.sh │ ├── install_main.sh │ ├── install_wheels.sh │ ├── script.sh │ ├── test_docs.sh │ ├── test_script.sh │ └── test_wheels.sh ├── conftest.py ├── doc ├── Makefile ├── README.md ├── about.rst ├── authors.rst ├── authors_emeritus.rst ├── binder │ └── requirements.txt ├── common_pitfalls.rst ├── communication_team.rst ├── computing.rst ├── computing │ ├── computational_performance.rst │ ├── parallelism.rst │ └── scaling_strategies.rst ├── conf.py ├── conftest.py ├── contents.rst ├── data_transforms.rst ├── datasets.rst ├── datasets │ ├── loading_other_datasets.rst │ ├── real_world.rst │ ├── sample_generators.rst │ └── 
toy_dataset.rst ├── developers │ ├── advanced_installation.rst │ ├── bug_triaging.rst │ ├── contributing.rst │ ├── develop.rst │ ├── index.rst │ ├── maintainer.rst │ ├── minimal_reproducer.rst │ ├── performance.rst │ ├── plotting.rst │ ├── tips.rst │ └── utilities.rst ├── faq.rst ├── getting_started.rst ├── glossary.rst ├── governance.rst ├── images │ ├── axa-small.png │ ├── axa.png │ ├── bcg-small.png │ ├── bcg.png │ ├── bnp-small.png │ ├── bnp.png │ ├── cds-logo.png │ ├── columbia-small.png │ ├── columbia.png │ ├── czi_logo.svg │ ├── dataiku-small.png │ ├── dataiku.png │ ├── digicosme.png │ ├── dysco.png │ ├── fnrs-logo-small.png │ ├── fujitsu-small.png │ ├── fujitsu.png │ ├── google-small.png │ ├── grid_search_cross_validation.png │ ├── grid_search_workflow.png │ ├── huggingface_logo-noborder.png │ ├── inria-logo.jpg │ ├── inria-small.png │ ├── intel-small.png │ ├── intel.png │ ├── iris.pdf │ ├── iris.svg │ ├── last_digit.png │ ├── lda_model_graph.png │ ├── logo_APHP.png │ ├── logo_APHP_text.png │ ├── microsoft-small.png │ ├── microsoft.png │ ├── ml_map.png │ ├── multi_org_chart.png │ ├── multilayerperceptron_network.png │ ├── no_image.png │ ├── nvidia-small.png │ ├── nvidia.png │ ├── nyu_short_color.png │ ├── plot_digits_classification.png │ ├── plot_face_recognition_1.png │ ├── plot_face_recognition_2.png │ ├── png-logo-inria-la-fondation.png │ ├── quansight-labs-small.png │ ├── quansight-labs.png │ ├── rbm_graph.png │ ├── scikit-learn-logo-notext.png │ ├── scikit-learn-logo-small.png │ ├── sloan_banner.png │ ├── sloan_logo-small.png │ ├── sydney-primary.jpeg │ ├── sydney-stacked-small.png │ ├── telecom-small.png │ ├── telecom.png │ └── visual-studio-build-tools-selection.png ├── includes │ ├── big_toc_css.rst │ └── bigger_toc_css.rst ├── inspection.rst ├── install.rst ├── logos │ ├── favicon.ico │ ├── identity.pdf │ ├── scikit-learn-logo-notext.png │ ├── scikit-learn-logo-small.png │ ├── scikit-learn-logo-thumb.png │ ├── scikit-learn-logo.bmp │ ├── 
scikit-learn-logo.png │ └── scikit-learn-logo.svg ├── make.bat ├── model_persistence.rst ├── model_selection.rst ├── modules │ ├── biclustering.rst │ ├── calibration.rst │ ├── classes.rst │ ├── clustering.rst │ ├── compose.rst │ ├── covariance.rst │ ├── cross_decomposition.rst │ ├── cross_validation.rst │ ├── decomposition.rst │ ├── density.rst │ ├── ensemble.rst │ ├── feature_extraction.rst │ ├── feature_selection.rst │ ├── gaussian_process.rst │ ├── glm_data │ │ ├── lasso_enet_coordinate_descent.png │ │ └── poisson_gamma_tweedie_distributions.png │ ├── grid_search.rst │ ├── impute.rst │ ├── isotonic.rst │ ├── kernel_approximation.rst │ ├── kernel_ridge.rst │ ├── lda_qda.rst │ ├── learning_curve.rst │ ├── linear_model.rst │ ├── manifold.rst │ ├── metrics.rst │ ├── mixture.rst │ ├── model_evaluation.rst │ ├── multiclass.rst │ ├── naive_bayes.rst │ ├── neighbors.rst │ ├── neural_networks_supervised.rst │ ├── neural_networks_unsupervised.rst │ ├── outlier_detection.rst │ ├── partial_dependence.rst │ ├── permutation_importance.rst │ ├── pipeline.rst │ ├── preprocessing.rst │ ├── preprocessing_targets.rst │ ├── random_projection.rst │ ├── semi_supervised.rst │ ├── sgd.rst │ ├── svm.rst │ ├── tree.rst │ └── unsupervised_reduction.rst ├── preface.rst ├── presentations.rst ├── related_projects.rst ├── roadmap.rst ├── sphinxext │ ├── MANIFEST.in │ ├── add_toctree_functions.py │ ├── custom_references_resolver.py │ ├── doi_role.py │ ├── github_link.py │ └── sphinx_issues.py ├── supervised_learning.rst ├── support.rst ├── templates │ ├── class.rst │ ├── class_with_call.rst │ ├── deprecated_class.rst │ ├── deprecated_class_with_call.rst │ ├── deprecated_class_without_init.rst │ ├── deprecated_function.rst │ ├── function.rst │ ├── generate_deprecated.sh │ ├── index.html │ ├── numpydoc_docstring.rst │ └── redirects.html ├── testimonials │ ├── README.txt │ ├── images │ │ ├── Makefile │ │ ├── aweber.png │ │ ├── bestofmedia-logo.png │ │ ├── betaworks.png │ │ ├── birchbox.jpg │ │ 
├── bnp_paribas_cardif.png │ │ ├── booking.png │ │ ├── change-logo.png │ │ ├── dataiku_logo.png │ │ ├── datapublica.png │ │ ├── datarobot.png │ │ ├── evernote.png │ │ ├── howaboutwe.png │ │ ├── huggingface.png │ │ ├── infonea.jpg │ │ ├── inria.png │ │ ├── jpmorgan.png │ │ ├── lovely.png │ │ ├── machinalis.png │ │ ├── mars.png │ │ ├── okcupid.png │ │ ├── ottogroup_logo.png │ │ ├── peerindex.png │ │ ├── phimeca.png │ │ ├── rangespan.png │ │ ├── solido_logo.png │ │ ├── spotify.png │ │ ├── telecomparistech.jpg │ │ ├── yhat.png │ │ └── zopa.png │ └── testimonials.rst ├── themes │ └── scikit-learn-modern │ │ ├── javascript.html │ │ ├── layout.html │ │ ├── nav.html │ │ ├── search.html │ │ ├── static │ │ ├── css │ │ │ ├── theme.css │ │ │ └── vendor │ │ │ │ └── bootstrap.min.css │ │ └── js │ │ │ ├── searchtools.js │ │ │ └── vendor │ │ │ └── bootstrap.min.js │ │ └── theme.conf ├── triage_team.rst ├── tune_toc.rst ├── tutorial │ ├── basic │ │ └── tutorial.rst │ ├── common_includes │ │ └── info.txt │ ├── index.rst │ ├── machine_learning_map │ │ ├── ML_MAPS_README.txt │ │ ├── index.rst │ │ ├── parse_path.py │ │ ├── pyparsing.py │ │ └── svg2imagemap.py │ ├── statistical_inference │ │ ├── index.rst │ │ ├── model_selection.rst │ │ ├── putting_together.rst │ │ ├── settings.rst │ │ ├── supervised_learning.rst │ │ └── unsupervised_learning.rst │ └── text_analytics │ │ ├── .gitignore │ │ ├── data │ │ ├── languages │ │ │ └── fetch_data.py │ │ ├── movie_reviews │ │ │ └── fetch_data.py │ │ └── twenty_newsgroups │ │ │ └── fetch_data.py │ │ ├── skeletons │ │ ├── exercise_01_language_train_model.py │ │ └── exercise_02_sentiment.py │ │ ├── solutions │ │ ├── exercise_01_language_train_model.py │ │ ├── exercise_02_sentiment.py │ │ └── generate_skeletons.py │ │ └── working_with_text_data.rst ├── unsupervised_learning.rst ├── user_guide.rst ├── visualizations.rst ├── whats_new.rst └── whats_new │ ├── _contributors.rst │ ├── changelog_legend.inc │ ├── older_versions.rst │ ├── v0.13.rst │ ├── 
v0.14.rst │ ├── v0.15.rst │ ├── v0.16.rst │ ├── v0.17.rst │ ├── v0.18.rst │ ├── v0.19.rst │ ├── v0.20.rst │ ├── v0.21.rst │ ├── v0.22.rst │ ├── v0.23.rst │ ├── v0.24.rst │ ├── v1.0.rst │ └── v1.1.rst ├── examples ├── README.txt ├── applications │ ├── README.txt │ ├── plot_cyclical_feature_engineering.py │ ├── plot_digits_denoising.py │ ├── plot_face_recognition.py │ ├── plot_model_complexity_influence.py │ ├── plot_out_of_core_classification.py │ ├── plot_outlier_detection_wine.py │ ├── plot_prediction_latency.py │ ├── plot_species_distribution_modeling.py │ ├── plot_stock_market.py │ ├── plot_tomography_l1_reconstruction.py │ ├── plot_topics_extraction_with_nmf_lda.py │ ├── svm_gui.py │ └── wikipedia_principal_eigenvector.py ├── bicluster │ ├── README.txt │ ├── plot_bicluster_newsgroups.py │ ├── plot_spectral_biclustering.py │ └── plot_spectral_coclustering.py ├── calibration │ ├── README.txt │ ├── plot_calibration.py │ ├── plot_calibration_curve.py │ ├── plot_calibration_multiclass.py │ └── plot_compare_calibration.py ├── classification │ ├── README.txt │ ├── plot_classification_probability.py │ ├── plot_classifier_comparison.py │ ├── plot_digits_classification.py │ ├── plot_lda.py │ └── plot_lda_qda.py ├── cluster │ ├── README.txt │ ├── plot_adjusted_for_chance_measures.py │ ├── plot_affinity_propagation.py │ ├── plot_agglomerative_clustering.py │ ├── plot_agglomerative_clustering_metrics.py │ ├── plot_agglomerative_dendrogram.py │ ├── plot_birch_vs_minibatchkmeans.py │ ├── plot_cluster_comparison.py │ ├── plot_cluster_iris.py │ ├── plot_coin_segmentation.py │ ├── plot_coin_ward_segmentation.py │ ├── plot_color_quantization.py │ ├── plot_dbscan.py │ ├── plot_dict_face_patches.py │ ├── plot_digits_agglomeration.py │ ├── plot_digits_linkage.py │ ├── plot_face_compress.py │ ├── plot_feature_agglomeration_vs_univariate_selection.py │ ├── plot_inductive_clustering.py │ ├── plot_kmeans_assumptions.py │ ├── plot_kmeans_digits.py │ ├── plot_kmeans_plusplus.py │ ├── 
plot_kmeans_silhouette_analysis.py │ ├── plot_kmeans_stability_low_dim_dense.py │ ├── plot_linkage_comparison.py │ ├── plot_mean_shift.py │ ├── plot_mini_batch_kmeans.py │ ├── plot_optics.py │ ├── plot_segmentation_toy.py │ └── plot_ward_structured_vs_unstructured.py ├── compose │ ├── README.txt │ ├── plot_column_transformer.py │ ├── plot_column_transformer_mixed_types.py │ ├── plot_compare_reduction.py │ ├── plot_digits_pipe.py │ ├── plot_feature_union.py │ └── plot_transformed_target.py ├── covariance │ ├── README.txt │ ├── plot_covariance_estimation.py │ ├── plot_lw_vs_oas.py │ ├── plot_mahalanobis_distances.py │ ├── plot_robust_vs_empirical_covariance.py │ └── plot_sparse_cov.py ├── cross_decomposition │ ├── README.txt │ ├── plot_compare_cross_decomposition.py │ └── plot_pcr_vs_pls.py ├── datasets │ ├── README.txt │ ├── plot_digits_last_image.py │ ├── plot_iris_dataset.py │ ├── plot_random_dataset.py │ └── plot_random_multilabel_dataset.py ├── decomposition │ ├── README.txt │ ├── plot_beta_divergence.py │ ├── plot_faces_decomposition.py │ ├── plot_ica_blind_source_separation.py │ ├── plot_ica_vs_pca.py │ ├── plot_image_denoising.py │ ├── plot_incremental_pca.py │ ├── plot_kernel_pca.py │ ├── plot_pca_3d.py │ ├── plot_pca_iris.py │ ├── plot_pca_vs_fa_model_selection.py │ ├── plot_pca_vs_lda.py │ ├── plot_sparse_coding.py │ └── plot_varimax_fa.py ├── ensemble │ ├── README.txt │ ├── plot_adaboost_hastie_10_2.py │ ├── plot_adaboost_multiclass.py │ ├── plot_adaboost_regression.py │ ├── plot_adaboost_twoclass.py │ ├── plot_bias_variance.py │ ├── plot_ensemble_oob.py │ ├── plot_feature_transformation.py │ ├── plot_forest_importances.py │ ├── plot_forest_importances_faces.py │ ├── plot_forest_iris.py │ ├── plot_gradient_boosting_categorical.py │ ├── plot_gradient_boosting_early_stopping.py │ ├── plot_gradient_boosting_oob.py │ ├── plot_gradient_boosting_quantile.py │ ├── plot_gradient_boosting_regression.py │ ├── plot_gradient_boosting_regularization.py │ ├── 
plot_isolation_forest.py │ ├── plot_monotonic_constraints.py │ ├── plot_random_forest_embedding.py │ ├── plot_random_forest_regression_multioutput.py │ ├── plot_stack_predictors.py │ ├── plot_voting_decision_regions.py │ ├── plot_voting_probas.py │ └── plot_voting_regressor.py ├── exercises │ ├── README.txt │ ├── plot_cv_diabetes.py │ ├── plot_cv_digits.py │ ├── plot_digits_classification_exercise.py │ └── plot_iris_exercise.py ├── feature_selection │ ├── README.txt │ ├── plot_f_test_vs_mi.py │ ├── plot_feature_selection.py │ ├── plot_feature_selection_pipeline.py │ ├── plot_rfe_digits.py │ ├── plot_rfe_with_cross_validation.py │ └── plot_select_from_model_diabetes.py ├── gaussian_process │ ├── README.txt │ ├── plot_compare_gpr_krr.py │ ├── plot_gpc.py │ ├── plot_gpc_iris.py │ ├── plot_gpc_isoprobability.py │ ├── plot_gpc_xor.py │ ├── plot_gpr_co2.py │ ├── plot_gpr_noisy.py │ ├── plot_gpr_noisy_targets.py │ ├── plot_gpr_on_structured_data.py │ └── plot_gpr_prior_posterior.py ├── impute │ ├── README.txt │ ├── plot_iterative_imputer_variants_comparison.py │ └── plot_missing_values.py ├── inspection │ ├── README.txt │ ├── plot_linear_model_coefficient_interpretation.py │ ├── plot_partial_dependence.py │ ├── plot_permutation_importance.py │ └── plot_permutation_importance_multicollinear.py ├── kernel_approximation │ ├── README.txt │ └── plot_scalable_poly_kernels.py ├── linear_model │ ├── README.txt │ ├── plot_ard.py │ ├── plot_bayesian_ridge.py │ ├── plot_bayesian_ridge_curvefit.py │ ├── plot_elastic_net_precomputed_gram_matrix_with_weighted_samples.py │ ├── plot_huber_vs_ridge.py │ ├── plot_iris_logistic.py │ ├── plot_lasso_and_elasticnet.py │ ├── plot_lasso_coordinate_descent_path.py │ ├── plot_lasso_dense_vs_sparse_data.py │ ├── plot_lasso_lars.py │ ├── plot_lasso_lars_ic.py │ ├── plot_lasso_model_selection.py │ ├── plot_logistic.py │ ├── plot_logistic_l1_l2_sparsity.py │ ├── plot_logistic_multinomial.py │ ├── plot_logistic_path.py │ ├── 
plot_multi_task_lasso_support.py │ ├── plot_nnls.py │ ├── plot_ols.py │ ├── plot_ols_3d.py │ ├── plot_ols_ridge_variance.py │ ├── plot_omp.py │ ├── plot_poisson_regression_non_normal_loss.py │ ├── plot_polynomial_interpolation.py │ ├── plot_quantile_regression.py │ ├── plot_ransac.py │ ├── plot_ridge_coeffs.py │ ├── plot_ridge_path.py │ ├── plot_robust_fit.py │ ├── plot_sgd_comparison.py │ ├── plot_sgd_early_stopping.py │ ├── plot_sgd_iris.py │ ├── plot_sgd_loss_functions.py │ ├── plot_sgd_penalties.py │ ├── plot_sgd_separating_hyperplane.py │ ├── plot_sgd_weighted_samples.py │ ├── plot_sgdocsvm_vs_ocsvm.py │ ├── plot_sparse_logistic_regression_20newsgroups.py │ ├── plot_sparse_logistic_regression_mnist.py │ ├── plot_theilsen.py │ └── plot_tweedie_regression_insurance_claims.py ├── manifold │ ├── README.txt │ ├── plot_compare_methods.py │ ├── plot_lle_digits.py │ ├── plot_manifold_sphere.py │ ├── plot_mds.py │ ├── plot_swissroll.py │ └── plot_t_sne_perplexity.py ├── miscellaneous │ ├── README.txt │ ├── plot_anomaly_comparison.py │ ├── plot_changed_only_pprint_parameter.py │ ├── plot_display_object_visualization.py │ ├── plot_isotonic_regression.py │ ├── plot_johnson_lindenstrauss_bound.py │ ├── plot_kernel_approximation.py │ ├── plot_kernel_ridge_regression.py │ ├── plot_multilabel.py │ ├── plot_multioutput_face_completion.py │ ├── plot_partial_dependence_visualization_api.py │ ├── plot_pipeline_display.py │ └── plot_roc_curve_visualization_api.py ├── mixture │ ├── README.txt │ ├── plot_concentration_prior.py │ ├── plot_gmm.py │ ├── plot_gmm_covariances.py │ ├── plot_gmm_pdf.py │ ├── plot_gmm_selection.py │ └── plot_gmm_sin.py ├── model_selection │ ├── README.txt │ ├── grid_search_text_feature_extraction.py │ ├── plot_confusion_matrix.py │ ├── plot_cv_indices.py │ ├── plot_cv_predict.py │ ├── plot_det.py │ ├── plot_grid_search_digits.py │ ├── plot_grid_search_refit_callable.py │ ├── plot_grid_search_stats.py │ ├── plot_learning_curve.py │ ├── 
plot_multi_metric_evaluation.py │ ├── plot_nested_cross_validation_iris.py │ ├── plot_permutation_tests_for_classification.py │ ├── plot_precision_recall.py │ ├── plot_randomized_search.py │ ├── plot_roc.py │ ├── plot_roc_crossval.py │ ├── plot_successive_halving_heatmap.py │ ├── plot_successive_halving_iterations.py │ ├── plot_train_error_vs_test_error.py │ ├── plot_underfitting_overfitting.py │ └── plot_validation_curve.py ├── multioutput │ ├── README.txt │ └── plot_classifier_chain_yeast.py ├── neighbors │ ├── README.txt │ ├── approximate_nearest_neighbors.py │ ├── plot_caching_nearest_neighbors.py │ ├── plot_classification.py │ ├── plot_digits_kde_sampling.py │ ├── plot_kde_1d.py │ ├── plot_lof_novelty_detection.py │ ├── plot_lof_outlier_detection.py │ ├── plot_nca_classification.py │ ├── plot_nca_dim_reduction.py │ ├── plot_nca_illustration.py │ ├── plot_nearest_centroid.py │ ├── plot_regression.py │ └── plot_species_kde.py ├── neural_networks │ ├── README.txt │ ├── plot_mlp_alpha.py │ ├── plot_mlp_training_curves.py │ ├── plot_mnist_filters.py │ └── plot_rbm_logistic_classification.py ├── preprocessing │ ├── README.txt │ ├── plot_all_scaling.py │ ├── plot_discretization.py │ ├── plot_discretization_classification.py │ ├── plot_discretization_strategies.py │ ├── plot_map_data_to_normal.py │ └── plot_scaling_importance.py ├── release_highlights │ ├── README.txt │ ├── plot_release_highlights_0_22_0.py │ ├── plot_release_highlights_0_23_0.py │ ├── plot_release_highlights_0_24_0.py │ └── plot_release_highlights_1_0_0.py ├── semi_supervised │ ├── README.txt │ ├── plot_label_propagation_digits.py │ ├── plot_label_propagation_digits_active_learning.py │ ├── plot_label_propagation_structure.py │ ├── plot_self_training_varying_threshold.py │ ├── plot_semi_supervised_newsgroups.py │ └── plot_semi_supervised_versus_svm_iris.py ├── svm │ ├── README.txt │ ├── plot_custom_kernel.py │ ├── plot_iris_svc.py │ ├── plot_linearsvc_support_vectors.py │ ├── plot_oneclass.py │ ├── 
plot_rbf_parameters.py │ ├── plot_separating_hyperplane.py │ ├── plot_separating_hyperplane_unbalanced.py │ ├── plot_svm_anova.py │ ├── plot_svm_kernels.py │ ├── plot_svm_margin.py │ ├── plot_svm_nonlinear.py │ ├── plot_svm_regression.py │ ├── plot_svm_scale_c.py │ ├── plot_svm_tie_breaking.py │ └── plot_weighted_samples.py ├── text │ ├── README.txt │ ├── plot_document_classification_20newsgroups.py │ ├── plot_document_clustering.py │ └── plot_hashing_vs_dict_vectorizer.py └── tree │ ├── README.txt │ ├── plot_cost_complexity_pruning.py │ ├── plot_iris_dtc.py │ ├── plot_tree_regression.py │ ├── plot_tree_regression_multioutput.py │ └── plot_unveil_tree_structure.py ├── lgtm.yml ├── maint_tools ├── check_pxd_in_installation.py ├── sort_whats_new.py ├── update_tracking_issue.py └── whats_missing.sh ├── pyproject.toml ├── setup.cfg ├── setup.py └── sklearn ├── __check_build ├── __init__.py ├── _check_build.pyx └── setup.py ├── __init__.py ├── _build_utils ├── __init__.py ├── openmp_helpers.py └── pre_build_helpers.py ├── _config.py ├── _distributor_init.py ├── _isotonic.pyx ├── _loss ├── __init__.py ├── _loss.pxd ├── _loss.pyx.tp ├── glm_distribution.py ├── link.py ├── loss.py ├── setup.py └── tests │ ├── __init__.py │ ├── test_glm_distribution.py │ ├── test_link.py │ └── test_loss.py ├── _min_dependencies.py ├── base.py ├── calibration.py ├── cluster ├── __init__.py ├── _affinity_propagation.py ├── _agglomerative.py ├── _bicluster.py ├── _birch.py ├── _dbscan.py ├── _dbscan_inner.pyx ├── _feature_agglomeration.py ├── _hierarchical_fast.pyx ├── _k_means_common.pxd ├── _k_means_common.pyx ├── _k_means_elkan.pyx ├── _k_means_lloyd.pyx ├── _k_means_minibatch.pyx ├── _kmeans.py ├── _mean_shift.py ├── _optics.py ├── _spectral.py ├── setup.py └── tests │ ├── __init__.py │ ├── common.py │ ├── test_affinity_propagation.py │ ├── test_bicluster.py │ ├── test_birch.py │ ├── test_dbscan.py │ ├── test_feature_agglomeration.py │ ├── test_hierarchical.py │ ├── test_k_means.py │ ├── 
test_mean_shift.py │ ├── test_optics.py │ └── test_spectral.py ├── compose ├── __init__.py ├── _column_transformer.py ├── _target.py └── tests │ ├── __init__.py │ ├── test_column_transformer.py │ └── test_target.py ├── conftest.py ├── covariance ├── __init__.py ├── _elliptic_envelope.py ├── _empirical_covariance.py ├── _graph_lasso.py ├── _robust_covariance.py ├── _shrunk_covariance.py └── tests │ ├── __init__.py │ ├── test_covariance.py │ ├── test_elliptic_envelope.py │ ├── test_graphical_lasso.py │ └── test_robust_covariance.py ├── cross_decomposition ├── __init__.py ├── _pls.py └── tests │ ├── __init__.py │ └── test_pls.py ├── datasets ├── __init__.py ├── _arff_parser.py ├── _base.py ├── _california_housing.py ├── _covtype.py ├── _kddcup99.py ├── _lfw.py ├── _olivetti_faces.py ├── _openml.py ├── _rcv1.py ├── _samples_generator.py ├── _species_distributions.py ├── _svmlight_format_fast.pyx ├── _svmlight_format_io.py ├── _twenty_newsgroups.py ├── data │ ├── __init__.py │ ├── boston_house_prices.csv │ ├── breast_cancer.csv │ ├── diabetes_data_raw.csv.gz │ ├── diabetes_target.csv.gz │ ├── digits.csv.gz │ ├── iris.csv │ ├── linnerud_exercise.csv │ ├── linnerud_physiological.csv │ └── wine_data.csv ├── descr │ ├── __init__.py │ ├── boston_house_prices.rst │ ├── breast_cancer.rst │ ├── california_housing.rst │ ├── covtype.rst │ ├── diabetes.rst │ ├── digits.rst │ ├── iris.rst │ ├── kddcup99.rst │ ├── lfw.rst │ ├── linnerud.rst │ ├── olivetti_faces.rst │ ├── rcv1.rst │ ├── twenty_newsgroups.rst │ └── wine_data.rst ├── images │ ├── README.txt │ ├── __init__.py │ ├── china.jpg │ └── flower.jpg ├── setup.py └── tests │ ├── __init__.py │ ├── conftest.py │ ├── data │ ├── __init__.py │ ├── openml │ │ ├── __init__.py │ │ ├── id_1 │ │ │ ├── __init__.py │ │ │ ├── api-v1-jd-1.json.gz │ │ │ ├── api-v1-jdf-1.json.gz │ │ │ ├── api-v1-jdq-1.json.gz │ │ │ └── data-v1-dl-1.arff.gz │ │ ├── id_1119 │ │ │ ├── __init__.py │ │ │ ├── api-v1-jd-1119.json.gz │ │ │ ├── api-v1-jdf-1119.json.gz │ 
│ │ ├── api-v1-jdl-dn-adult-census-l-2-dv-1.json.gz │ │ │ ├── api-v1-jdl-dn-adult-census-l-2-s-act-.json.gz │ │ │ ├── api-v1-jdq-1119.json.gz │ │ │ └── data-v1-dl-54002.arff.gz │ │ ├── id_2 │ │ │ ├── __init__.py │ │ │ ├── api-v1-jd-2.json.gz │ │ │ ├── api-v1-jdf-2.json.gz │ │ │ ├── api-v1-jdl-dn-anneal-l-2-dv-1.json.gz │ │ │ ├── api-v1-jdl-dn-anneal-l-2-s-act-.json.gz │ │ │ ├── api-v1-jdq-2.json.gz │ │ │ └── data-v1-dl-1666876.arff.gz │ │ ├── id_292 │ │ │ ├── __init__.py │ │ │ ├── api-v1-jd-292.json.gz │ │ │ ├── api-v1-jd-40981.json.gz │ │ │ ├── api-v1-jdf-292.json.gz │ │ │ ├── api-v1-jdf-40981.json.gz │ │ │ ├── api-v1-jdl-dn-australian-l-2-dv-1-s-dact.json.gz │ │ │ ├── api-v1-jdl-dn-australian-l-2-dv-1.json.gz │ │ │ ├── api-v1-jdl-dn-australian-l-2-s-act-.json.gz │ │ │ └── data-v1-dl-49822.arff.gz │ │ ├── id_3 │ │ │ ├── __init__.py │ │ │ ├── api-v1-jd-3.json.gz │ │ │ ├── api-v1-jdf-3.json.gz │ │ │ ├── api-v1-jdq-3.json.gz │ │ │ └── data-v1-dl-3.arff.gz │ │ ├── id_40589 │ │ │ ├── __init__.py │ │ │ ├── api-v1-jd-40589.json.gz │ │ │ ├── api-v1-jdf-40589.json.gz │ │ │ ├── api-v1-jdl-dn-emotions-l-2-dv-3.json.gz │ │ │ ├── api-v1-jdl-dn-emotions-l-2-s-act-.json.gz │ │ │ ├── api-v1-jdq-40589.json.gz │ │ │ └── data-v1-dl-4644182.arff.gz │ │ ├── id_40675 │ │ │ ├── __init__.py │ │ │ ├── api-v1-jd-40675.json.gz │ │ │ ├── api-v1-jdf-40675.json.gz │ │ │ ├── api-v1-jdl-dn-glass2-l-2-dv-1-s-dact.json.gz │ │ │ ├── api-v1-jdl-dn-glass2-l-2-dv-1.json.gz │ │ │ ├── api-v1-jdl-dn-glass2-l-2-s-act-.json.gz │ │ │ ├── api-v1-jdq-40675.json.gz │ │ │ └── data-v1-dl-4965250.arff.gz │ │ ├── id_40945 │ │ │ ├── __init__.py │ │ │ ├── api-v1-jd-40945.json.gz │ │ │ ├── api-v1-jdf-40945.json.gz │ │ │ ├── api-v1-jdq-40945.json.gz │ │ │ └── data-v1-dl-16826755.arff.gz │ │ ├── id_40966 │ │ │ ├── __init__.py │ │ │ ├── api-v1-jd-40966.json.gz │ │ │ ├── api-v1-jdf-40966.json.gz │ │ │ ├── api-v1-jdl-dn-miceprotein-l-2-dv-4.json.gz │ │ │ ├── api-v1-jdl-dn-miceprotein-l-2-s-act-.json.gz │ │ │ ├── 
api-v1-jdq-40966.json.gz │ │ │ └── data-v1-dl-17928620.arff.gz │ │ ├── id_42585 │ │ │ ├── __init__.py │ │ │ ├── api-v1-jd-42585.json.gz │ │ │ ├── api-v1-jdf-42585.json.gz │ │ │ ├── api-v1-jdq-42585.json.gz │ │ │ └── data-v1-dl-21854866.arff.gz │ │ ├── id_561 │ │ │ ├── __init__.py │ │ │ ├── api-v1-jd-561.json.gz │ │ │ ├── api-v1-jdf-561.json.gz │ │ │ ├── api-v1-jdl-dn-cpu-l-2-dv-1.json.gz │ │ │ ├── api-v1-jdl-dn-cpu-l-2-s-act-.json.gz │ │ │ ├── api-v1-jdq-561.json.gz │ │ │ └── data-v1-dl-52739.arff.gz │ │ ├── id_61 │ │ │ ├── __init__.py │ │ │ ├── api-v1-jd-61.json.gz │ │ │ ├── api-v1-jdf-61.json.gz │ │ │ ├── api-v1-jdl-dn-iris-l-2-dv-1.json.gz │ │ │ ├── api-v1-jdl-dn-iris-l-2-s-act-.json.gz │ │ │ ├── api-v1-jdq-61.json.gz │ │ │ └── data-v1-dl-61.arff.gz │ │ └── id_62 │ │ │ ├── __init__.py │ │ │ ├── api-v1-jd-62.json.gz │ │ │ ├── api-v1-jdf-62.json.gz │ │ │ ├── api-v1-jdq-62.json.gz │ │ │ └── data-v1-dl-52352.arff.gz │ ├── svmlight_classification.txt │ ├── svmlight_invalid.txt │ ├── svmlight_invalid_order.txt │ └── svmlight_multilabel.txt │ ├── test_20news.py │ ├── test_base.py │ ├── test_california_housing.py │ ├── test_common.py │ ├── test_covtype.py │ ├── test_kddcup99.py │ ├── test_lfw.py │ ├── test_olivetti_faces.py │ ├── test_openml.py │ ├── test_rcv1.py │ ├── test_samples_generator.py │ └── test_svmlight_format.py ├── decomposition ├── __init__.py ├── _base.py ├── _cdnmf_fast.pyx ├── _dict_learning.py ├── _factor_analysis.py ├── _fastica.py ├── _incremental_pca.py ├── _kernel_pca.py ├── _lda.py ├── _nmf.py ├── _online_lda_fast.pyx ├── _pca.py ├── _sparse_pca.py ├── _truncated_svd.py ├── setup.py └── tests │ ├── __init__.py │ ├── test_dict_learning.py │ ├── test_factor_analysis.py │ ├── test_fastica.py │ ├── test_incremental_pca.py │ ├── test_kernel_pca.py │ ├── test_nmf.py │ ├── test_online_lda.py │ ├── test_pca.py │ ├── test_sparse_pca.py │ └── test_truncated_svd.py ├── discriminant_analysis.py ├── dummy.py ├── ensemble ├── __init__.py ├── _bagging.py ├── 
_base.py ├── _forest.py ├── _gb.py ├── _gb_losses.py ├── _gradient_boosting.pyx ├── _hist_gradient_boosting │ ├── __init__.py │ ├── _binning.pyx │ ├── _bitset.pxd │ ├── _bitset.pyx │ ├── _gradient_boosting.pyx │ ├── _predictor.pyx │ ├── binning.py │ ├── common.pxd │ ├── common.pyx │ ├── gradient_boosting.py │ ├── grower.py │ ├── histogram.pyx │ ├── predictor.py │ ├── splitting.pyx │ ├── tests │ │ ├── __init__.py │ │ ├── test_binning.py │ │ ├── test_bitset.py │ │ ├── test_compare_lightgbm.py │ │ ├── test_gradient_boosting.py │ │ ├── test_grower.py │ │ ├── test_histogram.py │ │ ├── test_monotonic_contraints.py │ │ ├── test_predictor.py │ │ ├── test_splitting.py │ │ └── test_warm_start.py │ └── utils.pyx ├── _iforest.py ├── _stacking.py ├── _voting.py ├── _weight_boosting.py ├── setup.py └── tests │ ├── __init__.py │ ├── test_bagging.py │ ├── test_base.py │ ├── test_common.py │ ├── test_forest.py │ ├── test_gradient_boosting.py │ ├── test_gradient_boosting_loss_functions.py │ ├── test_iforest.py │ ├── test_stacking.py │ ├── test_voting.py │ └── test_weight_boosting.py ├── exceptions.py ├── experimental ├── __init__.py ├── enable_halving_search_cv.py ├── enable_hist_gradient_boosting.py ├── enable_iterative_imputer.py └── tests │ ├── __init__.py │ ├── test_enable_hist_gradient_boosting.py │ ├── test_enable_iterative_imputer.py │ └── test_enable_successive_halving.py ├── externals ├── README ├── __init__.py ├── _arff.py ├── _lobpcg.py ├── _packaging │ ├── __init__.py │ ├── _structures.py │ └── version.py ├── _pilutil.py └── conftest.py ├── feature_extraction ├── __init__.py ├── _dict_vectorizer.py ├── _hash.py ├── _hashing_fast.pyx ├── _stop_words.py ├── image.py ├── setup.py ├── tests │ ├── __init__.py │ ├── test_dict_vectorizer.py │ ├── test_feature_hasher.py │ ├── test_image.py │ └── test_text.py └── text.py ├── feature_selection ├── __init__.py ├── _base.py ├── _from_model.py ├── _mutual_info.py ├── _rfe.py ├── _sequential.py ├── _univariate_selection.py ├── 
_variance_threshold.py └── tests │ ├── __init__.py │ ├── test_base.py │ ├── test_chi2.py │ ├── test_feature_select.py │ ├── test_from_model.py │ ├── test_mutual_info.py │ ├── test_rfe.py │ ├── test_sequential.py │ └── test_variance_threshold.py ├── gaussian_process ├── __init__.py ├── _gpc.py ├── _gpr.py ├── kernels.py └── tests │ ├── __init__.py │ ├── _mini_sequence_kernel.py │ ├── test_gpc.py │ ├── test_gpr.py │ └── test_kernels.py ├── impute ├── __init__.py ├── _base.py ├── _iterative.py ├── _knn.py └── tests │ ├── __init__.py │ ├── test_base.py │ ├── test_common.py │ ├── test_impute.py │ └── test_knn.py ├── inspection ├── __init__.py ├── _partial_dependence.py ├── _permutation_importance.py ├── _plot │ ├── __init__.py │ ├── partial_dependence.py │ └── tests │ │ ├── __init__.py │ │ └── test_plot_partial_dependence.py ├── setup.py └── tests │ ├── __init__.py │ ├── test_partial_dependence.py │ └── test_permutation_importance.py ├── isotonic.py ├── kernel_approximation.py ├── kernel_ridge.py ├── linear_model ├── __init__.py ├── _base.py ├── _bayes.py ├── _cd_fast.pyx ├── _coordinate_descent.py ├── _glm │ ├── __init__.py │ ├── glm.py │ ├── link.py │ └── tests │ │ ├── __init__.py │ │ ├── test_glm.py │ │ └── test_link.py ├── _huber.py ├── _least_angle.py ├── _linear_loss.py ├── _logistic.py ├── _omp.py ├── _passive_aggressive.py ├── _perceptron.py ├── _quantile.py ├── _ransac.py ├── _ridge.py ├── _sag.py ├── _sag_fast.pyx.tp ├── _sgd_fast.pxd ├── _sgd_fast.pyx ├── _sgd_fast_helpers.h ├── _stochastic_gradient.py ├── _theil_sen.py ├── setup.py └── tests │ ├── __init__.py │ ├── test_base.py │ ├── test_bayes.py │ ├── test_common.py │ ├── test_coordinate_descent.py │ ├── test_huber.py │ ├── test_least_angle.py │ ├── test_linear_loss.py │ ├── test_logistic.py │ ├── test_omp.py │ ├── test_passive_aggressive.py │ ├── test_perceptron.py │ ├── test_quantile.py │ ├── test_ransac.py │ ├── test_ridge.py │ ├── test_sag.py │ ├── test_sgd.py │ ├── test_sparse_coordinate_descent.py │ 
└── test_theil_sen.py ├── manifold ├── __init__.py ├── _barnes_hut_tsne.pyx ├── _isomap.py ├── _locally_linear.py ├── _mds.py ├── _spectral_embedding.py ├── _t_sne.py ├── _utils.pyx ├── setup.py └── tests │ ├── __init__.py │ ├── test_isomap.py │ ├── test_locally_linear.py │ ├── test_mds.py │ ├── test_spectral_embedding.py │ └── test_t_sne.py ├── metrics ├── __init__.py ├── _base.py ├── _classification.py ├── _dist_metrics.pxd ├── _dist_metrics.pyx ├── _pairwise_distances_reduction.pyx ├── _pairwise_fast.pyx ├── _plot │ ├── __init__.py │ ├── base.py │ ├── confusion_matrix.py │ ├── det_curve.py │ ├── precision_recall_curve.py │ ├── roc_curve.py │ └── tests │ │ ├── __init__.py │ │ ├── test_base.py │ │ ├── test_common_curve_display.py │ │ ├── test_confusion_matrix_display.py │ │ ├── test_det_curve_display.py │ │ ├── test_plot_confusion_matrix.py │ │ ├── test_plot_curve_common.py │ │ ├── test_plot_det_curve.py │ │ ├── test_plot_precision_recall.py │ │ ├── test_plot_roc_curve.py │ │ ├── test_precision_recall_display.py │ │ └── test_roc_curve_display.py ├── _ranking.py ├── _regression.py ├── _scorer.py ├── cluster │ ├── __init__.py │ ├── _bicluster.py │ ├── _expected_mutual_info_fast.pyx │ ├── _supervised.py │ ├── _unsupervised.py │ ├── setup.py │ └── tests │ │ ├── __init__.py │ │ ├── test_bicluster.py │ │ ├── test_common.py │ │ ├── test_supervised.py │ │ └── test_unsupervised.py ├── pairwise.py ├── setup.py └── tests │ ├── __init__.py │ ├── test_classification.py │ ├── test_common.py │ ├── test_dist_metrics.py │ ├── test_pairwise.py │ ├── test_pairwise_distances_reduction.py │ ├── test_ranking.py │ ├── test_regression.py │ └── test_score_objects.py ├── mixture ├── __init__.py ├── _base.py ├── _bayesian_mixture.py ├── _gaussian_mixture.py └── tests │ ├── __init__.py │ ├── test_bayesian_mixture.py │ ├── test_gaussian_mixture.py │ └── test_mixture.py ├── model_selection ├── __init__.py ├── _search.py ├── _search_successive_halving.py ├── _split.py ├── _validation.py └── 
tests │ ├── __init__.py │ ├── common.py │ ├── test_search.py │ ├── test_split.py │ ├── test_successive_halving.py │ └── test_validation.py ├── multiclass.py ├── multioutput.py ├── naive_bayes.py ├── neighbors ├── __init__.py ├── _ball_tree.pyx ├── _base.py ├── _binary_tree.pxi ├── _classification.py ├── _distance_metric.py ├── _graph.py ├── _kd_tree.pyx ├── _kde.py ├── _lof.py ├── _nca.py ├── _nearest_centroid.py ├── _partition_nodes.pxd ├── _partition_nodes.pyx ├── _quad_tree.pxd ├── _quad_tree.pyx ├── _regression.py ├── _unsupervised.py ├── setup.py └── tests │ ├── __init__.py │ ├── test_ball_tree.py │ ├── test_graph.py │ ├── test_kd_tree.py │ ├── test_kde.py │ ├── test_lof.py │ ├── test_nca.py │ ├── test_nearest_centroid.py │ ├── test_neighbors.py │ ├── test_neighbors_pipeline.py │ ├── test_neighbors_tree.py │ └── test_quad_tree.py ├── neural_network ├── __init__.py ├── _base.py ├── _multilayer_perceptron.py ├── _rbm.py ├── _stochastic_optimizers.py └── tests │ ├── __init__.py │ ├── test_base.py │ ├── test_mlp.py │ ├── test_rbm.py │ └── test_stochastic_optimizers.py ├── pipeline.py ├── preprocessing ├── __init__.py ├── _csr_polynomial_expansion.pyx ├── _data.py ├── _discretization.py ├── _encoders.py ├── _function_transformer.py ├── _label.py ├── _polynomial.py ├── setup.py └── tests │ ├── __init__.py │ ├── test_common.py │ ├── test_data.py │ ├── test_discretization.py │ ├── test_encoders.py │ ├── test_function_transformer.py │ ├── test_label.py │ └── test_polynomial.py ├── random_projection.py ├── semi_supervised ├── __init__.py ├── _label_propagation.py ├── _self_training.py └── tests │ ├── __init__.py │ ├── test_label_propagation.py │ └── test_self_training.py ├── setup.py ├── svm ├── __init__.py ├── _base.py ├── _bounds.py ├── _classes.py ├── _liblinear.pxi ├── _liblinear.pyx ├── _libsvm.pxi ├── _libsvm.pyx ├── _libsvm_sparse.pyx ├── _newrand.pyx ├── setup.py ├── src │ ├── liblinear │ │ ├── COPYRIGHT │ │ ├── _cython_blas_helpers.h │ │ ├── liblinear_helper.c 
│ │ ├── linear.cpp │ │ ├── linear.h │ │ ├── tron.cpp │ │ └── tron.h │ ├── libsvm │ │ ├── LIBSVM_CHANGES │ │ ├── _svm_cython_blas_helpers.h │ │ ├── libsvm_helper.c │ │ ├── libsvm_sparse_helper.c │ │ ├── libsvm_template.cpp │ │ ├── svm.cpp │ │ └── svm.h │ └── newrand │ │ └── newrand.h └── tests │ ├── __init__.py │ ├── test_bounds.py │ ├── test_sparse.py │ └── test_svm.py ├── tests ├── __init__.py ├── test_base.py ├── test_build.py ├── test_calibration.py ├── test_check_build.py ├── test_common.py ├── test_config.py ├── test_discriminant_analysis.py ├── test_docstring_parameters.py ├── test_docstrings.py ├── test_dummy.py ├── test_init.py ├── test_isotonic.py ├── test_kernel_approximation.py ├── test_kernel_ridge.py ├── test_metaestimators.py ├── test_min_dependencies_readme.py ├── test_multiclass.py ├── test_multioutput.py ├── test_naive_bayes.py ├── test_pipeline.py └── test_random_projection.py ├── tree ├── __init__.py ├── _classes.py ├── _criterion.pxd ├── _criterion.pyx ├── _export.py ├── _reingold_tilford.py ├── _splitter.pxd ├── _splitter.pyx ├── _tree.pxd ├── _tree.pyx ├── _utils.pxd ├── _utils.pyx ├── setup.py └── tests │ ├── __init__.py │ ├── test_export.py │ ├── test_reingold_tilford.py │ └── test_tree.py └── utils ├── __init__.py ├── _arpack.py ├── _bunch.py ├── _cython_blas.pxd ├── _cython_blas.pyx ├── _encode.py ├── _estimator_html_repr.py ├── _fast_dict.pxd ├── _fast_dict.pyx ├── _heap.pxd ├── _heap.pyx ├── _joblib.py ├── _logistic_sigmoid.pyx ├── _mask.py ├── _mocking.py ├── _openmp_helpers.pxd ├── _openmp_helpers.pyx ├── _pprint.py ├── _random.pxd ├── _random.pyx ├── _readonly_array_wrapper.pyx ├── _seq_dataset.pxd.tp ├── _seq_dataset.pyx.tp ├── _show_versions.py ├── _tags.py ├── _testing.py ├── _typedefs.pxd ├── _typedefs.pyx ├── _weight_vector.pxd.tp ├── _weight_vector.pyx.tp ├── arrayfuncs.pyx ├── class_weight.py ├── deprecation.py ├── estimator_checks.py ├── extmath.py ├── fixes.py ├── graph.py ├── metaestimators.py ├── multiclass.py ├── 
murmurhash.pxd ├── murmurhash.pyx ├── optimize.py ├── random.py ├── setup.py ├── sparsefuncs.py ├── sparsefuncs_fast.pyx ├── src ├── MurmurHash3.cpp └── MurmurHash3.h ├── stats.py ├── tests ├── __init__.py ├── conftest.py ├── test_arpack.py ├── test_arrayfuncs.py ├── test_class_weight.py ├── test_cython_blas.py ├── test_cython_templating.py ├── test_deprecation.py ├── test_encode.py ├── test_estimator_checks.py ├── test_estimator_html_repr.py ├── test_extmath.py ├── test_fast_dict.py ├── test_fixes.py ├── test_graph.py ├── test_metaestimators.py ├── test_mocking.py ├── test_multiclass.py ├── test_murmurhash.py ├── test_optimize.py ├── test_parallel.py ├── test_pprint.py ├── test_random.py ├── test_readonly_wrapper.py ├── test_seq_dataset.py ├── test_shortest_path.py ├── test_show_versions.py ├── test_sparsefuncs.py ├── test_stats.py ├── test_tags.py ├── test_testing.py ├── test_utils.py ├── test_validation.py └── test_weight_vector.py └── validation.py /.binder/requirements.txt: -------------------------------------------------------------------------------- 1 | --extra-index https://pypi.anaconda.org/scipy-wheels-nightly/simple scikit-learn 2 | --pre 3 | matplotlib 4 | scikit-image 5 | pandas 6 | sphinx-gallery 7 | scikit-learn 8 | 9 | -------------------------------------------------------------------------------- /.circleci/artifact_path: -------------------------------------------------------------------------------- 1 | 0/doc/_changed.html 2 | -------------------------------------------------------------------------------- /.codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | # Commits pushed to main should not make the overall 8 | # project coverage decrease by more than 1%: 9 | target: auto 10 | threshold: 1% 11 | patch: 12 | default: 13 | # Be tolerant on slight code coverage diff on PRs to limit 14 | # noisy red coverage status on github 
PRs. 15 | # Note: The coverage stats are still uploaded 16 | # to codecov so that PR reviewers can see uncovered lines 17 | target: auto 18 | threshold: 1% 19 | 20 | codecov: 21 | notify: 22 | # Prevent the coverage status from being uploaded multiple times for parallel and long 23 | # running CI pipelines. This configuration is particularly useful on PRs 24 | # to avoid confusion. Note that this value is set to the number of Azure 25 | # Pipeline jobs uploading coverage reports. 26 | after_n_builds: 6 27 | 28 | ignore: 29 | - "sklearn/externals" 30 | - "sklearn/_build_utils" 31 | - "**/setup.py" 32 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | source = sklearn 4 | parallel = True 5 | omit = 6 | */sklearn/externals/* 7 | */sklearn/_build_utils/* 8 | */benchmarks/* 9 | **/setup.py 10 | -------------------------------------------------------------------------------- /.git-blame-ignore-revs: -------------------------------------------------------------------------------- 1 | # Since git version 2.23, git-blame has a feature to ignore 2 | # certain commits. 3 | # 4 | # This file contains a list of commits that are not likely what 5 | # you are looking for in `git blame`. You can set this file as 6 | # a default ignore file for blame by running the following 7 | # command. 
8 | # 9 | # $ git config blame.ignoreRevsFile .git-blame-ignore-revs 10 | 11 | # PR 18948: Migrate code style to Black 12 | 82df48934eba1df9a1ed3be98aaace8eada59e6e 13 | 14 | # PR 20294: Use target_version >= 3.7 in Black 15 | 351ace7935a4ea685171cc6d174890f08facd561 16 | 17 | # PR 20412: Use experimental_string_processing=true in Black 18 | 3ae7c7615343bbd36acece57825d8b0d70fd9da4 19 | 20 | # PR 20502: Runs Black on examples 21 | 70a185ae59b4362633d18b0d0083abb1b6f7370c 22 | 23 | # PR 22474: Update to Black 22.1.0 24 | 1fc86b6aacd89da44a3b4e8abf7c3e2ba4336ffe 25 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | /doc/whats_new/v*.rst merge=union 2 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | custom: ['https://numfocus.org/donate-to-scikit-learn'] 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | contact_links: 3 | - name: Discussions 4 | url: 
https://github.com/scikit-learn/scikit-learn/discussions/new 5 | about: Ask questions and discuss with other scikit-learn community members 6 | - name: Stack Overflow 7 | url: https://stackoverflow.com/questions/tagged/scikit-learn 8 | about: Please ask and answer usage questions on Stack Overflow 9 | - name: Mailing list 10 | url: https://mail.python.org/mailman/listinfo/scikit-learn 11 | about: General discussions and announcements on the mailing list 12 | - name: Gitter 13 | url: https://gitter.im/scikit-learn/scikit-learn 14 | about: Users and developers can sometimes be found on the gitter channel 15 | - name: Blank issue 16 | url: https://github.com/scikit-learn/scikit-learn/issues/new 17 | about: Please note that Github Discussions should be used in most cases instead 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/doc_improvement.yml: -------------------------------------------------------------------------------- 1 | name: Documentation improvement 2 | description: Create a report to help us improve the documentation. Alternatively you can just open a pull request with the suggested change. 3 | labels: [Documentation, 'Needs Triage'] 4 | 5 | body: 6 | - type: textarea 7 | attributes: 8 | label: Describe the issue linked to the documentation 9 | description: > 10 | Tell us about the confusion introduced in the documentation. 11 | validations: 12 | required: true 13 | - type: textarea 14 | attributes: 15 | label: Suggest a potential alternative/fix 16 | description: > 17 | Tell us how we could improve the documentation in this regard. 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: -------------------------------------------------------------------------------- 1 | name: Feature request 2 | description: Suggest a new algorithm, enhancement to an existing algorithm, etc. 
3 | labels: ['New Feature', 'Needs Triage'] 4 | 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: > 9 | #### If you want to propose a new algorithm, please refer first to the [scikit-learn inclusion criterion](https://scikit-learn.org/stable/faq.html#what-are-the-inclusion-criteria-for-new-algorithms). 10 | - type: textarea 11 | attributes: 12 | label: Describe the workflow you want to enable 13 | validations: 14 | required: true 15 | - type: textarea 16 | attributes: 17 | label: Describe your proposed solution 18 | validations: 19 | required: true 20 | - type: textarea 21 | attributes: 22 | label: Describe alternatives you've considered, if relevant 23 | - type: textarea 24 | attributes: 25 | label: Additional context 26 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 5 | 6 | #### Reference Issues/PRs 7 | 13 | 14 | 15 | #### What does this implement/fix? Explain your changes. 16 | 17 | 18 | #### Any other comments? 19 | 20 | 21 | 33 | -------------------------------------------------------------------------------- /.github/labeler-file-extensions.yml: -------------------------------------------------------------------------------- 1 | cython: 2 | - sklearn/**/*.pyx 3 | - sklearn/**/*.pxd 4 | - sklearn/**/*.pxi 5 | # Tempita templates 6 | - sklearn/**/*.pyx.tp 7 | - sklearn/**/*.pxd.tp 8 | - sklearn/**/*.pxi.tp 9 | -------------------------------------------------------------------------------- /.github/scripts/label_title_regex.py: -------------------------------------------------------------------------------- 1 | """Labels PRs based on title. 
Must be run in a github action with the 2 | pull_request_target event.""" 3 | from github import Github 4 | import os 5 | import json 6 | import re 7 | 8 | context_dict = json.loads(os.getenv("CONTEXT_GITHUB")) 9 | 10 | repo = context_dict["repository"] 11 | g = Github(context_dict["token"]) 12 | repo = g.get_repo(repo) 13 | pr_number = context_dict["event"]["number"] 14 | issue = repo.get_issue(number=pr_number) 15 | title = issue.title 16 | 17 | 18 | regex_to_labels = [(r"\bDOC\b", "Documentation"), (r"\bCI\b", "Build / CI")] 19 | 20 | labels_to_add = [label for regex, label in regex_to_labels if re.search(regex, title)] 21 | 22 | if labels_to_add: 23 | issue.add_to_labels(*labels_to_add) 24 | -------------------------------------------------------------------------------- /.github/workflows/assign.yml: -------------------------------------------------------------------------------- 1 | 2 | name: Assign 3 | on: 4 | issue_comment: 5 | types: created 6 | 7 | jobs: 8 | one: 9 | runs-on: ubuntu-latest 10 | # Note that string comparisons are not case sensitive. 
11 | if: >- 12 | startsWith(github.event.comment.body, '/take') 13 | && !github.event.issue.assignee 14 | steps: 15 | - run: | 16 | echo "Assigning issue ${{ github.event.issue.number }} to ${{ github.event.comment.user.login }}" 17 | curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -d '{"assignees": ["${{ github.event.comment.user.login }}"]}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees 18 | curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -X "DELETE" https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/labels/help%20wanted 19 | -------------------------------------------------------------------------------- /.github/workflows/check-manifest.yml: -------------------------------------------------------------------------------- 1 | name: "Check Manifest" 2 | 3 | on: 4 | schedule: 5 | - cron: '0 0 * * *' 6 | 7 | jobs: 8 | check-manifest: 9 | # Don't run on forks 10 | if: github.repository == 'scikit-learn/scikit-learn' 11 | 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v2 15 | - uses: actions/setup-python@v2 16 | with: 17 | python-version: '3.9' 18 | - name: Install dependencies 19 | # scipy and cython are required to build sdist 20 | run: | 21 | python -m pip install --upgrade pip 22 | pip install check-manifest scipy cython 23 | - run: | 24 | check-manifest -v 25 | 26 | update-tracker: 27 | uses: ./.github/workflows/update_tracking_issue.yml 28 | if: ${{ always() }} 29 | needs: [check-manifest] 30 | with: 31 | job_status: ${{ needs.check-manifest.result }} 32 | secrets: 33 | BOT_GITHUB_TOKEN: ${{ secrets.BOT_GITHUB_TOKEN }} 34 | -------------------------------------------------------------------------------- /.github/workflows/label-blank-issue.yml: -------------------------------------------------------------------------------- 1 | name: Labels Blank issues 2 | 3 | on: 4 | issues: 5 | types: [opened] 6 | 7 | jobs: 8 | 
label-blank-issues: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: andymckay/labeler@1.0.4 12 | with: 13 | add-labels: "Needs Triage" 14 | ignore-if-labeled: true 15 | -------------------------------------------------------------------------------- /.github/workflows/labeler-module.yml: -------------------------------------------------------------------------------- 1 | name: "Pull Request Labeler" 2 | on: 3 | pull_request_target: 4 | types: [opened] 5 | 6 | jobs: 7 | triage: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: thomasjpfan/labeler@v2.5.0 11 | continue-on-error: true 12 | if: github.repository == 'scikit-learn/scikit-learn' 13 | with: 14 | repo-token: "${{ secrets.GITHUB_TOKEN }}" 15 | max-labels: "3" 16 | configuration-path: ".github/labeler-module.yml" 17 | 18 | triage_file_extensions: 19 | runs-on: ubuntu-latest 20 | steps: 21 | - uses: thomasjpfan/labeler@v2.5.0 22 | continue-on-error: true 23 | if: github.repository == 'scikit-learn/scikit-learn' 24 | with: 25 | repo-token: "${{ secrets.GITHUB_TOKEN }}" 26 | configuration-path: ".github/labeler-file-extensions.yml" 27 | -------------------------------------------------------------------------------- /.github/workflows/labeler-title-regex.yml: -------------------------------------------------------------------------------- 1 | name: Pull Request Regex Title Labeler 2 | on: 3 | pull_request_target: 4 | types: [opened, edited] 5 | 6 | permissions: 7 | contents: read 8 | pull-requests: write 9 | 10 | jobs: 11 | 12 | labeler: 13 | runs-on: ubuntu-20.04 14 | steps: 15 | - uses: actions/checkout@v2 16 | - uses: actions/setup-python@v2 17 | with: 18 | python-version: '3.9' 19 | - name: Install PyGithub 20 | run: pip install -Uq PyGithub 21 | - name: Label pull request 22 | run: python .github/scripts/label_title_regex.py 23 | env: 24 | CONTEXT_GITHUB: ${{ toJson(github) }} 25 | -------------------------------------------------------------------------------- /.github/workflows/twitter.yml: 
-------------------------------------------------------------------------------- 1 | # Tweet the URL of a commit on @sklearn_commits whenever a push event 2 | # happens on the main branch 3 | name: Twitter Push Notification 4 | 5 | 6 | on: 7 | push: 8 | branches: 9 | - main 10 | 11 | 12 | jobs: 13 | tweet: 14 | name: Twitter Notification 15 | runs-on: ubuntu-latest 16 | steps: 17 | - name: Tweet URL of last commit as @sklearn_commits 18 | if: github.repository == 'scikit-learn/scikit-learn' 19 | uses: docker://thomasjpfan/twitter-action:0.3 20 | with: 21 | args: "-message \"https://github.com/scikit-learn/scikit-learn/commit/${{ github.sha }}\"" 22 | env: 23 | TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }} 24 | TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }} 25 | TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }} 26 | TWITTER_ACCESS_SECRET: ${{ secrets.TWITTER_ACCESS_SECRET }} 27 | -------------------------------------------------------------------------------- /.github/workflows/unassign.yml: -------------------------------------------------------------------------------- 1 | name: Unassign 2 | #Runs when a contributor has unassigned themselves from the issue and adds 'help wanted' 3 | on: 4 | issues: 5 | types: unassigned 6 | 7 | jobs: 8 | one: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: 12 | if: github.event.issue.state == 'open' 13 | run: | 14 | echo "Marking issue ${{ github.event.issue.number }} as help wanted" 15 | curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -d '{"labels": ["help wanted"]}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/labels 16 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v2.3.0 4 | hooks: 5 | - id: check-yaml 6 | - 
id: end-of-file-fixer 7 | - id: trailing-whitespace 8 | - repo: https://github.com/psf/black 9 | rev: 22.1.0 10 | hooks: 11 | - id: black 12 | - repo: https://gitlab.com/pycqa/flake8 13 | rev: 3.9.2 14 | hooks: 15 | - id: flake8 16 | types: [file, python] 17 | - repo: https://github.com/pre-commit/mirrors-mypy 18 | rev: v0.782 19 | hooks: 20 | - id: mypy 21 | files: sklearn/ 22 | additional_dependencies: [pytest==6.2.4] 23 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | We are a community based on openness, as well as friendly and didactic discussions. 4 | 5 | We aspire to treat everybody equally, and value their contributions. 6 | 7 | Decisions are made based on technical merit and consensus. 8 | 9 | Code is not the only way to help the project. Reviewing pull requests, 10 | answering questions to help others on mailing lists or issues, organizing and 11 | teaching tutorials, working on the website, improving the documentation, are 12 | all priceless contributions. 
13 | 14 | We abide by the principles of openness, respect, and consideration of others of 15 | the Python Software Foundation: https://www.python.org/psf/codeofconduct/ 16 | 17 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.rst 2 | recursive-include doc * 3 | recursive-include examples * 4 | recursive-include sklearn *.c *.h *.pyx *.pxd *.pxi *.tp 5 | recursive-include sklearn/datasets *.csv *.csv.gz *.rst *.jpg *.txt *.arff.gz *.json.gz 6 | include COPYING 7 | include README.rst 8 | include pyproject.toml 9 | include sklearn/externals/README 10 | include sklearn/svm/src/liblinear/COPYRIGHT 11 | include sklearn/svm/src/libsvm/LIBSVM_CHANGES 12 | include conftest.py 13 | include Makefile 14 | include MANIFEST.in 15 | include .coveragerc 16 | 17 | # exclude from sdist 18 | recursive-exclude asv_benchmarks * 19 | recursive-exclude benchmarks * 20 | recursive-exclude build_tools * 21 | recursive-exclude maint_tools * 22 | recursive-exclude benchmarks * 23 | recursive-exclude .binder * 24 | recursive-exclude .circleci * 25 | exclude .codecov.yml 26 | exclude .git-blame-ignore-revs 27 | exclude .mailmap 28 | exclude .pre-commit-config.yaml 29 | exclude azure-pipelines.yml 30 | exclude lgtm.yml 31 | exclude CODE_OF_CONDUCT.md 32 | exclude CONTRIBUTING.md 33 | exclude SECURITY.md 34 | exclude PULL_REQUEST_TEMPLATE.md 35 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | | Version | Supported | 6 | | --------- | ------------------ | 7 | | 1.0.1 | :white_check_mark: | 8 | | < 1.0.1 | :x: | 9 | 10 | ## Reporting a Vulnerability 11 | 12 | Please report security vulnerabilities by email to `security@scikit-learn.org`. 
13 | This email is an alias to a subset of the scikit-learn maintainers' team. 14 | 15 | If the security vulnerability is accepted, a patch will be crafted privately 16 | in order to prepare a dedicated bugfix release as timely as possible (depending 17 | on the complexity of the fix). 18 | -------------------------------------------------------------------------------- /asv_benchmarks/.gitignore: -------------------------------------------------------------------------------- 1 | *__pycache__* 2 | env/ 3 | html/ 4 | results/ 5 | scikit-learn/ 6 | benchmarks/cache/ 7 | -------------------------------------------------------------------------------- /asv_benchmarks/benchmarks/__init__.py: -------------------------------------------------------------------------------- 1 | """Benchmark suite for scikit-learn using ASV""" 2 | -------------------------------------------------------------------------------- /asv_benchmarks/benchmarks/manifold.py: -------------------------------------------------------------------------------- 1 | from sklearn.manifold import TSNE 2 | 3 | from .common import Benchmark, Estimator 4 | from .datasets import _digits_dataset 5 | 6 | 7 | class TSNEBenchmark(Estimator, Benchmark): 8 | """ 9 | Benchmarks for t-SNE. 
10 | """ 11 | 12 | param_names = ["method"] 13 | params = (["exact", "barnes_hut"],) 14 | 15 | def setup_cache(self): 16 | super().setup_cache() 17 | 18 | def make_data(self, params): 19 | (method,) = params 20 | 21 | n_samples = 500 if method == "exact" else None 22 | 23 | return _digits_dataset(n_samples=n_samples) 24 | 25 | def make_estimator(self, params): 26 | (method,) = params 27 | 28 | estimator = TSNE(random_state=0, method=method) 29 | 30 | return estimator 31 | 32 | def make_scorers(self): 33 | self.train_scorer = lambda _, __: self.estimator.kl_divergence_ 34 | self.test_scorer = lambda _, __: self.estimator.kl_divergence_ 35 | -------------------------------------------------------------------------------- /asv_benchmarks/benchmarks/svm.py: -------------------------------------------------------------------------------- 1 | from sklearn.svm import SVC 2 | 3 | from .common import Benchmark, Estimator, Predictor 4 | from .datasets import _synth_classification_dataset 5 | from .utils import make_gen_classif_scorers 6 | 7 | 8 | class SVCBenchmark(Predictor, Estimator, Benchmark): 9 | """Benchmarks for SVC.""" 10 | 11 | param_names = ["kernel"] 12 | params = (["linear", "poly", "rbf", "sigmoid"],) 13 | 14 | def setup_cache(self): 15 | super().setup_cache() 16 | 17 | def make_data(self, params): 18 | return _synth_classification_dataset() 19 | 20 | def make_estimator(self, params): 21 | (kernel,) = params 22 | 23 | estimator = SVC( 24 | max_iter=100, tol=1e-16, kernel=kernel, random_state=0, gamma="scale" 25 | ) 26 | 27 | return estimator 28 | 29 | def make_scorers(self): 30 | make_gen_classif_scorers(self) 31 | -------------------------------------------------------------------------------- /benchmarks/.gitignore: -------------------------------------------------------------------------------- 1 | /bhtsne 2 | *.npy 3 | *.json 4 | /mnist_tsne_output/ 5 | -------------------------------------------------------------------------------- 
/benchmarks/plot_tsne_mnist.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import os.path as op 4 | 5 | import argparse 6 | 7 | 8 | LOG_DIR = "mnist_tsne_output" 9 | 10 | 11 | if __name__ == "__main__": 12 | parser = argparse.ArgumentParser("Plot benchmark results for t-SNE") 13 | parser.add_argument( 14 | "--labels", 15 | type=str, 16 | default=op.join(LOG_DIR, "mnist_original_labels_10000.npy"), 17 | help="1D integer numpy array for labels", 18 | ) 19 | parser.add_argument( 20 | "--embedding", 21 | type=str, 22 | default=op.join(LOG_DIR, "mnist_sklearn_TSNE_10000.npy"), 23 | help="2D float numpy array for embedded data", 24 | ) 25 | args = parser.parse_args() 26 | 27 | X = np.load(args.embedding) 28 | y = np.load(args.labels) 29 | 30 | for i in np.unique(y): 31 | mask = y == i 32 | plt.scatter(X[mask, 0], X[mask, 1], alpha=0.2, label=int(i)) 33 | plt.legend(loc="best") 34 | plt.show() 35 | -------------------------------------------------------------------------------- /build_tools/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for maintenance tools 2 | 3 | authors: 4 | python generate_authors_table.py 5 | -------------------------------------------------------------------------------- /build_tools/azure/install_win.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -x 5 | 6 | if [[ "$PYTHON_ARCH" == "64" ]]; then 7 | conda create -n $VIRTUALENV -q -y python=$PYTHON_VERSION numpy scipy cython matplotlib wheel pillow joblib 8 | 9 | source activate $VIRTUALENV 10 | 11 | pip install threadpoolctl 12 | 13 | if [[ "$PYTEST_VERSION" == "*" ]]; then 14 | pip install pytest 15 | else 16 | pip install pytest==$PYTEST_VERSION 17 | fi 18 | else 19 | pip install numpy scipy cython pytest wheel pillow joblib threadpoolctl 20 | fi 21 | 22 | if [[ 
"$PYTEST_XDIST_VERSION" != "none" ]]; then 23 | pip install pytest-xdist 24 | fi 25 | 26 | if [[ "$COVERAGE" == "true" ]]; then 27 | # XXX: coverage is temporary pinned to 6.2 because 6.3 is not fork-safe 28 | # cf. https://github.com/nedbat/coveragepy/issues/1310 29 | pip install coverage codecov pytest-cov coverage==6.2 30 | fi 31 | 32 | python --version 33 | pip --version 34 | 35 | # Build scikit-learn 36 | python setup.py bdist_wheel 37 | 38 | # Install the generated wheel package to test it 39 | pip install --pre --no-index --find-links dist scikit-learn 40 | -------------------------------------------------------------------------------- /build_tools/azure/test_docs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | if [[ "$DISTRIB" =~ ^conda.* ]]; then 6 | source activate $VIRTUALENV 7 | elif [[ "$DISTRIB" == "ubuntu" ]]; then 8 | source $VIRTUALENV/bin/activate 9 | fi 10 | 11 | if [[ "$BUILD_WITH_ICC" == "true" ]]; then 12 | source /opt/intel/oneapi/setvars.sh 13 | fi 14 | 15 | make test-doc 16 | -------------------------------------------------------------------------------- /build_tools/azure/test_pytest_soft_dependency.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # called when DISTRIB=="conda" 6 | source activate $VIRTUALENV 7 | conda remove -y py pytest || pip uninstall -y py pytest 8 | 9 | if [[ "$COVERAGE" == "true" ]]; then 10 | # conda may remove coverage when uninstall pytest and py 11 | pip install coverage 12 | # Need to append the coverage to the existing .coverage generated by 13 | # running the tests. Make sure to reuse the same coverage 14 | # configuration as the one used by the main pytest run to be 15 | # able to combine the results. 
16 | CMD="coverage run --rcfile=$BUILD_SOURCESDIRECTORY/.coveragerc" 17 | else 18 | CMD="python" 19 | fi 20 | 21 | # .coverage from running the tests is in TEST_DIR 22 | pushd $TEST_DIR 23 | $CMD -m sklearn.utils.tests.test_estimator_checks 24 | popd 25 | -------------------------------------------------------------------------------- /build_tools/azure/upload_codecov.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # called when COVERAGE=="true" and DISTRIB=="conda" 6 | export PATH=$HOME/miniconda3/bin:$PATH 7 | source activate $VIRTUALENV 8 | 9 | # Need to run codecov from a git checkout, so we copy .coverage 10 | # from TEST_DIR where pytest has been run 11 | pushd $TEST_DIR 12 | coverage combine --append 13 | popd 14 | cp $TEST_DIR/.coverage $BUILD_REPOSITORY_LOCALPATH 15 | 16 | codecov --root $BUILD_REPOSITORY_LOCALPATH -t $CODECOV_TOKEN || echo "codecov upload failed" 17 | -------------------------------------------------------------------------------- /build_tools/circle/checkout_merge_commit.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | # Add `main` branch to the update list. 5 | # Otherwise CircleCI will give us a cached one. 6 | FETCH_REFS="+main:main" 7 | 8 | # Update PR refs for testing. 9 | if [[ -n "${CIRCLE_PR_NUMBER}" ]] 10 | then 11 | FETCH_REFS="${FETCH_REFS} +refs/pull/${CIRCLE_PR_NUMBER}/head:pr/${CIRCLE_PR_NUMBER}/head" 12 | FETCH_REFS="${FETCH_REFS} +refs/pull/${CIRCLE_PR_NUMBER}/merge:pr/${CIRCLE_PR_NUMBER}/merge" 13 | fi 14 | 15 | # Retrieve the refs. 16 | git fetch -u origin ${FETCH_REFS} 17 | 18 | # Checkout the PR merge ref. 19 | if [[ -n "${CIRCLE_PR_NUMBER}" ]] 20 | then 21 | git checkout -qf "pr/${CIRCLE_PR_NUMBER}/merge" || ( 22 | echo Could not fetch merge commit. >&2 23 | echo There may be conflicts in merging PR \#${CIRCLE_PR_NUMBER} with main. 
>&2; 24 | exit 1) 25 | fi 26 | 27 | # Check for merge conflicts. 28 | if [[ -n "${CIRCLE_PR_NUMBER}" ]] 29 | then 30 | git branch --merged | grep main > /dev/null 31 | git branch --merged | grep "pr/${CIRCLE_PR_NUMBER}/head" > /dev/null 32 | fi 33 | -------------------------------------------------------------------------------- /build_tools/codespell_ignore_words.txt: -------------------------------------------------------------------------------- 1 | aggresive 2 | aline 3 | ba 4 | basf 5 | boun 6 | bre 7 | cach 8 | complies 9 | coo 10 | copys 11 | deine 12 | didi 13 | feld 14 | fo 15 | fpr 16 | fro 17 | fwe 18 | gool 19 | hart 20 | hist 21 | ines 22 | inout 23 | ist 24 | jaques 25 | linke 26 | lod 27 | mape 28 | mor 29 | nd 30 | nmae 31 | ocur 32 | pullrequest 33 | ro 34 | soler 35 | suh 36 | suprised 37 | te 38 | technic 39 | teh 40 | thi 41 | usal 42 | vie 43 | wan 44 | winn 45 | yau 46 | -------------------------------------------------------------------------------- /build_tools/github/Windows: -------------------------------------------------------------------------------- 1 | # Get the Python version of the base image from a build argument 2 | ARG PYTHON_VERSION 3 | FROM winamd64/python:$PYTHON_VERSION-windowsservercore 4 | 5 | ARG WHEEL_NAME 6 | ARG CONFTEST_NAME 7 | ARG CIBW_TEST_REQUIRES 8 | 9 | # Copy and install the Windows wheel 10 | COPY $WHEEL_NAME $WHEEL_NAME 11 | COPY $CONFTEST_NAME $CONFTEST_NAME 12 | RUN pip install $env:WHEEL_NAME 13 | 14 | # Install the testing dependencies 15 | RUN pip install $env:CIBW_TEST_REQUIRES.split(" ") 16 | -------------------------------------------------------------------------------- /build_tools/github/build_minimal_windows_image.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -x 5 | 6 | PYTHON_VERSION=$1 7 | BITNESS=$2 8 | 9 | if [[ "$BITNESS" == "32" ]]; then 10 | # 32-bit architectures are not supported 11 | # by the official Docker 
images: Tests will just be run 12 | # on the host (instead of the minimal Docker container). 13 | exit 0 14 | fi 15 | 16 | TEMP_FOLDER="$HOME/AppData/Local/Temp" 17 | WHEEL_PATH=$(ls -d $TEMP_FOLDER/*/repaired_wheel/*) 18 | WHEEL_NAME=$(basename $WHEEL_PATH) 19 | 20 | cp $WHEEL_PATH $WHEEL_NAME 21 | 22 | # Dot the Python version for identifying the base Docker image 23 | PYTHON_VERSION=$(echo ${PYTHON_VERSION:0:1}.${PYTHON_VERSION:1:2}) 24 | 25 | # Build a minimal Windows Docker image for testing the wheels 26 | docker build --build-arg PYTHON_VERSION=$PYTHON_VERSION \ 27 | --build-arg WHEEL_NAME=$WHEEL_NAME \ 28 | --build-arg CONFTEST_NAME=$CONFTEST_NAME \ 29 | --build-arg CIBW_TEST_REQUIRES="$CIBW_TEST_REQUIRES" \ 30 | -f build_tools/github/Windows \ 31 | -t scikit-learn/minimal-windows . 32 | -------------------------------------------------------------------------------- /build_tools/github/build_source.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -x 5 | 6 | # Move up two levels to create the virtual 7 | # environment outside of the source folder 8 | cd ../../ 9 | 10 | python -m venv build_env 11 | source build_env/bin/activate 12 | 13 | python -m pip install numpy scipy cython 14 | python -m pip install twine 15 | 16 | cd scikit-learn/scikit-learn 17 | python setup.py sdist 18 | 19 | # Check whether the source distribution will render correctly 20 | twine check dist/*.tar.gz 21 | -------------------------------------------------------------------------------- /build_tools/github/check_build_trigger.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -x 5 | 6 | COMMIT_MSG=$(git log --no-merges -1 --oneline) 7 | 8 | # The commit marker "[cd build]" or "[cd build gh]" will trigger the build when required 9 | if [[ "$GITHUB_EVENT_NAME" == schedule || 10 | "$COMMIT_MSG" =~ \[cd\ build\] || 11 | "$COMMIT_MSG" =~ \[cd\ 
build\ gh\] ]]; then 12 | echo "::set-output name=build::true" 13 | fi 14 | -------------------------------------------------------------------------------- /build_tools/github/check_wheels.py: -------------------------------------------------------------------------------- 1 | """Checks that dist/* contains the number of wheels built from the 2 | .github/workflows/wheels.yml config.""" 3 | import yaml 4 | from pathlib import Path 5 | import sys 6 | 7 | gh_wheel_path = Path.cwd() / ".github" / "workflows" / "wheels.yml" 8 | with gh_wheel_path.open("r") as f: 9 | wheel_config = yaml.safe_load(f) 10 | 11 | build_matrix = wheel_config["jobs"]["build_wheels"]["strategy"]["matrix"]["include"] 12 | n_wheels = len(build_matrix) 13 | 14 | # plus one more for the sdist 15 | n_wheels += 1 16 | 17 | # aarch64 builds from travis 18 | travis_config_path = Path.cwd() / ".travis.yml" 19 | with travis_config_path.open("r") as f: 20 | travis_config = yaml.safe_load(f) 21 | 22 | jobs = travis_config["jobs"]["include"] 23 | travis_builds = [j for j in jobs if any("CIBW_BUILD" in env for env in j["env"])] 24 | n_wheels += len(travis_builds) 25 | 26 | dist_files = list(Path("dist").glob("**/*")) 27 | n_dist_files = len(dist_files) 28 | 29 | if n_dist_files != n_wheels: 30 | print( 31 | f"Expected {n_wheels} wheels in dist/* but " 32 | f"got {n_dist_files} artifacts instead." 33 | ) 34 | sys.exit(1) 35 | 36 | print(f"dist/* has the expected {n_wheels} wheels:") 37 | print("\n".join(file.name for file in dist_files)) 38 | -------------------------------------------------------------------------------- /build_tools/github/repair_windows_wheels.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -x 5 | 6 | WHEEL=$1 7 | DEST_DIR=$2 8 | BITNESS=$3 9 | 10 | # By default, the Windows wheels are not repaired. 
11 | # In this case, we need to vendor VCRUNTIME140.dll 12 | wheel unpack "$WHEEL" 13 | WHEEL_DIRNAME=$(ls -d scikit_learn-*) 14 | python build_tools/github/vendor.py "$WHEEL_DIRNAME" "$BITNESS" 15 | wheel pack "$WHEEL_DIRNAME" -d "$DEST_DIR" 16 | rm -rf "$WHEEL_DIRNAME" 17 | -------------------------------------------------------------------------------- /build_tools/github/test_source.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -x 5 | 6 | cd ../../ 7 | 8 | python -m venv test_env 9 | source test_env/bin/activate 10 | 11 | python -m pip install scikit-learn/scikit-learn/dist/*.tar.gz 12 | python -m pip install pytest pandas 13 | 14 | # Run the tests on the installed source distribution 15 | mkdir tmp_for_test 16 | cp scikit-learn/scikit-learn/conftest.py tmp_for_test 17 | cd tmp_for_test 18 | 19 | pytest --pyargs sklearn 20 | -------------------------------------------------------------------------------- /build_tools/github/test_wheels.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -x 5 | 6 | if [[ "$OSTYPE" != "linux-gnu" ]]; then 7 | # The Linux test environment is run in a Docker container and 8 | # it is not possible to copy the test configuration file (yet) 9 | cp $CONFTEST_PATH $CONFTEST_NAME 10 | fi 11 | 12 | # Test that there are no links to system libraries in the 13 | # threadpoolctl output section of the show_versions output: 14 | python -c "import sklearn; sklearn.show_versions()" 15 | pytest --pyargs sklearn 16 | -------------------------------------------------------------------------------- /build_tools/github/test_windows_wheels.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -x 5 | 6 | PYTHON_VERSION=$1 7 | BITNESS=$2 8 | 9 | if [[ "$BITNESS" == "32" ]]; then 10 | # 32-bit architectures use the regular 11 | # test 
command (outside of the minimal Docker container) 12 | cp $CONFTEST_PATH $CONFTEST_NAME 13 | python -c "import sklearn; sklearn.show_versions()" 14 | pytest --pyargs sklearn 15 | else 16 | docker container run \ 17 | --rm scikit-learn/minimal-windows \ 18 | powershell -Command "python -c 'import sklearn; sklearn.show_versions()'" 19 | 20 | docker container run \ 21 | -e SKLEARN_SKIP_NETWORK_TESTS=1 \ 22 | -e OMP_NUM_THREADS=2 \ 23 | -e OPENBLAS_NUM_THREADS=2 \ 24 | --rm scikit-learn/minimal-windows \ 25 | powershell -Command "pytest --pyargs sklearn" 26 | fi 27 | -------------------------------------------------------------------------------- /build_tools/github/upload_anaconda.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -x 5 | 6 | if [ "$GITHUB_EVENT_NAME" == "schedule" ]; then 7 | ANACONDA_ORG="scipy-wheels-nightly" 8 | ANACONDA_TOKEN="$SCIKIT_LEARN_NIGHTLY_UPLOAD_TOKEN" 9 | else 10 | ANACONDA_ORG="scikit-learn-wheels-staging" 11 | ANACONDA_TOKEN="$SCIKIT_LEARN_STAGING_UPLOAD_TOKEN" 12 | fi 13 | 14 | # Install Python 3.8 because of a bug with Python 3.9 15 | export PATH=$CONDA/bin:$PATH 16 | conda create -n upload -y python=3.8 17 | source activate upload 18 | conda install -y anaconda-client 19 | 20 | # Force a replacement if the remote file already exists 21 | anaconda -t $ANACONDA_TOKEN upload --force -u $ANACONDA_ORG dist/artifact/* 22 | echo "Index: https://pypi.anaconda.org/$ANACONDA_ORG/simple" 23 | -------------------------------------------------------------------------------- /build_tools/shared.sh: -------------------------------------------------------------------------------- 1 | get_dep() { 2 | package="$1" 3 | version="$2" 4 | if [[ "$version" == "none" ]]; then 5 | # do not install with none 6 | echo 7 | elif [[ "${version%%[^0-9.]*}" ]]; then 8 | # version number is explicitly passed 9 | echo "$package==$version" 10 | elif [[ "$version" == "latest" ]]; then 11 | # use 
latest 12 | echo "$package" 13 | elif [[ "$version" == "min" ]]; then 14 | echo "$package==$(python sklearn/_min_dependencies.py $package)" 15 | fi 16 | } 17 | 18 | show_installed_libraries(){ 19 | # use conda list when inside a conda environment. conda list shows more 20 | # info than pip list, e.g. whether OpenBLAS or MKL is installed as well as 21 | # the version of OpenBLAS or MKL 22 | if [[ -n "$CONDA_PREFIX" ]]; then 23 | conda list 24 | else 25 | python -m pip list 26 | fi 27 | } 28 | -------------------------------------------------------------------------------- /build_tools/travis/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script is meant to be called by the "install" step 4 | # defined in the ".travis.yml" file. In particular, it is 5 | # important that we call to the right installation script. 6 | 7 | if [[ $BUILD_WHEEL == true ]]; then 8 | source build_tools/travis/install_wheels.sh || travis_terminate 1 9 | else 10 | source build_tools/travis/install_main.sh || travis_terminate 1 11 | fi 12 | -------------------------------------------------------------------------------- /build_tools/travis/install_wheels.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python -m pip install cibuildwheel || travis_terminate $? 4 | python -m cibuildwheel --output-dir wheelhouse || travis_terminate $? 5 | -------------------------------------------------------------------------------- /build_tools/travis/script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script is meant to be called by the "script" step defined 4 | # in the ".travis.yml" file. While this step is forbidden by the 5 | # continuous deployment jobs, we have to execute the scripts for 6 | # testing the continuous integration jobs. 
7 | 8 | if [[ $BUILD_WHEEL != true ]]; then 9 | # This trick will make Travis terminate the continuation of the pipeline 10 | bash build_tools/travis/test_script.sh || travis_terminate 1 11 | bash build_tools/travis/test_docs.sh || travis_terminate 1 12 | fi 13 | -------------------------------------------------------------------------------- /build_tools/travis/test_docs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | if [[ $TRAVIS_CPU_ARCH != arm64 ]]; then 6 | # Faster run of the documentation tests 7 | PYTEST="pytest -n $CPU_COUNT" make test-doc 8 | fi 9 | -------------------------------------------------------------------------------- /build_tools/travis/test_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | python --version 6 | python -c "import numpy; print(f'numpy {numpy.__version__}')" 7 | python -c "import scipy; print(f'scipy {scipy.__version__}')" 8 | python -c "\ 9 | try: 10 | import pandas 11 | print(f'pandas {pandas.__version__}') 12 | except ImportError: 13 | pass 14 | " 15 | python -c "import joblib; print(f'{joblib.cpu_count()} CPUs')" 16 | python -c "import platform; print(f'{platform.machine()}')" 17 | 18 | TEST_CMD="pytest --showlocals --durations=20 --pyargs" 19 | 20 | # Run the tests on the installed version 21 | mkdir -p $TEST_DIR 22 | 23 | # Copy "setup.cfg" for the test settings 24 | cp setup.cfg $TEST_DIR 25 | cd $TEST_DIR 26 | 27 | if [[ $TRAVIS_CPU_ARCH == arm64 ]]; then 28 | # Faster run of the source code tests 29 | TEST_CMD="$TEST_CMD -n $CPU_COUNT" 30 | 31 | # Remove the option to test the docstring 32 | sed -i -e 's/--doctest-modules//g' setup.cfg 33 | fi 34 | 35 | if [[ -n $CHECK_WARNINGS ]]; then 36 | TEST_CMD="$TEST_CMD -Werror::DeprecationWarning -Werror::FutureWarning" 37 | fi 38 | 39 | $TEST_CMD sklearn 40 | 
-------------------------------------------------------------------------------- /build_tools/travis/test_wheels.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pip install --upgrade pip || travis_terminate $? 4 | pip install pytest pytest-xdist || travis_terminate $? 5 | 6 | # Test that there are no links to system libraries in the threadpoolctl 7 | # section of the show_versions output. 8 | python -c "import sklearn; sklearn.show_versions()" || travis_terminate $? 9 | python -m pytest -n $CPU_COUNT --pyargs sklearn || travis_terminate $? 10 | -------------------------------------------------------------------------------- /conftest.py: -------------------------------------------------------------------------------- 1 | # Even if empty this file is useful so that when running from the root folder 2 | # ./sklearn is added to sys.path by pytest. See 3 | # https://docs.pytest.org/en/latest/explanation/pythonpath.html for more 4 | # details. For example, this allows one to build extensions in place and run pytest 5 | # doc/modules/clustering.rst and use sklearn from the local folder rather than 6 | # the one from site-packages. 7 | -------------------------------------------------------------------------------- /doc/README.md: -------------------------------------------------------------------------------- 1 | # Documentation for scikit-learn 2 | 3 | This directory contains the full manual and website as displayed at 4 | http://scikit-learn.org. See 5 | http://scikit-learn.org/dev/developers/contributing.html#documentation for 6 | detailed information about the documentation. 
7 | -------------------------------------------------------------------------------- /doc/authors_emeritus.rst: -------------------------------------------------------------------------------- 1 | - Mathieu Blondel 2 | - Matthieu Brucher 3 | - Lars Buitinck 4 | - David Cournapeau 5 | - Noel Dawe 6 | - Vincent Dubourg 7 | - Edouard Duchesnay 8 | - Alexander Fabisch 9 | - Virgile Fritsch 10 | - Satrajit Ghosh 11 | - Angel Soler Gollonet 12 | - Chris Gorgolewski 13 | - Jaques Grobler 14 | - Brian Holt 15 | - Arnaud Joly 16 | - Thouis (Ray) Jones 17 | - Kyle Kastner 18 | - manoj kumar 19 | - Robert Layton 20 | - Wei Li 21 | - Paolo Losi 22 | - Gilles Louppe 23 | - Vincent Michel 24 | - Jarrod Millman 25 | - Alexandre Passos 26 | - Fabian Pedregosa 27 | - Peter Prettenhofer 28 | - (Venkat) Raghav, Rajagopalan 29 | - Jacob Schreiber 30 | - Du Shiqiao 31 | - Jake Vanderplas 32 | - David Warde-Farley 33 | - Ron Weiss 34 | -------------------------------------------------------------------------------- /doc/binder/requirements.txt: -------------------------------------------------------------------------------- 1 | # A binder requirement file is required by sphinx-gallery. 2 | # We don't really need one since our binder requirement file lives in the 3 | # .binder directory. 4 | # This file can be removed if 'dependencies' is made an optional key for 5 | # binder in sphinx-gallery. 6 | -------------------------------------------------------------------------------- /doc/communication_team.rst: -------------------------------------------------------------------------------- 1 | .. raw :: html 2 | 3 | 4 |
5 | 8 |
9 |
10 |

Reshama Shaikh

11 |
12 |
13 |
14 |

Lauren Burke

15 |
16 |
17 | -------------------------------------------------------------------------------- /doc/computing.rst: -------------------------------------------------------------------------------- 1 | .. Places parent toc into the sidebar 2 | 3 | :parenttoc: True 4 | 5 | ============================ 6 | Computing with scikit-learn 7 | ============================ 8 | 9 | .. include:: includes/big_toc_css.rst 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | 14 | computing/scaling_strategies 15 | computing/computational_performance 16 | computing/parallelism 17 | -------------------------------------------------------------------------------- /doc/contents.rst: -------------------------------------------------------------------------------- 1 | .. include:: includes/big_toc_css.rst 2 | .. include:: tune_toc.rst 3 | 4 | .. Places global toc into the sidebar 5 | 6 | :globalsidebartoc: True 7 | 8 | ================= 9 | Table Of Contents 10 | ================= 11 | 12 | .. Define an order for the Table of Contents: 13 | 14 | .. toctree:: 15 | :maxdepth: 2 16 | 17 | preface 18 | tutorial/index 19 | getting_started 20 | user_guide 21 | glossary 22 | auto_examples/index 23 | modules/classes 24 | developers/index 25 | -------------------------------------------------------------------------------- /doc/datasets/real_world.rst: -------------------------------------------------------------------------------- 1 | .. Places parent toc into the sidebar 2 | 3 | :parenttoc: True 4 | 5 | .. _real_world_datasets: 6 | 7 | Real world datasets 8 | =================== 9 | 10 | .. currentmodule:: sklearn.datasets 11 | 12 | scikit-learn provides tools to load larger datasets, downloading them if 13 | necessary. 14 | 15 | They can be loaded using the following functions: 16 | 17 | .. 
autosummary:: 18 | 19 | fetch_olivetti_faces 20 | fetch_20newsgroups 21 | fetch_20newsgroups_vectorized 22 | fetch_lfw_people 23 | fetch_lfw_pairs 24 | fetch_covtype 25 | fetch_rcv1 26 | fetch_kddcup99 27 | fetch_california_housing 28 | 29 | .. include:: ../../sklearn/datasets/descr/olivetti_faces.rst 30 | 31 | .. include:: ../../sklearn/datasets/descr/twenty_newsgroups.rst 32 | 33 | .. include:: ../../sklearn/datasets/descr/lfw.rst 34 | 35 | .. include:: ../../sklearn/datasets/descr/covtype.rst 36 | 37 | .. include:: ../../sklearn/datasets/descr/rcv1.rst 38 | 39 | .. include:: ../../sklearn/datasets/descr/kddcup99.rst 40 | 41 | .. include:: ../../sklearn/datasets/descr/california_housing.rst 42 | -------------------------------------------------------------------------------- /doc/datasets/toy_dataset.rst: -------------------------------------------------------------------------------- 1 | .. Places parent toc into the sidebar 2 | 3 | :parenttoc: True 4 | 5 | .. _toy_datasets: 6 | 7 | Toy datasets 8 | ============ 9 | 10 | .. currentmodule:: sklearn.datasets 11 | 12 | scikit-learn comes with a few small standard datasets that do not require 13 | downloading any file from an external website. 14 | 15 | They can be loaded using the following functions: 16 | 17 | .. autosummary:: 18 | 19 | load_boston 20 | load_iris 21 | load_diabetes 22 | load_digits 23 | load_linnerud 24 | load_wine 25 | load_breast_cancer 26 | 27 | These datasets are useful to quickly illustrate the behavior of the 28 | various algorithms implemented in scikit-learn. They are however often too 29 | small to be representative of real world machine learning tasks. 30 | 31 | .. include:: ../../sklearn/datasets/descr/boston_house_prices.rst 32 | 33 | .. include:: ../../sklearn/datasets/descr/iris.rst 34 | 35 | .. include:: ../../sklearn/datasets/descr/diabetes.rst 36 | 37 | .. include:: ../../sklearn/datasets/descr/digits.rst 38 | 39 | .. 
include:: ../../sklearn/datasets/descr/linnerud.rst 40 | 41 | .. include:: ../../sklearn/datasets/descr/wine_data.rst 42 | 43 | .. include:: ../../sklearn/datasets/descr/breast_cancer.rst 44 | -------------------------------------------------------------------------------- /doc/developers/index.rst: -------------------------------------------------------------------------------- 1 | .. Places parent toc into the sidebar 2 | 3 | :parenttoc: True 4 | 5 | .. _developers_guide: 6 | 7 | ================= 8 | Developer's Guide 9 | ================= 10 | 11 | .. include:: ../includes/big_toc_css.rst 12 | .. include:: ../tune_toc.rst 13 | 14 | .. toctree:: 15 | 16 | contributing 17 | minimal_reproducer 18 | develop 19 | tips 20 | utilities 21 | performance 22 | advanced_installation 23 | bug_triaging 24 | maintainer 25 | plotting 26 | -------------------------------------------------------------------------------- /doc/images/axa-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/axa-small.png -------------------------------------------------------------------------------- /doc/images/axa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/axa.png -------------------------------------------------------------------------------- /doc/images/bcg-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/bcg-small.png -------------------------------------------------------------------------------- /doc/images/bcg.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/bcg.png -------------------------------------------------------------------------------- /doc/images/bnp-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/bnp-small.png -------------------------------------------------------------------------------- /doc/images/bnp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/bnp.png -------------------------------------------------------------------------------- /doc/images/cds-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/cds-logo.png -------------------------------------------------------------------------------- /doc/images/columbia-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/columbia-small.png -------------------------------------------------------------------------------- /doc/images/columbia.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/columbia.png -------------------------------------------------------------------------------- /doc/images/dataiku-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/dataiku-small.png 
-------------------------------------------------------------------------------- /doc/images/dataiku.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/dataiku.png -------------------------------------------------------------------------------- /doc/images/digicosme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/digicosme.png -------------------------------------------------------------------------------- /doc/images/dysco.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/dysco.png -------------------------------------------------------------------------------- /doc/images/fnrs-logo-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/fnrs-logo-small.png -------------------------------------------------------------------------------- /doc/images/fujitsu-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/fujitsu-small.png -------------------------------------------------------------------------------- /doc/images/fujitsu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/fujitsu.png -------------------------------------------------------------------------------- /doc/images/google-small.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/google-small.png -------------------------------------------------------------------------------- /doc/images/grid_search_cross_validation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/grid_search_cross_validation.png -------------------------------------------------------------------------------- /doc/images/grid_search_workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/grid_search_workflow.png -------------------------------------------------------------------------------- /doc/images/huggingface_logo-noborder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/huggingface_logo-noborder.png -------------------------------------------------------------------------------- /doc/images/inria-logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/inria-logo.jpg -------------------------------------------------------------------------------- /doc/images/inria-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/inria-small.png -------------------------------------------------------------------------------- /doc/images/intel-small.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/intel-small.png -------------------------------------------------------------------------------- /doc/images/intel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/intel.png -------------------------------------------------------------------------------- /doc/images/iris.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/iris.pdf -------------------------------------------------------------------------------- /doc/images/last_digit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/last_digit.png -------------------------------------------------------------------------------- /doc/images/lda_model_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/lda_model_graph.png -------------------------------------------------------------------------------- /doc/images/logo_APHP.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/logo_APHP.png -------------------------------------------------------------------------------- /doc/images/logo_APHP_text.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/logo_APHP_text.png -------------------------------------------------------------------------------- /doc/images/microsoft-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/microsoft-small.png -------------------------------------------------------------------------------- /doc/images/microsoft.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/microsoft.png -------------------------------------------------------------------------------- /doc/images/ml_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/ml_map.png -------------------------------------------------------------------------------- /doc/images/multi_org_chart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/multi_org_chart.png -------------------------------------------------------------------------------- /doc/images/multilayerperceptron_network.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/multilayerperceptron_network.png -------------------------------------------------------------------------------- /doc/images/no_image.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/no_image.png -------------------------------------------------------------------------------- /doc/images/nvidia-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/nvidia-small.png -------------------------------------------------------------------------------- /doc/images/nvidia.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/nvidia.png -------------------------------------------------------------------------------- /doc/images/nyu_short_color.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/nyu_short_color.png -------------------------------------------------------------------------------- /doc/images/plot_digits_classification.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/plot_digits_classification.png -------------------------------------------------------------------------------- /doc/images/plot_face_recognition_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/plot_face_recognition_1.png -------------------------------------------------------------------------------- /doc/images/plot_face_recognition_2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/plot_face_recognition_2.png -------------------------------------------------------------------------------- /doc/images/png-logo-inria-la-fondation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/png-logo-inria-la-fondation.png -------------------------------------------------------------------------------- /doc/images/quansight-labs-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/quansight-labs-small.png -------------------------------------------------------------------------------- /doc/images/quansight-labs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/quansight-labs.png -------------------------------------------------------------------------------- /doc/images/rbm_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/rbm_graph.png -------------------------------------------------------------------------------- /doc/images/scikit-learn-logo-notext.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/scikit-learn-logo-notext.png -------------------------------------------------------------------------------- /doc/images/scikit-learn-logo-small.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/scikit-learn-logo-small.png -------------------------------------------------------------------------------- /doc/images/sloan_banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/sloan_banner.png -------------------------------------------------------------------------------- /doc/images/sloan_logo-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/sloan_logo-small.png -------------------------------------------------------------------------------- /doc/images/sydney-primary.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/sydney-primary.jpeg -------------------------------------------------------------------------------- /doc/images/sydney-stacked-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/sydney-stacked-small.png -------------------------------------------------------------------------------- /doc/images/telecom-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/telecom-small.png -------------------------------------------------------------------------------- /doc/images/telecom.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/telecom.png -------------------------------------------------------------------------------- /doc/images/visual-studio-build-tools-selection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/images/visual-studio-build-tools-selection.png -------------------------------------------------------------------------------- /doc/includes/big_toc_css.rst: -------------------------------------------------------------------------------- 1 | .. 2 | File to ..include in a document with a big table of contents, to give 3 | it 'style' 4 | 5 | .. raw:: html 6 | 7 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /doc/inspection.rst: -------------------------------------------------------------------------------- 1 | .. Places parent toc into the sidebar 2 | 3 | :parenttoc: True 4 | 5 | .. include:: includes/big_toc_css.rst 6 | 7 | .. _inspection: 8 | 9 | Inspection 10 | ---------- 11 | 12 | Predictive performance is often the main goal of developing machine learning 13 | models. Yet summarising performance with an evaluation metric is often 14 | insufficient: it assumes that the evaluation metric and test dataset 15 | perfectly reflect the target domain, which is rarely true. In certain domains, 16 | a model needs a certain level of interpretability before it can be deployed. 17 | A model that is exhibiting performance issues needs to be debugged for one to 18 | understand the model's underlying issue. The 19 | :mod:`sklearn.inspection` module provides tools to help understand the 20 | predictions from a model and what affects them. This can be used to 21 | evaluate assumptions and biases of a model, design a better model, or 22 | to diagnose issues with model performance. 23 | 24 | .. 
topic:: Examples: 25 | 26 | * :ref:`sphx_glr_auto_examples_inspection_plot_linear_model_coefficient_interpretation.py` 27 | 28 | .. toctree:: 29 | 30 | modules/partial_dependence 31 | modules/permutation_importance 32 | -------------------------------------------------------------------------------- /doc/logos/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/logos/favicon.ico -------------------------------------------------------------------------------- /doc/logos/identity.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/logos/identity.pdf -------------------------------------------------------------------------------- /doc/logos/scikit-learn-logo-notext.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/logos/scikit-learn-logo-notext.png -------------------------------------------------------------------------------- /doc/logos/scikit-learn-logo-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/logos/scikit-learn-logo-small.png -------------------------------------------------------------------------------- /doc/logos/scikit-learn-logo-thumb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/logos/scikit-learn-logo-thumb.png -------------------------------------------------------------------------------- /doc/logos/scikit-learn-logo.bmp: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/logos/scikit-learn-logo.bmp -------------------------------------------------------------------------------- /doc/logos/scikit-learn-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/logos/scikit-learn-logo.png -------------------------------------------------------------------------------- /doc/model_selection.rst: -------------------------------------------------------------------------------- 1 | .. Places parent toc into the sidebar 2 | 3 | :parenttoc: True 4 | 5 | .. include:: includes/big_toc_css.rst 6 | 7 | .. _model_selection: 8 | 9 | Model selection and evaluation 10 | ------------------------------ 11 | 12 | .. toctree:: 13 | :maxdepth: 2 14 | 15 | modules/cross_validation 16 | modules/grid_search 17 | modules/model_evaluation 18 | modules/learning_curve 19 | -------------------------------------------------------------------------------- /doc/modules/glm_data/lasso_enet_coordinate_descent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/modules/glm_data/lasso_enet_coordinate_descent.png -------------------------------------------------------------------------------- /doc/modules/glm_data/poisson_gamma_tweedie_distributions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/modules/glm_data/poisson_gamma_tweedie_distributions.png -------------------------------------------------------------------------------- /doc/modules/pipeline.rst: 
-------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | .. raw:: html 4 | 5 | 6 | 9 | 10 | This content is now at :ref:`combining_estimators`. 11 | -------------------------------------------------------------------------------- /doc/preface.rst: -------------------------------------------------------------------------------- 1 | .. This helps define the TOC ordering for "about us" sections. Particularly 2 | useful for PDF output as this section is not linked from elsewhere. 3 | 4 | .. Places global toc into the sidebar 5 | 6 | :globalsidebartoc: True 7 | 8 | .. _preface_menu: 9 | 10 | .. include:: includes/big_toc_css.rst 11 | .. include:: tune_toc.rst 12 | 13 | ======================= 14 | Welcome to scikit-learn 15 | ======================= 16 | 17 | | 18 | 19 | .. toctree:: 20 | :maxdepth: 2 21 | 22 | install 23 | faq 24 | support 25 | related_projects 26 | about 27 | testimonials/testimonials 28 | whats_new 29 | roadmap 30 | governance 31 | 32 | | 33 | -------------------------------------------------------------------------------- /doc/sphinxext/MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include tests *.py 2 | include *.txt 3 | -------------------------------------------------------------------------------- /doc/supervised_learning.rst: -------------------------------------------------------------------------------- 1 | .. Places parent toc into the sidebar 2 | 3 | :parenttoc: True 4 | 5 | .. include:: includes/big_toc_css.rst 6 | 7 | .. _supervised-learning: 8 | 9 | Supervised learning 10 | ------------------- 11 | 12 | .. 
toctree:: 13 | :maxdepth: 2 14 | 15 | modules/linear_model 16 | modules/lda_qda.rst 17 | modules/kernel_ridge.rst 18 | modules/svm 19 | modules/sgd 20 | modules/neighbors 21 | modules/gaussian_process 22 | modules/cross_decomposition.rst 23 | modules/naive_bayes 24 | modules/tree 25 | modules/ensemble 26 | modules/multiclass 27 | modules/feature_selection.rst 28 | modules/semi_supervised.rst 29 | modules/isotonic.rst 30 | modules/calibration.rst 31 | modules/neural_networks_supervised 32 | -------------------------------------------------------------------------------- /doc/templates/class.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}============== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | .. include:: {{module}}.{{objname}}.examples 9 | 10 | .. raw:: html 11 | 12 |
13 | -------------------------------------------------------------------------------- /doc/templates/class_with_call.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}=============== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | {% block methods %} 9 | .. automethod:: __call__ 10 | {% endblock %} 11 | 12 | .. include:: {{module}}.{{objname}}.examples 13 | 14 | .. raw:: html 15 | 16 |
17 | -------------------------------------------------------------------------------- /doc/templates/deprecated_class.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}============== 3 | 4 | .. meta:: 5 | :robots: noindex 6 | 7 | .. warning:: 8 | **DEPRECATED** 9 | 10 | 11 | .. currentmodule:: {{ module }} 12 | 13 | .. autoclass:: {{ objname }} 14 | 15 | {% block methods %} 16 | .. automethod:: __init__ 17 | {% endblock %} 18 | 19 | .. include:: {{module}}.{{objname}}.examples 20 | 21 | .. raw:: html 22 | 23 |
24 | -------------------------------------------------------------------------------- /doc/templates/deprecated_class_with_call.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}=============== 3 | 4 | .. meta:: 5 | :robots: noindex 6 | 7 | .. warning:: 8 | **DEPRECATED** 9 | 10 | 11 | .. currentmodule:: {{ module }} 12 | 13 | .. autoclass:: {{ objname }} 14 | 15 | {% block methods %} 16 | .. automethod:: __init__ 17 | .. automethod:: __call__ 18 | {% endblock %} 19 | 20 | .. include:: {{module}}.{{objname}}.examples 21 | 22 | .. raw:: html 23 | 24 |
25 | -------------------------------------------------------------------------------- /doc/templates/deprecated_class_without_init.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}============== 3 | 4 | .. meta:: 5 | :robots: noindex 6 | 7 | .. warning:: 8 | **DEPRECATED** 9 | 10 | 11 | .. currentmodule:: {{ module }} 12 | 13 | .. autoclass:: {{ objname }} 14 | 15 | .. include:: {{module}}.{{objname}}.examples 16 | 17 | .. raw:: html 18 | 19 |
20 | -------------------------------------------------------------------------------- /doc/templates/deprecated_function.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}==================== 3 | 4 | .. meta:: 5 | :robots: noindex 6 | 7 | .. warning:: 8 | **DEPRECATED** 9 | 10 | 11 | .. currentmodule:: {{ module }} 12 | 13 | .. autofunction:: {{ objname }} 14 | 15 | .. include:: {{module}}.{{objname}}.examples 16 | 17 | .. raw:: html 18 | 19 |
20 | -------------------------------------------------------------------------------- /doc/templates/function.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}==================== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autofunction:: {{ objname }} 7 | 8 | .. include:: {{module}}.{{objname}}.examples 9 | 10 | .. raw:: html 11 | 12 |
13 | -------------------------------------------------------------------------------- /doc/templates/generate_deprecated.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | for f in [^d]*; do (head -n2 < $f; echo ' 3 | .. meta:: 4 | :robots: noindex 5 | 6 | .. warning:: 7 | **DEPRECATED** 8 | '; tail -n+3 $f) > deprecated_$f; done 9 | -------------------------------------------------------------------------------- /doc/templates/numpydoc_docstring.rst: -------------------------------------------------------------------------------- 1 | {{index}} 2 | {{summary}} 3 | {{extended_summary}} 4 | {{parameters}} 5 | {{returns}} 6 | {{yields}} 7 | {{other_parameters}} 8 | {{attributes}} 9 | {{raises}} 10 | {{warns}} 11 | {{warnings}} 12 | {{see_also}} 13 | {{notes}} 14 | {{references}} 15 | {{examples}} 16 | {{methods}} 17 | -------------------------------------------------------------------------------- /doc/templates/redirects.html: -------------------------------------------------------------------------------- 1 | {% set redirect = pathto(redirects[pagename]) %} 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | scikit-learn: machine learning in Python 11 | 12 | 13 |

You will be automatically redirected to the new location of this page.

14 | 15 | 16 | -------------------------------------------------------------------------------- /doc/testimonials/README.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | To find the list of people we contacted, see: 4 | https://docs.google.com/spreadsheet/ccc?key=0AhGnAxuBDhjmdDYwNzlZVE5SMkFsMjNBbGlaWkpNZ1E&usp=sharing 5 | 6 | To obtain access to this file, send an email to: 7 | nelle dot varoquaux at gmail dot com 8 | 9 | -------------------------------------------------------------------------------- /doc/testimonials/images/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/Makefile -------------------------------------------------------------------------------- /doc/testimonials/images/aweber.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/aweber.png -------------------------------------------------------------------------------- /doc/testimonials/images/bestofmedia-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/bestofmedia-logo.png -------------------------------------------------------------------------------- /doc/testimonials/images/betaworks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/betaworks.png -------------------------------------------------------------------------------- /doc/testimonials/images/birchbox.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/birchbox.jpg -------------------------------------------------------------------------------- /doc/testimonials/images/bnp_paribas_cardif.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/bnp_paribas_cardif.png -------------------------------------------------------------------------------- /doc/testimonials/images/booking.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/booking.png -------------------------------------------------------------------------------- /doc/testimonials/images/change-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/change-logo.png -------------------------------------------------------------------------------- /doc/testimonials/images/dataiku_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/dataiku_logo.png -------------------------------------------------------------------------------- /doc/testimonials/images/datapublica.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/datapublica.png -------------------------------------------------------------------------------- 
/doc/testimonials/images/datarobot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/datarobot.png -------------------------------------------------------------------------------- /doc/testimonials/images/evernote.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/evernote.png -------------------------------------------------------------------------------- /doc/testimonials/images/howaboutwe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/howaboutwe.png -------------------------------------------------------------------------------- /doc/testimonials/images/huggingface.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/huggingface.png -------------------------------------------------------------------------------- /doc/testimonials/images/infonea.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/infonea.jpg -------------------------------------------------------------------------------- /doc/testimonials/images/inria.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/inria.png 
-------------------------------------------------------------------------------- /doc/testimonials/images/jpmorgan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/jpmorgan.png -------------------------------------------------------------------------------- /doc/testimonials/images/lovely.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/lovely.png -------------------------------------------------------------------------------- /doc/testimonials/images/machinalis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/machinalis.png -------------------------------------------------------------------------------- /doc/testimonials/images/mars.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/mars.png -------------------------------------------------------------------------------- /doc/testimonials/images/okcupid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/okcupid.png -------------------------------------------------------------------------------- /doc/testimonials/images/ottogroup_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/ottogroup_logo.png 
-------------------------------------------------------------------------------- /doc/testimonials/images/peerindex.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/peerindex.png -------------------------------------------------------------------------------- /doc/testimonials/images/phimeca.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/phimeca.png -------------------------------------------------------------------------------- /doc/testimonials/images/rangespan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/rangespan.png -------------------------------------------------------------------------------- /doc/testimonials/images/solido_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/solido_logo.png -------------------------------------------------------------------------------- /doc/testimonials/images/spotify.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/spotify.png -------------------------------------------------------------------------------- /doc/testimonials/images/telecomparistech.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/telecomparistech.jpg -------------------------------------------------------------------------------- /doc/testimonials/images/yhat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/yhat.png -------------------------------------------------------------------------------- /doc/testimonials/images/zopa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/doc/testimonials/images/zopa.png -------------------------------------------------------------------------------- /doc/themes/scikit-learn-modern/search.html: -------------------------------------------------------------------------------- 1 | {%- extends "basic/search.html" %} 2 | {% block extrahead %} 3 | 4 | 5 | 6 | 7 | 8 | {% endblock %} 9 | -------------------------------------------------------------------------------- /doc/themes/scikit-learn-modern/theme.conf: -------------------------------------------------------------------------------- 1 | [theme] 2 | inherit = basic 3 | pygments_style = default 4 | stylesheet = css/theme.css 5 | 6 | [options] 7 | google_analytics = true 8 | mathjax_path = 9 | -------------------------------------------------------------------------------- /doc/tutorial/common_includes/info.txt: -------------------------------------------------------------------------------- 1 | Meant to share common RST file snippets that we want to reuse by inclusion 2 | in the real tutorial in order to lower the maintenance burden 3 | of redundant sections. 
4 | -------------------------------------------------------------------------------- /doc/tutorial/index.rst: -------------------------------------------------------------------------------- 1 | .. Places global toc into the sidebar 2 | 3 | :globalsidebartoc: True 4 | 5 | .. _tutorial_menu: 6 | 7 | 8 | .. include:: ../includes/big_toc_css.rst 9 | .. include:: ../tune_toc.rst 10 | 11 | ====================== 12 | scikit-learn Tutorials 13 | ====================== 14 | 15 | | 16 | 17 | .. toctree:: 18 | :maxdepth: 2 19 | 20 | basic/tutorial.rst 21 | statistical_inference/index.rst 22 | text_analytics/working_with_text_data.rst 23 | machine_learning_map/index 24 | ../presentations 25 | 26 | | 27 | 28 | .. note:: **Doctest Mode** 29 | 30 | The code-examples in the above tutorials are written in a 31 | *python-console* format. If you wish to easily execute these examples 32 | in **IPython**, use:: 33 | 34 | %doctest_mode 35 | 36 | in the IPython-console. You can then simply copy and paste the examples 37 | directly into IPython without having to worry about removing the **>>>** 38 | manually. 
39 | -------------------------------------------------------------------------------- /doc/tutorial/text_analytics/.gitignore: -------------------------------------------------------------------------------- 1 | # cruft 2 | .*.swp 3 | *.pyc 4 | .DS_Store 5 | *.pdf 6 | 7 | # folder to be used for working on the exercises 8 | workspace 9 | 10 | # output of the sphinx build of the documentation 11 | tutorial/_build 12 | 13 | # datasets to be fetched from the web and cached locally 14 | data/twenty_newsgroups/20news-bydate.tar.gz 15 | data/twenty_newsgroups/20news-bydate-train 16 | data/twenty_newsgroups/20news-bydate-test 17 | 18 | data/movie_reviews/txt_sentoken 19 | data/movie_reviews/poldata.README.2.0 20 | 21 | data/languages/paragraphs 22 | data/languages/short_paragraphs 23 | data/languages/html 24 | 25 | data/labeled_faces_wild/lfw_preprocessed/ 26 | -------------------------------------------------------------------------------- /doc/tutorial/text_analytics/data/movie_reviews/fetch_data.py: -------------------------------------------------------------------------------- 1 | """Script to download the movie review dataset""" 2 | 3 | import os 4 | import tarfile 5 | from contextlib import closing 6 | from urllib.request import urlopen 7 | 8 | 9 | URL = ("http://www.cs.cornell.edu/people/pabo/" 10 | "movie-review-data/review_polarity.tar.gz") 11 | 12 | ARCHIVE_NAME = URL.rsplit('/', 1)[1] 13 | DATA_FOLDER = "txt_sentoken" 14 | 15 | 16 | if not os.path.exists(DATA_FOLDER): 17 | 18 | if not os.path.exists(ARCHIVE_NAME): 19 | print("Downloading dataset from %s (3 MB)" % URL) 20 | opener = urlopen(URL) 21 | with open(ARCHIVE_NAME, 'wb') as archive: 22 | archive.write(opener.read()) 23 | 24 | print("Decompressing %s" % ARCHIVE_NAME) 25 | with closing(tarfile.open(ARCHIVE_NAME, "r:gz")) as archive: 26 | archive.extractall(path='.') 27 | os.remove(ARCHIVE_NAME) 28 | -------------------------------------------------------------------------------- 
/doc/tutorial/text_analytics/data/twenty_newsgroups/fetch_data.py: -------------------------------------------------------------------------------- 1 | """Script to download the 20 newsgroups text classification set""" 2 | 3 | import os 4 | import tarfile 5 | from contextlib import closing 6 | from urllib.request import urlopen 7 | 8 | URL = ("http://people.csail.mit.edu/jrennie/" 9 | "20Newsgroups/20news-bydate.tar.gz") 10 | 11 | ARCHIVE_NAME = URL.rsplit('/', 1)[1] 12 | TRAIN_FOLDER = "20news-bydate-train" 13 | TEST_FOLDER = "20news-bydate-test" 14 | 15 | 16 | if not os.path.exists(TRAIN_FOLDER) or not os.path.exists(TEST_FOLDER): 17 | 18 | if not os.path.exists(ARCHIVE_NAME): 19 | print("Downloading dataset from %s (14 MB)" % URL) 20 | opener = urlopen(URL) 21 | with open(ARCHIVE_NAME, 'wb') as archive: 22 | archive.write(opener.read()) 23 | 24 | print("Decompressing %s" % ARCHIVE_NAME) 25 | with closing(tarfile.open(ARCHIVE_NAME, "r:gz")) as archive: 26 | archive.extractall(path='.') 27 | os.remove(ARCHIVE_NAME) 28 | -------------------------------------------------------------------------------- /doc/tutorial/text_analytics/solutions/generate_skeletons.py: -------------------------------------------------------------------------------- 1 | """Generate skeletons from the example code""" 2 | import os 3 | 4 | exercise_dir = os.path.dirname(__file__) 5 | if exercise_dir == '': 6 | exercise_dir = '.' 
7 | 8 | skeleton_dir = os.path.abspath(os.path.join(exercise_dir, '..', 'skeletons')) 9 | if not os.path.exists(skeleton_dir): 10 | os.makedirs(skeleton_dir) 11 | 12 | solutions = os.listdir(exercise_dir) 13 | 14 | for f in solutions: 15 | if not f.endswith('.py'): 16 | continue 17 | 18 | if f == os.path.basename(__file__): 19 | continue 20 | 21 | print("Generating skeleton for %s" % f) 22 | 23 | input_file = open(os.path.join(exercise_dir, f)) 24 | output_file = open(os.path.join(skeleton_dir, f), 'w') 25 | 26 | in_exercise_region = False 27 | 28 | for line in input_file: 29 | linestrip = line.strip() 30 | if len(linestrip) == 0: 31 | in_exercise_region = False 32 | elif linestrip.startswith('# TASK:'): 33 | in_exercise_region = True 34 | 35 | if not in_exercise_region or linestrip.startswith('#'): 36 | output_file.write(line) 37 | 38 | output_file.close() 39 | -------------------------------------------------------------------------------- /doc/unsupervised_learning.rst: -------------------------------------------------------------------------------- 1 | .. Places parent toc into the sidebar 2 | 3 | :parenttoc: True 4 | 5 | .. include:: includes/big_toc_css.rst 6 | 7 | .. _unsupervised-learning: 8 | 9 | Unsupervised learning 10 | ----------------------- 11 | 12 | .. toctree:: 13 | :maxdepth: 2 14 | 15 | modules/mixture 16 | modules/manifold 17 | modules/clustering 18 | modules/biclustering 19 | modules/decomposition 20 | modules/covariance 21 | modules/outlier_detection 22 | modules/density 23 | modules/neural_networks_unsupervised 24 | -------------------------------------------------------------------------------- /doc/user_guide.rst: -------------------------------------------------------------------------------- 1 | .. Places parent toc into the sidebar 2 | 3 | :parenttoc: True 4 | 5 | .. title:: User guide: contents 6 | 7 | .. _user_guide: 8 | 9 | ========== 10 | User Guide 11 | ========== 12 | 13 | .. include:: includes/big_toc_css.rst 14 | 15 | .. 
nice layout in the toc 16 | 17 | .. include:: tune_toc.rst 18 | 19 | .. toctree:: 20 | :numbered: 21 | :maxdepth: 3 22 | 23 | supervised_learning.rst 24 | unsupervised_learning.rst 25 | model_selection.rst 26 | inspection.rst 27 | visualizations.rst 28 | data_transforms.rst 29 | datasets.rst 30 | computing.rst 31 | model_persistence.rst 32 | common_pitfalls.rst 33 | -------------------------------------------------------------------------------- /doc/whats_new.rst: -------------------------------------------------------------------------------- 1 | .. currentmodule:: sklearn 2 | .. include:: whats_new/_contributors.rst 3 | 4 | Release History 5 | =============== 6 | 7 | Release notes for all scikit-learn releases are linked in this page. 8 | 9 | **Tip:** `Subscribe to scikit-learn releases <https://libraries.io/pypi/scikit-learn>`__ 10 | on libraries.io to be notified when new versions are released. 11 | 12 | .. toctree:: 13 | :maxdepth: 1 14 | 15 | Version 1.1 16 | Version 1.0 17 | Version 0.24 18 | Version 0.23 19 | Version 0.22 20 | Version 0.21 21 | Version 0.20 22 | Version 0.19 23 | Version 0.18 24 | Version 0.17 25 | Version 0.16 26 | Version 0.15 27 | Version 0.14 28 | Version 0.13 29 | Older Versions 30 | -------------------------------------------------------------------------------- /doc/whats_new/changelog_legend.inc: -------------------------------------------------------------------------------- 1 | Legend for changelogs 2 | --------------------- 3 | 4 | - |MajorFeature|: something big that you couldn't do before. 5 | - |Feature|: something that you couldn't do before. 6 | - |Efficiency|: an existing feature now may not require as much computation or 7 | memory. 8 | - |Enhancement|: a miscellaneous minor improvement. 9 | - |Fix|: something that previously didn't work as documented -- or according 10 | to reasonable expectations -- should now work. 11 | - |API|: you will need to change your code to have the same effect in the 12 | future; or a feature will be removed in the future.
13 | -------------------------------------------------------------------------------- /examples/README.txt: -------------------------------------------------------------------------------- 1 | .. _general_examples: 2 | 3 | Examples 4 | ======== 5 | -------------------------------------------------------------------------------- /examples/applications/README.txt: -------------------------------------------------------------------------------- 1 | .. _realworld_examples: 2 | 3 | Examples based on real world datasets 4 | ------------------------------------- 5 | 6 | Applications to real world problems with some medium sized datasets or 7 | interactive user interface. 8 | -------------------------------------------------------------------------------- /examples/bicluster/README.txt: -------------------------------------------------------------------------------- 1 | .. _bicluster_examples: 2 | 3 | Biclustering 4 | ------------ 5 | 6 | Examples concerning the :mod:`sklearn.cluster.bicluster` module. 7 | -------------------------------------------------------------------------------- /examples/calibration/README.txt: -------------------------------------------------------------------------------- 1 | .. _calibration_examples: 2 | 3 | Calibration 4 | ----------------------- 5 | 6 | Examples illustrating the calibration of predicted probabilities of classifiers. 7 | -------------------------------------------------------------------------------- /examples/classification/README.txt: -------------------------------------------------------------------------------- 1 | .. _classification_examples: 2 | 3 | Classification 4 | ----------------------- 5 | 6 | General examples about classification algorithms. 7 | -------------------------------------------------------------------------------- /examples/cluster/README.txt: -------------------------------------------------------------------------------- 1 | .. 
_cluster_examples: 2 | 3 | Clustering 4 | ---------- 5 | 6 | Examples concerning the :mod:`sklearn.cluster` module. 7 | -------------------------------------------------------------------------------- /examples/compose/README.txt: -------------------------------------------------------------------------------- 1 | .. _compose_examples: 2 | 3 | Pipelines and composite estimators 4 | ---------------------------------- 5 | 6 | Examples of how to compose transformers and pipelines from other estimators. See the :ref:`User Guide <combining_estimators>`. 7 | -------------------------------------------------------------------------------- /examples/covariance/README.txt: -------------------------------------------------------------------------------- 1 | .. _covariance_examples: 2 | 3 | Covariance estimation 4 | --------------------- 5 | 6 | Examples concerning the :mod:`sklearn.covariance` module. 7 | -------------------------------------------------------------------------------- /examples/cross_decomposition/README.txt: -------------------------------------------------------------------------------- 1 | .. _cross_decomposition_examples: 2 | 3 | Cross decomposition 4 | ------------------- 5 | 6 | Examples concerning the :mod:`sklearn.cross_decomposition` module. 7 | 8 | -------------------------------------------------------------------------------- /examples/datasets/README.txt: -------------------------------------------------------------------------------- 1 | .. _dataset_examples: 2 | 3 | Dataset examples 4 | ----------------------- 5 | 6 | Examples concerning the :mod:`sklearn.datasets` module.
7 | -------------------------------------------------------------------------------- /examples/datasets/plot_digits_last_image.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ========================================================= 4 | The Digit Dataset 5 | ========================================================= 6 | 7 | This dataset is made up of 1797 8x8 images. Each image, 8 | like the one shown below, is of a hand-written digit. 9 | In order to utilize an 8x8 figure like this, we'd have to 10 | first transform it into a feature vector with length 64. 11 | 12 | See `here 13 | `_ 14 | for more information about this dataset. 15 | 16 | """ 17 | 18 | # Code source: Gaël Varoquaux 19 | # Modified for documentation by Jaques Grobler 20 | # License: BSD 3 clause 21 | 22 | from sklearn import datasets 23 | 24 | import matplotlib.pyplot as plt 25 | 26 | # Load the digits dataset 27 | digits = datasets.load_digits() 28 | 29 | # Display the last digit 30 | plt.figure(1, figsize=(3, 3)) 31 | plt.imshow(digits.images[-1], cmap=plt.cm.gray_r, interpolation="nearest") 32 | plt.show() 33 | -------------------------------------------------------------------------------- /examples/decomposition/README.txt: -------------------------------------------------------------------------------- 1 | .. _decomposition_examples: 2 | 3 | Decomposition 4 | ------------- 5 | 6 | Examples concerning the :mod:`sklearn.decomposition` module. 
7 | 8 | -------------------------------------------------------------------------------- /examples/decomposition/plot_beta_divergence.py: -------------------------------------------------------------------------------- 1 | """ 2 | ============================== 3 | Beta-divergence loss functions 4 | ============================== 5 | 6 | A plot that compares the various Beta-divergence loss functions supported by 7 | the Multiplicative-Update ('mu') solver in :class:`~sklearn.decomposition.NMF`. 8 | 9 | """ 10 | 11 | import numpy as np 12 | import matplotlib.pyplot as plt 13 | from sklearn.decomposition._nmf import _beta_divergence 14 | 15 | x = np.linspace(0.001, 4, 1000) 16 | y = np.zeros(x.shape) 17 | 18 | colors = "mbgyr" 19 | for j, beta in enumerate((0.0, 0.5, 1.0, 1.5, 2.0)): 20 | for i, xi in enumerate(x): 21 | y[i] = _beta_divergence(1, xi, 1, beta) 22 | name = "beta = %1.1f" % beta 23 | plt.plot(x, y, label=name, color=colors[j]) 24 | 25 | plt.xlabel("x") 26 | plt.title("beta-divergence(1, x)") 27 | plt.legend(loc=0) 28 | plt.axis([0, 4, 0, 3]) 29 | plt.show() 30 | -------------------------------------------------------------------------------- /examples/ensemble/README.txt: -------------------------------------------------------------------------------- 1 | .. _ensemble_examples: 2 | 3 | Ensemble methods 4 | ---------------- 5 | 6 | Examples concerning the :mod:`sklearn.ensemble` module. 
7 | -------------------------------------------------------------------------------- /examples/exercises/README.txt: -------------------------------------------------------------------------------- 1 | Tutorial exercises 2 | ------------------ 3 | 4 | Exercises for the tutorials 5 | -------------------------------------------------------------------------------- /examples/exercises/plot_digits_classification_exercise.py: -------------------------------------------------------------------------------- 1 | """ 2 | ================================ 3 | Digits Classification Exercise 4 | ================================ 5 | 6 | A tutorial exercise regarding the use of classification techniques on 7 | the Digits dataset. 8 | 9 | This exercise is used in the :ref:`clf_tut` part of the 10 | :ref:`supervised_learning_tut` section of the 11 | :ref:`stat_learn_tut_index`. 12 | 13 | """ 14 | 15 | from sklearn import datasets, neighbors, linear_model 16 | 17 | X_digits, y_digits = datasets.load_digits(return_X_y=True) 18 | X_digits = X_digits / X_digits.max() 19 | 20 | n_samples = len(X_digits) 21 | 22 | X_train = X_digits[: int(0.9 * n_samples)] 23 | y_train = y_digits[: int(0.9 * n_samples)] 24 | X_test = X_digits[int(0.9 * n_samples) :] 25 | y_test = y_digits[int(0.9 * n_samples) :] 26 | 27 | knn = neighbors.KNeighborsClassifier() 28 | logistic = linear_model.LogisticRegression(max_iter=1000) 29 | 30 | print("KNN score: %f" % knn.fit(X_train, y_train).score(X_test, y_test)) 31 | print( 32 | "LogisticRegression score: %f" 33 | % logistic.fit(X_train, y_train).score(X_test, y_test) 34 | ) 35 | -------------------------------------------------------------------------------- /examples/feature_selection/README.txt: -------------------------------------------------------------------------------- 1 | .. _feature_selection_examples: 2 | 3 | Feature Selection 4 | ----------------------- 5 | 6 | Examples concerning the :mod:`sklearn.feature_selection` module. 
7 | -------------------------------------------------------------------------------- /examples/feature_selection/plot_rfe_digits.py: -------------------------------------------------------------------------------- 1 | """ 2 | ============================= 3 | Recursive feature elimination 4 | ============================= 5 | 6 | A recursive feature elimination example showing the relevance of pixels in 7 | a digit classification task. 8 | 9 | .. note:: 10 | 11 | See also :ref:`sphx_glr_auto_examples_feature_selection_plot_rfe_with_cross_validation.py` 12 | 13 | """ # noqa: E501 14 | 15 | from sklearn.svm import SVC 16 | from sklearn.datasets import load_digits 17 | from sklearn.feature_selection import RFE 18 | import matplotlib.pyplot as plt 19 | 20 | # Load the digits dataset 21 | digits = load_digits() 22 | X = digits.images.reshape((len(digits.images), -1)) 23 | y = digits.target 24 | 25 | # Create the RFE object and rank each pixel 26 | svc = SVC(kernel="linear", C=1) 27 | rfe = RFE(estimator=svc, n_features_to_select=1, step=1) 28 | rfe.fit(X, y) 29 | ranking = rfe.ranking_.reshape(digits.images[0].shape) 30 | 31 | # Plot pixel ranking 32 | plt.matshow(ranking, cmap=plt.cm.Blues) 33 | plt.colorbar() 34 | plt.title("Ranking of pixels with RFE") 35 | plt.show() 36 | -------------------------------------------------------------------------------- /examples/gaussian_process/README.txt: -------------------------------------------------------------------------------- 1 | .. _gaussian_process_examples: 2 | 3 | Gaussian Process for Machine Learning 4 | ------------------------------------- 5 | 6 | Examples concerning the :mod:`sklearn.gaussian_process` module. 7 | 8 | -------------------------------------------------------------------------------- /examples/impute/README.txt: -------------------------------------------------------------------------------- 1 | .. 
_impute_examples: 2 | 3 | Missing Value Imputation 4 | ------------------------ 5 | 6 | Examples concerning the :mod:`sklearn.impute` module. 7 | -------------------------------------------------------------------------------- /examples/inspection/README.txt: -------------------------------------------------------------------------------- 1 | .. _inspection_examples: 2 | 3 | Inspection 4 | ---------- 5 | 6 | Examples related to the :mod:`sklearn.inspection` module. 7 | 8 | -------------------------------------------------------------------------------- /examples/kernel_approximation/README.txt: -------------------------------------------------------------------------------- 1 | .. _kernel_approximation_examples: 2 | 3 | Kernel Approximation 4 | -------------------- 5 | 6 | Examples concerning the :mod:`sklearn.kernel_approximation` module. 7 | -------------------------------------------------------------------------------- /examples/linear_model/README.txt: -------------------------------------------------------------------------------- 1 | .. _linear_examples: 2 | 3 | Generalized Linear Models 4 | ------------------------- 5 | 6 | Examples concerning the :mod:`sklearn.linear_model` module. 7 | -------------------------------------------------------------------------------- /examples/linear_model/plot_lasso_lars.py: -------------------------------------------------------------------------------- 1 | """ 2 | ===================== 3 | Lasso path using LARS 4 | ===================== 5 | 6 | Computes Lasso Path along the regularization parameter using the LARS 7 | algorithm on the diabetes dataset. Each color represents a different 8 | feature of the coefficient vector, and this is displayed as a function 9 | of the regularization parameter. 
10 | 11 | """ 12 | 13 | # Author: Fabian Pedregosa 14 | # Alexandre Gramfort 15 | # License: BSD 3 clause 16 | 17 | import numpy as np 18 | import matplotlib.pyplot as plt 19 | 20 | from sklearn import linear_model 21 | from sklearn import datasets 22 | 23 | X, y = datasets.load_diabetes(return_X_y=True) 24 | 25 | print("Computing regularization path using the LARS ...") 26 | _, _, coefs = linear_model.lars_path(X, y, method="lasso", verbose=True) 27 | 28 | xx = np.sum(np.abs(coefs.T), axis=1) 29 | xx /= xx[-1] 30 | 31 | plt.plot(xx, coefs.T) 32 | ymin, ymax = plt.ylim() 33 | plt.vlines(xx, ymin, ymax, linestyle="dashed") 34 | plt.xlabel("|coef| / max|coef|") 35 | plt.ylabel("Coefficients") 36 | plt.title("LASSO Path") 37 | plt.axis("tight") 38 | plt.show() 39 | -------------------------------------------------------------------------------- /examples/manifold/README.txt: -------------------------------------------------------------------------------- 1 | .. _manifold_examples: 2 | 3 | Manifold learning 4 | ----------------------- 5 | 6 | Examples concerning the :mod:`sklearn.manifold` module. 7 | 8 | -------------------------------------------------------------------------------- /examples/miscellaneous/README.txt: -------------------------------------------------------------------------------- 1 | .. _miscellaneous_examples: 2 | 3 | Miscellaneous 4 | ------------- 5 | 6 | Miscellaneous and introductory examples for scikit-learn. 7 | 8 | -------------------------------------------------------------------------------- /examples/miscellaneous/plot_changed_only_pprint_parameter.py: -------------------------------------------------------------------------------- 1 | """ 2 | ================================= 3 | Compact estimator representations 4 | ================================= 5 | 6 | This example illustrates the use of the print_changed_only global parameter. 
7 | 8 | Setting print_changed_only to True will alter the representation of 9 | estimators to only show the parameters that have been set to non-default 10 | values. This can be used to have more compact representations. 11 | 12 | """ 13 | 14 | from sklearn.linear_model import LogisticRegression 15 | from sklearn import set_config 16 | 17 | 18 | lr = LogisticRegression(penalty="l1") 19 | print("Default representation:") 20 | print(lr) 21 | # LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True, 22 | # intercept_scaling=1, l1_ratio=None, max_iter=100, 23 | # multi_class='auto', n_jobs=None, penalty='l1', 24 | # random_state=None, solver='warn', tol=0.0001, verbose=0, 25 | # warm_start=False) 26 | 27 | set_config(print_changed_only=True) 28 | print("\nWith changed_only option:") 29 | print(lr) 30 | # LogisticRegression(penalty='l1') 31 | -------------------------------------------------------------------------------- /examples/mixture/README.txt: -------------------------------------------------------------------------------- 1 | .. _mixture_examples: 2 | 3 | Gaussian Mixture Models 4 | ----------------------- 5 | 6 | Examples concerning the :mod:`sklearn.mixture` module. 7 | -------------------------------------------------------------------------------- /examples/model_selection/README.txt: -------------------------------------------------------------------------------- 1 | .. _model_selection_examples: 2 | 3 | Model Selection 4 | ----------------------- 5 | 6 | Examples related to the :mod:`sklearn.model_selection` module.
7 | -------------------------------------------------------------------------------- /examples/model_selection/plot_cv_predict.py: -------------------------------------------------------------------------------- 1 | """ 2 | ==================================== 3 | Plotting Cross-Validated Predictions 4 | ==================================== 5 | 6 | This example shows how to use 7 | :func:`~sklearn.model_selection.cross_val_predict` to visualize prediction 8 | errors. 9 | 10 | """ 11 | 12 | from sklearn import datasets 13 | from sklearn.model_selection import cross_val_predict 14 | from sklearn import linear_model 15 | import matplotlib.pyplot as plt 16 | 17 | lr = linear_model.LinearRegression() 18 | X, y = datasets.load_diabetes(return_X_y=True) 19 | 20 | # cross_val_predict returns an array of the same size as `y` where each entry 21 | # is a prediction obtained by cross validation: 22 | predicted = cross_val_predict(lr, X, y, cv=10) 23 | 24 | fig, ax = plt.subplots() 25 | ax.scatter(y, predicted, edgecolors=(0, 0, 0)) 26 | ax.plot([y.min(), y.max()], [y.min(), y.max()], "k--", lw=4) 27 | ax.set_xlabel("Measured") 28 | ax.set_ylabel("Predicted") 29 | plt.show() 30 | -------------------------------------------------------------------------------- /examples/multioutput/README.txt: -------------------------------------------------------------------------------- 1 | .. _multioutput_examples: 2 | 3 | Multioutput methods 4 | ------------------- 5 | 6 | Examples concerning the :mod:`sklearn.multioutput` module. 7 | -------------------------------------------------------------------------------- /examples/neighbors/README.txt: -------------------------------------------------------------------------------- 1 | .. _neighbors_examples: 2 | 3 | Nearest Neighbors 4 | ----------------------- 5 | 6 | Examples concerning the :mod:`sklearn.neighbors` module. 
7 | -------------------------------------------------------------------------------- /examples/neural_networks/README.txt: -------------------------------------------------------------------------------- 1 | .. _neural_network_examples: 2 | 3 | Neural Networks 4 | ----------------------- 5 | 6 | Examples concerning the :mod:`sklearn.neural_network` module. 7 | -------------------------------------------------------------------------------- /examples/preprocessing/README.txt: -------------------------------------------------------------------------------- 1 | .. _preprocessing_examples: 2 | 3 | Preprocessing 4 | ------------- 5 | 6 | Examples concerning the :mod:`sklearn.preprocessing` module. 7 | -------------------------------------------------------------------------------- /examples/release_highlights/README.txt: -------------------------------------------------------------------------------- 1 | .. _release_highlights_examples: 2 | 3 | Release Highlights 4 | ------------------ 5 | 6 | These examples illustrate the main features of the releases of scikit-learn. 7 | -------------------------------------------------------------------------------- /examples/semi_supervised/README.txt: -------------------------------------------------------------------------------- 1 | .. _semi_supervised_examples: 2 | 3 | Semi Supervised Classification 4 | ------------------------------ 5 | 6 | Examples concerning the :mod:`sklearn.semi_supervised` module. 7 | -------------------------------------------------------------------------------- /examples/svm/README.txt: -------------------------------------------------------------------------------- 1 | .. _svm_examples: 2 | 3 | Support Vector Machines 4 | ----------------------- 5 | 6 | Examples concerning the :mod:`sklearn.svm` module. 
7 | -------------------------------------------------------------------------------- /examples/svm/plot_svm_nonlinear.py: -------------------------------------------------------------------------------- 1 | """ 2 | ============== 3 | Non-linear SVM 4 | ============== 5 | 6 | Perform binary classification using non-linear SVC 7 | with RBF kernel. The target to predict is a XOR of the 8 | inputs. 9 | 10 | The color map illustrates the decision function learned by the SVC. 11 | 12 | """ 13 | 14 | import numpy as np 15 | import matplotlib.pyplot as plt 16 | from sklearn import svm 17 | 18 | xx, yy = np.meshgrid(np.linspace(-3, 3, 500), np.linspace(-3, 3, 500)) 19 | np.random.seed(0) 20 | X = np.random.randn(300, 2) 21 | Y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0) 22 | 23 | # fit the model 24 | clf = svm.NuSVC(gamma="auto") 25 | clf.fit(X, Y) 26 | 27 | # plot the decision function for each datapoint on the grid 28 | Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()]) 29 | Z = Z.reshape(xx.shape) 30 | 31 | plt.imshow( 32 | Z, 33 | interpolation="nearest", 34 | extent=(xx.min(), xx.max(), yy.min(), yy.max()), 35 | aspect="auto", 36 | origin="lower", 37 | cmap=plt.cm.PuOr_r, 38 | ) 39 | contours = plt.contour(xx, yy, Z, levels=[0], linewidths=2, linestyles="dashed") 40 | plt.scatter(X[:, 0], X[:, 1], s=30, c=Y, cmap=plt.cm.Paired, edgecolors="k") 41 | plt.xticks(()) 42 | plt.yticks(()) 43 | plt.axis([-3, 3, -3, 3]) 44 | plt.show() 45 | -------------------------------------------------------------------------------- /examples/text/README.txt: -------------------------------------------------------------------------------- 1 | .. _text_examples: 2 | 3 | Working with text documents 4 | ---------------------------- 5 | 6 | Examples concerning the :mod:`sklearn.feature_extraction.text` module. 
7 | -------------------------------------------------------------------------------- /examples/tree/README.txt: -------------------------------------------------------------------------------- 1 | .. _tree_examples: 2 | 3 | Decision Trees 4 | -------------- 5 | 6 | Examples concerning the :mod:`sklearn.tree` module. 7 | -------------------------------------------------------------------------------- /lgtm.yml: -------------------------------------------------------------------------------- 1 | extraction: 2 | cpp: 3 | before_index: 4 | - pip3 install numpy==1.16.3 5 | - pip3 install --no-deps scipy Cython 6 | index: 7 | build_command: 8 | - python3 setup.py build_ext -i 9 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | # Minimum requirements for the build system to execute. 3 | requires = [ 4 | "setuptools<60.0", 5 | "wheel", 6 | "Cython>=0.28.5", 7 | 8 | # use oldest-supported-numpy which provides the oldest numpy version with 9 | # wheels on PyPI 10 | # 11 | # see: https://github.com/scipy/oldest-supported-numpy/blob/main/setup.cfg 12 | "oldest-supported-numpy", 13 | 14 | "scipy>=1.3.2", 15 | ] 16 | 17 | [tool.black] 18 | line-length = 88 19 | target_version = ['py38', 'py39', 'py310'] 20 | preview = true 21 | exclude = ''' 22 | /( 23 | \.eggs # exclude a few common directories in the 24 | | \.git # root of the project 25 | | \.mypy_cache 26 | | \.vscode 27 | | build 28 | | dist 29 | | doc/tutorial 30 | | doc/_build 31 | | doc/auto_examples 32 | | sklearn/externals 33 | | asv_benchmarks/env 34 | )/ 35 | ''' 36 | -------------------------------------------------------------------------------- /sklearn/__check_build/_check_build.pyx: -------------------------------------------------------------------------------- 1 | def check_build(): 2 | return 3 | 
-------------------------------------------------------------------------------- /sklearn/__check_build/setup.py: -------------------------------------------------------------------------------- 1 | # Author: Virgile Fritsch 2 | # License: BSD 3 clause 3 | 4 | import numpy 5 | 6 | 7 | def configuration(parent_package="", top_path=None): 8 | from numpy.distutils.misc_util import Configuration 9 | 10 | config = Configuration("__check_build", parent_package, top_path) 11 | config.add_extension( 12 | "_check_build", sources=["_check_build.pyx"], include_dirs=[numpy.get_include()] 13 | ) 14 | 15 | return config 16 | 17 | 18 | if __name__ == "__main__": 19 | from numpy.distutils.core import setup 20 | 21 | setup(**configuration(top_path="").todict()) 22 | -------------------------------------------------------------------------------- /sklearn/_distributor_init.py: -------------------------------------------------------------------------------- 1 | """ Distributor init file 2 | 3 | Distributors: you can add custom code here to support particular distributions 4 | of scikit-learn. 5 | 6 | For example, this is a good place to put any checks for hardware requirements. 7 | 8 | The scikit-learn standard source distribution will not put code in this file, 9 | so you can safely replace this file with your own version. 10 | """ 11 | -------------------------------------------------------------------------------- /sklearn/_loss/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn._loss` module includes loss function classes suitable for 3 | fitting classification and regression tasks. 
4 | """ 5 | 6 | from .loss import ( 7 | HalfSquaredError, 8 | AbsoluteError, 9 | PinballLoss, 10 | HalfPoissonLoss, 11 | HalfGammaLoss, 12 | HalfTweedieLoss, 13 | HalfBinomialLoss, 14 | HalfMultinomialLoss, 15 | ) 16 | 17 | 18 | __all__ = [ 19 | "HalfSquaredError", 20 | "AbsoluteError", 21 | "PinballLoss", 22 | "HalfPoissonLoss", 23 | "HalfGammaLoss", 24 | "HalfTweedieLoss", 25 | "HalfBinomialLoss", 26 | "HalfMultinomialLoss", 27 | ] 28 | -------------------------------------------------------------------------------- /sklearn/_loss/setup.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from numpy.distutils.misc_util import Configuration 3 | from sklearn._build_utils import gen_from_templates 4 | 5 | 6 | def configuration(parent_package="", top_path=None): 7 | config = Configuration("_loss", parent_package, top_path) 8 | 9 | # generate _loss.pyx from template 10 | templates = ["sklearn/_loss/_loss.pyx.tp"] 11 | gen_from_templates(templates) 12 | 13 | config.add_extension( 14 | "_loss", 15 | sources=["_loss.pyx"], 16 | include_dirs=[numpy.get_include()], 17 | # define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")], 18 | ) 19 | return config 20 | 21 | 22 | if __name__ == "__main__": 23 | from numpy.distutils.core import setup 24 | 25 | setup(**configuration().todict()) 26 | -------------------------------------------------------------------------------- /sklearn/_loss/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/_loss/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/cluster/_k_means_common.pxd: -------------------------------------------------------------------------------- 1 | from cython cimport floating 2 | cimport numpy as np 3 | 4 | 5 | cdef floating 
_euclidean_dense_dense(floating*, floating*, int, bint) nogil 6 | 7 | cdef floating _euclidean_sparse_dense(floating[::1], int[::1], floating[::1], 8 | floating, bint) nogil 9 | 10 | cpdef void _relocate_empty_clusters_dense( 11 | floating[:, ::1], floating[::1], floating[:, ::1], 12 | floating[:, ::1], floating[::1], int[::1]) 13 | 14 | cpdef void _relocate_empty_clusters_sparse( 15 | floating[::1], int[::1], int[::1], floating[::1], floating[:, ::1], 16 | floating[:, ::1], floating[::1], int[::1]) 17 | 18 | cdef void _average_centers(floating[:, ::1], floating[::1]) 19 | 20 | cdef void _center_shift(floating[:, ::1], floating[:, ::1], floating[::1]) 21 | -------------------------------------------------------------------------------- /sklearn/cluster/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/cluster/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/cluster/tests/common.py: -------------------------------------------------------------------------------- 1 | """ 2 | Common utilities for testing clustering. 
def generate_clustered_data(
    seed=0, n_clusters=3, n_features=2, n_samples_per_cluster=20, std=0.4
):
    """Sample isotropic Gaussian blobs around fixed, off-origin centers.

    Parameters
    ----------
    seed : int, default=0
        Seed given to ``np.random.RandomState``.
    n_clusters : int, default=3
        Number of blobs. Centers are read in order from a table of four rows.
    n_features : int, default=2
        Number of columns. Each center is cut down to its first
        ``n_features`` coordinates (the table holds four per center).
    n_samples_per_cluster : int, default=20
        Number of rows drawn for every blob.
    std : float, default=0.4
        Factor applied to the standard-normal noise.

    Returns
    -------
    X : ndarray of shape (n_clusters * n_samples_per_cluster, n_features)
        The blobs stacked one after another, in center order.
    """
    rng = np.random.RandomState(seed)

    # The centers are deliberately shifted away from zero (+10) to check the
    # robustness of clustering algorithms with regard to non-centered data.
    centers = 10 + np.array(
        [
            [1, 1, 1, 0],
            [-1, -1, 0, 1],
            [1, -1, 1, 1],
            [-1, 1, 1, 0],
        ]
    )

    # Seed the stack with an empty float block so that zero clusters still
    # yields an array of shape (0, n_features).
    blobs = [np.empty((0, n_features))]
    for cluster_idx in range(n_clusters):
        center = centers[cluster_idx, :n_features]
        noise = rng.randn(n_samples_per_cluster, n_features)
        blobs.append(center + std * noise)

    return np.concatenate(blobs, axis=0)
5 | 6 | """ 7 | 8 | from ._column_transformer import ( 9 | ColumnTransformer, 10 | make_column_transformer, 11 | make_column_selector, 12 | ) 13 | from ._target import TransformedTargetRegressor 14 | 15 | 16 | __all__ = [ 17 | "ColumnTransformer", 18 | "make_column_transformer", 19 | "TransformedTargetRegressor", 20 | "make_column_selector", 21 | ] 22 | -------------------------------------------------------------------------------- /sklearn/compose/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/compose/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/covariance/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/covariance/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/cross_decomposition/__init__.py: -------------------------------------------------------------------------------- 1 | from ._pls import PLSCanonical, PLSRegression, PLSSVD, CCA 2 | 3 | __all__ = ["PLSCanonical", "PLSRegression", "PLSSVD", "CCA"] 4 | -------------------------------------------------------------------------------- /sklearn/cross_decomposition/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/cross_decomposition/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/datasets/data/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/data/__init__.py -------------------------------------------------------------------------------- /sklearn/datasets/data/diabetes_data_raw.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/data/diabetes_data_raw.csv.gz -------------------------------------------------------------------------------- /sklearn/datasets/data/diabetes_target.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/data/diabetes_target.csv.gz -------------------------------------------------------------------------------- /sklearn/datasets/data/digits.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/data/digits.csv.gz -------------------------------------------------------------------------------- /sklearn/datasets/data/linnerud_exercise.csv: -------------------------------------------------------------------------------- 1 | Chins Situps Jumps 2 | 5 162 60 3 | 2 110 60 4 | 12 101 101 5 | 12 105 37 6 | 13 155 58 7 | 4 101 42 8 | 8 101 38 9 | 6 125 40 10 | 15 200 40 11 | 17 251 250 12 | 17 120 38 13 | 13 210 115 14 | 14 215 105 15 | 1 50 50 16 | 6 70 31 17 | 12 210 120 18 | 4 60 25 19 | 11 230 80 20 | 15 225 73 21 | 2 110 43 22 | -------------------------------------------------------------------------------- /sklearn/datasets/data/linnerud_physiological.csv: -------------------------------------------------------------------------------- 1 | Weight Waist Pulse 2 | 191 36 50 3 | 189 37 52 4 | 193 38 58 5 | 162 35 62 6 | 189 35 46 7 | 182 36 
56 8 | 211 38 56 9 | 167 34 60 10 | 176 31 74 11 | 154 33 56 12 | 169 34 50 13 | 166 33 52 14 | 154 34 64 15 | 247 46 50 16 | 193 36 46 17 | 202 37 62 18 | 176 37 54 19 | 157 32 52 20 | 156 33 54 21 | 138 33 68 22 | -------------------------------------------------------------------------------- /sklearn/datasets/descr/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/descr/__init__.py -------------------------------------------------------------------------------- /sklearn/datasets/descr/linnerud.rst: -------------------------------------------------------------------------------- 1 | .. _linnerrud_dataset: 2 | 3 | Linnerud dataset 4 | ---------------- 5 | 6 | **Data Set Characteristics:** 7 | 8 | :Number of Instances: 20 9 | :Number of Attributes: 3 10 | :Missing Attribute Values: None 11 | 12 | The Linnerud dataset is a multi-output regression dataset. It consists of three 13 | exercise (data) and three physiological (target) variables collected from 14 | twenty middle-aged men in a fitness club: 15 | 16 | - *physiological* - CSV containing 20 observations on 3 physiological variables: 17 | Weight, Waist and Pulse. 18 | - *exercise* - CSV containing 20 observations on 3 exercise variables: 19 | Chins, Situps and Jumps. 20 | 21 | .. topic:: References 22 | 23 | * Tenenhaus, M. (1998). La regression PLS: theorie et pratique. Paris: 24 | Editions Technip. 25 | -------------------------------------------------------------------------------- /sklearn/datasets/images/README.txt: -------------------------------------------------------------------------------- 1 | Image: china.jpg 2 | Released under a creative commons license.
[1] 3 | Attribution: Some rights reserved by danielbuechele [2] 4 | Retrieved 21st August, 2011 from [3] by Robert Layton 5 | 6 | [1] https://creativecommons.org/licenses/by/2.0/ 7 | [2] https://www.flickr.com/photos/danielbuechele/ 8 | [3] https://www.flickr.com/photos/danielbuechele/6061409035/sizes/z/in/photostream/ 9 | 10 | 11 | Image: flower.jpg 12 | Released under a creative commons license. [1] 13 | Attribution: Some rights reserved by danielbuechele [2] 14 | Retrieved 21st August, 2011 from [3] by Robert Layton 15 | 16 | [1] https://creativecommons.org/licenses/by/2.0/ 17 | [2] https://www.flickr.com/photos/vultilion/ 18 | [3] https://www.flickr.com/photos/vultilion/6056698931/sizes/z/in/photostream/ 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /sklearn/datasets/images/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/images/__init__.py -------------------------------------------------------------------------------- /sklearn/datasets/images/china.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/images/china.jpg -------------------------------------------------------------------------------- /sklearn/datasets/images/flower.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/images/flower.jpg -------------------------------------------------------------------------------- /sklearn/datasets/setup.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import os 3 | import platform 4 | 5 | 6 | def 
@pytest.fixture
def hide_available_pandas(monkeypatch):
    """Make ``import pandas`` fail for the duration of a test.

    ``builtins.__import__`` is swapped, through ``monkeypatch``, for a wrapper
    that raises ``ImportError`` when asked for the name ``"pandas"`` and
    defers to the real importer for every other name.
    """
    real_import = builtins.__import__

    def _import_without_pandas(name, *args, **kwargs):
        # Only the exact top-level name "pandas" is intercepted.
        if name != "pandas":
            return real_import(name, *args, **kwargs)
        raise ImportError()

    monkeypatch.setattr(builtins, "__import__", _import_without_pandas)
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/__init__.py -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/__init__.py -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_1/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_1/__init__.py -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_1/api-v1-jd-1.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_1/api-v1-jd-1.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_1/api-v1-jdf-1.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_1/api-v1-jdf-1.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_1/api-v1-jdq-1.json.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_1/api-v1-jdq-1.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_1/data-v1-dl-1.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_1/data-v1-dl-1.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_1119/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_1119/__init__.py -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_1119/api-v1-jd-1119.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_1119/api-v1-jd-1119.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_1119/api-v1-jdf-1119.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_1119/api-v1-jdf-1119.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_1119/api-v1-jdl-dn-adult-census-l-2-dv-1.json.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_1119/api-v1-jdl-dn-adult-census-l-2-dv-1.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_1119/api-v1-jdl-dn-adult-census-l-2-s-act-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_1119/api-v1-jdl-dn-adult-census-l-2-s-act-.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_1119/api-v1-jdq-1119.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_1119/api-v1-jdq-1119.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_1119/data-v1-dl-54002.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_1119/data-v1-dl-54002.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_2/__init__.py -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_2/api-v1-jd-2.json.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_2/api-v1-jd-2.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_2/api-v1-jdf-2.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_2/api-v1-jdf-2.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_2/api-v1-jdl-dn-anneal-l-2-dv-1.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_2/api-v1-jdl-dn-anneal-l-2-dv-1.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_2/api-v1-jdl-dn-anneal-l-2-s-act-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_2/api-v1-jdl-dn-anneal-l-2-s-act-.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_2/api-v1-jdq-2.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_2/api-v1-jdq-2.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_2/data-v1-dl-1666876.arff.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_2/data-v1-dl-1666876.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_292/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_292/__init__.py -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_292/api-v1-jd-292.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_292/api-v1-jd-292.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_292/api-v1-jd-40981.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_292/api-v1-jd-40981.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_292/api-v1-jdf-292.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_292/api-v1-jdf-292.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_292/api-v1-jdf-40981.json.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_292/api-v1-jdf-40981.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_292/api-v1-jdl-dn-australian-l-2-dv-1-s-dact.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_292/api-v1-jdl-dn-australian-l-2-dv-1-s-dact.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_292/api-v1-jdl-dn-australian-l-2-dv-1.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_292/api-v1-jdl-dn-australian-l-2-dv-1.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_292/api-v1-jdl-dn-australian-l-2-s-act-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_292/api-v1-jdl-dn-australian-l-2-s-act-.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_292/data-v1-dl-49822.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_292/data-v1-dl-49822.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_3/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_3/__init__.py -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_3/api-v1-jd-3.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_3/api-v1-jd-3.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_3/api-v1-jdf-3.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_3/api-v1-jdf-3.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_3/api-v1-jdq-3.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_3/api-v1-jdq-3.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_3/data-v1-dl-3.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_3/data-v1-dl-3.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40589/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40589/__init__.py -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40589/api-v1-jd-40589.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40589/api-v1-jd-40589.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40589/api-v1-jdf-40589.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40589/api-v1-jdf-40589.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40589/api-v1-jdl-dn-emotions-l-2-dv-3.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40589/api-v1-jdl-dn-emotions-l-2-dv-3.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40589/api-v1-jdl-dn-emotions-l-2-s-act-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40589/api-v1-jdl-dn-emotions-l-2-s-act-.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40589/api-v1-jdq-40589.json.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40589/api-v1-jdq-40589.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40589/data-v1-dl-4644182.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40589/data-v1-dl-4644182.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40675/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40675/__init__.py -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40675/api-v1-jd-40675.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40675/api-v1-jd-40675.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40675/api-v1-jdf-40675.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40675/api-v1-jdf-40675.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40675/api-v1-jdl-dn-glass2-l-2-dv-1-s-dact.json.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40675/api-v1-jdl-dn-glass2-l-2-dv-1-s-dact.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40675/api-v1-jdl-dn-glass2-l-2-dv-1.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40675/api-v1-jdl-dn-glass2-l-2-dv-1.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40675/api-v1-jdl-dn-glass2-l-2-s-act-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40675/api-v1-jdl-dn-glass2-l-2-s-act-.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40675/api-v1-jdq-40675.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40675/api-v1-jdq-40675.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40675/data-v1-dl-4965250.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40675/data-v1-dl-4965250.arff.gz -------------------------------------------------------------------------------- 
/sklearn/datasets/tests/data/openml/id_40945/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40945/__init__.py -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40945/api-v1-jd-40945.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40945/api-v1-jd-40945.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40945/api-v1-jdf-40945.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40945/api-v1-jdf-40945.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40945/api-v1-jdq-40945.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40945/api-v1-jdq-40945.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40945/data-v1-dl-16826755.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40945/data-v1-dl-16826755.arff.gz -------------------------------------------------------------------------------- 
/sklearn/datasets/tests/data/openml/id_40966/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40966/__init__.py -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40966/api-v1-jd-40966.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40966/api-v1-jd-40966.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40966/api-v1-jdf-40966.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40966/api-v1-jdf-40966.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40966/api-v1-jdl-dn-miceprotein-l-2-dv-4.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40966/api-v1-jdl-dn-miceprotein-l-2-dv-4.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40966/api-v1-jdl-dn-miceprotein-l-2-s-act-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40966/api-v1-jdl-dn-miceprotein-l-2-s-act-.json.gz 
-------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40966/api-v1-jdq-40966.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40966/api-v1-jdq-40966.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_40966/data-v1-dl-17928620.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_40966/data-v1-dl-17928620.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_42585/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_42585/__init__.py -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_42585/api-v1-jd-42585.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_42585/api-v1-jd-42585.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_42585/api-v1-jdf-42585.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_42585/api-v1-jdf-42585.json.gz 
-------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_42585/api-v1-jdq-42585.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_42585/api-v1-jdq-42585.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_42585/data-v1-dl-21854866.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_42585/data-v1-dl-21854866.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_561/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_561/__init__.py -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_561/api-v1-jd-561.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_561/api-v1-jd-561.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_561/api-v1-jdf-561.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_561/api-v1-jdf-561.json.gz 
-------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_561/api-v1-jdl-dn-cpu-l-2-dv-1.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_561/api-v1-jdl-dn-cpu-l-2-dv-1.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_561/api-v1-jdl-dn-cpu-l-2-s-act-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_561/api-v1-jdl-dn-cpu-l-2-s-act-.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_561/api-v1-jdq-561.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_561/api-v1-jdq-561.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_561/data-v1-dl-52739.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_561/data-v1-dl-52739.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_61/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_61/__init__.py 
-------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_61/api-v1-jd-61.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_61/api-v1-jd-61.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_61/api-v1-jdf-61.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_61/api-v1-jdf-61.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_61/api-v1-jdl-dn-iris-l-2-dv-1.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_61/api-v1-jdl-dn-iris-l-2-dv-1.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_61/api-v1-jdl-dn-iris-l-2-s-act-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_61/api-v1-jdl-dn-iris-l-2-s-act-.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_61/api-v1-jdq-61.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_61/api-v1-jdq-61.json.gz 
-------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_61/data-v1-dl-61.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_61/data-v1-dl-61.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_62/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_62/__init__.py -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_62/api-v1-jd-62.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_62/api-v1-jd-62.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_62/api-v1-jdf-62.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_62/api-v1-jdf-62.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/id_62/api-v1-jdq-62.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_62/api-v1-jdq-62.json.gz -------------------------------------------------------------------------------- 
/sklearn/datasets/tests/data/openml/id_62/data-v1-dl-52352.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/datasets/tests/data/openml/id_62/data-v1-dl-52352.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/svmlight_classification.txt: -------------------------------------------------------------------------------- 1 | # comment 2 | # note: the next line contains a tab 3 | 1.0 3:2.5 11:-5.2 16:1.5 # and an inline comment 4 | 2.0 6:1.0 13:-3 5 | # another comment 6 | 3.0 21:27 7 | 4.0 2:1.234567890123456e10 # double precision value 8 | 1.0 # empty line, all zeros 9 | 2.0 3:0 # explicit zeros 10 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/svmlight_invalid.txt: -------------------------------------------------------------------------------- 1 | python 2:2.5 10:-5.2 15:1.5 2 | 2.0 5:1.0 12:-3 3 | 3.0 20:27 4 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/svmlight_invalid_order.txt: -------------------------------------------------------------------------------- 1 | -1 5:2.5 2:-5.2 15:1.5 2 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/svmlight_multilabel.txt: -------------------------------------------------------------------------------- 1 | # multilabel dataset in SVMlight format 2 | 1,0 2:2.5 10:-5.2 15:1.5 3 | 2 5:1.0 12:-3 4 | 2:3.5 11:26 5 | 1,2 20:27 6 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/test_olivetti_faces.py: -------------------------------------------------------------------------------- 1 | """Test Olivetti faces fetcher, if the data is available, 2 | or if specifically requested via 
environment variable 3 | (e.g. for travis cron job).""" 4 | 5 | import numpy as np 6 | 7 | from sklearn.utils import Bunch 8 | from sklearn.datasets.tests.test_common import check_return_X_y 9 | 10 | from sklearn.utils._testing import assert_array_equal 11 | 12 | 13 | def test_olivetti_faces(fetch_olivetti_faces_fxt): 14 | data = fetch_olivetti_faces_fxt(shuffle=True, random_state=0) 15 | 16 | assert isinstance(data, Bunch) 17 | for expected_keys in ("data", "images", "target", "DESCR"): 18 | assert expected_keys in data.keys() 19 | 20 | assert data.data.shape == (400, 4096) 21 | assert data.images.shape == (400, 64, 64) 22 | assert data.target.shape == (400,) 23 | assert_array_equal(np.unique(np.sort(data.target)), np.arange(40)) 24 | assert data.DESCR.startswith(".. _olivetti_faces_dataset:") 25 | 26 | # test the return_X_y option 27 | check_return_X_y(data, fetch_olivetti_faces_fxt) 28 | -------------------------------------------------------------------------------- /sklearn/decomposition/_cdnmf_fast.pyx: -------------------------------------------------------------------------------- 1 | # Author: Mathieu Blondel, Tom Dupre la Tour 2 | # License: BSD 3 clause 3 | 4 | from cython cimport floating 5 | from libc.math cimport fabs 6 | 7 | 8 | def _update_cdnmf_fast(floating[:, ::1] W, floating[:, :] HHt, 9 | floating[:, :] XHt, Py_ssize_t[::1] permutation): 10 | cdef: 11 | floating violation = 0 12 | Py_ssize_t n_components = W.shape[1] 13 | Py_ssize_t n_samples = W.shape[0] # n_features for H update 14 | floating grad, pg, hess 15 | Py_ssize_t i, r, s, t 16 | 17 | with nogil: 18 | for s in range(n_components): 19 | t = permutation[s] 20 | 21 | for i in range(n_samples): 22 | # gradient = GW[t, i] where GW = np.dot(W, HHt) - XHt 23 | grad = -XHt[i, t] 24 | 25 | for r in range(n_components): 26 | grad += HHt[t, r] * W[i, r] 27 | 28 | # projected gradient 29 | pg = min(0., grad) if W[i, t] == 0 else grad 30 | violation += fabs(pg) 31 | 32 | # Hessian 33 | hess = 
HHt[t, t] 34 | 35 | if hess != 0: 36 | W[i, t] = max(W[i, t] - grad / hess, 0.) 37 | 38 | return violation 39 | -------------------------------------------------------------------------------- /sklearn/decomposition/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy 3 | from numpy.distutils.misc_util import Configuration 4 | 5 | 6 | def configuration(parent_package="", top_path=None): 7 | config = Configuration("decomposition", parent_package, top_path) 8 | 9 | libraries = [] 10 | if os.name == "posix": 11 | libraries.append("m") 12 | 13 | config.add_extension( 14 | "_online_lda_fast", 15 | sources=["_online_lda_fast.pyx"], 16 | include_dirs=[numpy.get_include()], 17 | libraries=libraries, 18 | ) 19 | 20 | config.add_extension( 21 | "_cdnmf_fast", 22 | sources=["_cdnmf_fast.pyx"], 23 | include_dirs=[numpy.get_include()], 24 | libraries=libraries, 25 | ) 26 | 27 | config.add_subpackage("tests") 28 | 29 | return config 30 | 31 | 32 | if __name__ == "__main__": 33 | from numpy.distutils.core import setup 34 | 35 | setup(**configuration().todict()) 36 | -------------------------------------------------------------------------------- /sklearn/decomposition/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/decomposition/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/ensemble/_hist_gradient_boosting/__init__.py: -------------------------------------------------------------------------------- 1 | """This module implements histogram-based gradient boosting estimators. 2 | 3 | The implementation is a port from pygbm which is itself strongly inspired 4 | from LightGBM. 
5 | """ 6 | -------------------------------------------------------------------------------- /sklearn/ensemble/_hist_gradient_boosting/_bitset.pxd: -------------------------------------------------------------------------------- 1 | from .common cimport X_BINNED_DTYPE_C 2 | from .common cimport BITSET_DTYPE_C 3 | from .common cimport BITSET_INNER_DTYPE_C 4 | from .common cimport X_DTYPE_C 5 | 6 | cdef void init_bitset(BITSET_DTYPE_C bitset) nogil 7 | 8 | cdef void set_bitset(BITSET_DTYPE_C bitset, X_BINNED_DTYPE_C val) nogil 9 | 10 | cdef unsigned char in_bitset(BITSET_DTYPE_C bitset, X_BINNED_DTYPE_C val) nogil 11 | 12 | cpdef unsigned char in_bitset_memoryview(const BITSET_INNER_DTYPE_C[:] bitset, 13 | X_BINNED_DTYPE_C val) nogil 14 | 15 | cdef unsigned char in_bitset_2d_memoryview( 16 | const BITSET_INNER_DTYPE_C [:, :] bitset, 17 | X_BINNED_DTYPE_C val, 18 | unsigned int row) nogil 19 | -------------------------------------------------------------------------------- /sklearn/ensemble/_hist_gradient_boosting/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/ensemble/_hist_gradient_boosting/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/ensemble/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/ensemble/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/experimental/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.experimental` module provides importable modules that enable 3 | the use of experimental features or estimators. 
4 | 5 | The features and estimators that are experimental aren't subject to 6 | deprecation cycles. Use them at your own risk! 7 | """ 8 | -------------------------------------------------------------------------------- /sklearn/experimental/enable_hist_gradient_boosting.py: -------------------------------------------------------------------------------- 1 | """This is now a no-op and can be safely removed from your code. 2 | 3 | It used to enable the use of 4 | :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and 5 | :class:`~sklearn.ensemble.HistGradientBoostingRegressor` when they were still 6 | :term:`experimental`, but these estimators are now stable and can be imported 7 | normally from `sklearn.ensemble`. 8 | """ 9 | # Don't remove this file, we don't want to break users' code just because the 10 | # feature isn't experimental anymore. 11 | 12 | 13 | import warnings 14 | 15 | 16 | warnings.warn( 17 | "Since version 1.0, " 18 | "it is not needed to import enable_hist_gradient_boosting anymore. " 19 | "HistGradientBoostingClassifier and HistGradientBoostingRegressor are now " 20 | "stable and can be normally imported from sklearn.ensemble." 21 | ) 22 | -------------------------------------------------------------------------------- /sklearn/experimental/enable_iterative_imputer.py: -------------------------------------------------------------------------------- 1 | """Enables IterativeImputer 2 | 3 | The API and results of this estimator might change without any deprecation 4 | cycle. 5 | 6 | Importing this file dynamically sets :class:`~sklearn.impute.IterativeImputer` 7 | as an attribute of the impute module:: 8 | 9 | >>> # explicitly require this experimental feature 10 | >>> from sklearn.experimental import enable_iterative_imputer # noqa 11 | >>> # now you can import normally from impute 12 | >>> from sklearn.impute import IterativeImputer 13 | """ 14 | 15 | from ..impute._iterative import IterativeImputer 16 | from .. 
import impute 17 | 18 | # use setattr to avoid mypy errors when monkeypatching 19 | setattr(impute, "IterativeImputer", IterativeImputer) 20 | impute.__all__ += ["IterativeImputer"] 21 | -------------------------------------------------------------------------------- /sklearn/experimental/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/experimental/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/experimental/tests/test_enable_hist_gradient_boosting.py: -------------------------------------------------------------------------------- 1 | """Tests for making sure experimental imports work as expected.""" 2 | 3 | import textwrap 4 | 5 | from sklearn.utils._testing import assert_run_python_script 6 | 7 | 8 | def test_import_raises_warning(): 9 | code = """ 10 | import pytest 11 | with pytest.warns(UserWarning, match="it is not needed to import"): 12 | from sklearn.experimental import enable_hist_gradient_boosting # noqa 13 | """ 14 | assert_run_python_script(textwrap.dedent(code)) 15 | -------------------------------------------------------------------------------- /sklearn/externals/README: -------------------------------------------------------------------------------- 1 | This directory contains bundled external dependencies that are updated 2 | every once in a while. 3 | 4 | Note for distribution packagers: if you want to remove the duplicated 5 | code and depend on a packaged version, we suggest that you simply create a 6 | symbolic link in this directory. 7 | 8 | -------------------------------------------------------------------------------- /sklearn/externals/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | External, bundled dependencies. 
4 | 5 | """ 6 | -------------------------------------------------------------------------------- /sklearn/externals/_packaging/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/externals/_packaging/__init__.py -------------------------------------------------------------------------------- /sklearn/externals/conftest.py: -------------------------------------------------------------------------------- 1 | # Do not collect any tests in externals. This is more robust than using 2 | # --ignore because --ignore needs a path and it is not convenient to pass in 3 | # the externals path (very long install-dependent path in site-packages) when 4 | # using --pyargs 5 | def pytest_ignore_collect(path, config): 6 | return True 7 | 8 | -------------------------------------------------------------------------------- /sklearn/feature_extraction/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.feature_extraction` module deals with feature extraction 3 | from raw data. It currently includes methods to extract features from text and 4 | images. 5 | """ 6 | 7 | from ._dict_vectorizer import DictVectorizer 8 | from ._hash import FeatureHasher 9 | from .image import img_to_graph, grid_to_graph 10 | from . 
import text 11 | 12 | __all__ = [ 13 | "DictVectorizer", 14 | "image", 15 | "img_to_graph", 16 | "grid_to_graph", 17 | "text", 18 | "FeatureHasher", 19 | ] 20 | -------------------------------------------------------------------------------- /sklearn/feature_extraction/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import platform 3 | 4 | 5 | def configuration(parent_package="", top_path=None): 6 | import numpy 7 | from numpy.distutils.misc_util import Configuration 8 | 9 | config = Configuration("feature_extraction", parent_package, top_path) 10 | libraries = [] 11 | if os.name == "posix": 12 | libraries.append("m") 13 | 14 | if platform.python_implementation() != "PyPy": 15 | config.add_extension( 16 | "_hashing_fast", 17 | sources=["_hashing_fast.pyx"], 18 | include_dirs=[numpy.get_include()], 19 | libraries=libraries, 20 | ) 21 | config.add_subpackage("tests") 22 | 23 | return config 24 | -------------------------------------------------------------------------------- /sklearn/feature_extraction/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/feature_extraction/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/feature_selection/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/feature_selection/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/gaussian_process/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Author: Jan Hendrik Metzen 4 | # Vincent Dubourg 5 | # (mostly translation, see implementation 
details) 6 | # License: BSD 3 clause 7 | 8 | """ 9 | The :mod:`sklearn.gaussian_process` module implements Gaussian Process 10 | based regression and classification. 11 | """ 12 | 13 | from ._gpr import GaussianProcessRegressor 14 | from ._gpc import GaussianProcessClassifier 15 | from . import kernels 16 | 17 | 18 | __all__ = ["GaussianProcessRegressor", "GaussianProcessClassifier", "kernels"] 19 | -------------------------------------------------------------------------------- /sklearn/gaussian_process/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/gaussian_process/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/impute/__init__.py: -------------------------------------------------------------------------------- 1 | """Transformers for missing value imputation""" 2 | import typing 3 | 4 | from ._base import MissingIndicator, SimpleImputer 5 | from ._knn import KNNImputer 6 | 7 | if typing.TYPE_CHECKING: 8 | # Avoid errors in type checkers (e.g. mypy) for experimental estimators. 9 | # TODO: remove this check once the estimator is no longer experimental. 
10 | from ._iterative import IterativeImputer # noqa 11 | 12 | __all__ = ["MissingIndicator", "SimpleImputer", "KNNImputer"] 13 | -------------------------------------------------------------------------------- /sklearn/impute/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/impute/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/inspection/__init__.py: -------------------------------------------------------------------------------- 1 | """The :mod:`sklearn.inspection` module includes tools for model inspection.""" 2 | 3 | 4 | from ._permutation_importance import permutation_importance 5 | 6 | from ._partial_dependence import partial_dependence 7 | from ._plot.partial_dependence import plot_partial_dependence 8 | from ._plot.partial_dependence import PartialDependenceDisplay 9 | 10 | 11 | __all__ = [ 12 | "partial_dependence", 13 | "plot_partial_dependence", 14 | "permutation_importance", 15 | "PartialDependenceDisplay", 16 | ] 17 | -------------------------------------------------------------------------------- /sklearn/inspection/_plot/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/inspection/_plot/__init__.py -------------------------------------------------------------------------------- /sklearn/inspection/_plot/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/inspection/_plot/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/inspection/setup.py: 
-------------------------------------------------------------------------------- 1 | from numpy.distutils.misc_util import Configuration 2 | 3 | 4 | def configuration(parent_package="", top_path=None): 5 | config = Configuration("inspection", parent_package, top_path) 6 | 7 | config.add_subpackage("_plot") 8 | config.add_subpackage("_plot.tests") 9 | 10 | config.add_subpackage("tests") 11 | 12 | return config 13 | 14 | 15 | if __name__ == "__main__": 16 | from numpy.distutils.core import setup 17 | 18 | setup(**configuration().todict()) 19 | -------------------------------------------------------------------------------- /sklearn/inspection/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/inspection/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/linear_model/_glm/__init__.py: -------------------------------------------------------------------------------- 1 | # License: BSD 3 clause 2 | 3 | from .glm import ( 4 | GeneralizedLinearRegressor, 5 | PoissonRegressor, 6 | GammaRegressor, 7 | TweedieRegressor, 8 | ) 9 | 10 | __all__ = [ 11 | "GeneralizedLinearRegressor", 12 | "PoissonRegressor", 13 | "GammaRegressor", 14 | "TweedieRegressor", 15 | ] 16 | -------------------------------------------------------------------------------- /sklearn/linear_model/_glm/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # License: BSD 3 clause 2 | -------------------------------------------------------------------------------- /sklearn/linear_model/_sgd_fast.pxd: -------------------------------------------------------------------------------- 1 | # License: BSD 3 clause 2 | """Helper to load LossFunction from sgd_fast.pyx to sag_fast.pyx""" 3 | 4 | cdef class LossFunction: 5 | cdef double loss(self, double p, double y) 
nogil 6 | cdef double dloss(self, double p, double y) nogil 7 | 8 | 9 | cdef class Regression(LossFunction): 10 | cdef double loss(self, double p, double y) nogil 11 | cdef double dloss(self, double p, double y) nogil 12 | 13 | 14 | cdef class Classification(LossFunction): 15 | cdef double loss(self, double p, double y) nogil 16 | cdef double dloss(self, double p, double y) nogil 17 | 18 | 19 | cdef class Log(Classification): 20 | cdef double loss(self, double p, double y) nogil 21 | cdef double dloss(self, double p, double y) nogil 22 | 23 | 24 | cdef class SquaredLoss(Regression): 25 | cdef double loss(self, double p, double y) nogil 26 | cdef double dloss(self, double p, double y) nogil 27 | -------------------------------------------------------------------------------- /sklearn/linear_model/_sgd_fast_helpers.h: -------------------------------------------------------------------------------- 1 | // We cannot directly reuse the npy_isfinite from npy_math.h as numpy 2 | // and scikit-learn are not necessarily built with the same compiler. 3 | // When re-declaring the functions in the template for cython 4 | // specific for each parameter input type, it needs to be 2 different functions 5 | // as cython doesn't support function overloading. 
6 | #ifdef _MSC_VER 7 | # include 8 | # define skl_isfinite _finite 9 | # define skl_isfinite32 _finite 10 | # define skl_isfinite64 _finite 11 | #else 12 | # include 13 | # define skl_isfinite npy_isfinite 14 | # define skl_isfinite32 npy_isfinite 15 | # define skl_isfinite64 npy_isfinite 16 | #endif 17 | -------------------------------------------------------------------------------- /sklearn/linear_model/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/linear_model/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/manifold/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.manifold` module implements data embedding techniques. 3 | """ 4 | 5 | from ._locally_linear import locally_linear_embedding, LocallyLinearEmbedding 6 | from ._isomap import Isomap 7 | from ._mds import MDS, smacof 8 | from ._spectral_embedding import SpectralEmbedding, spectral_embedding 9 | from ._t_sne import TSNE, trustworthiness 10 | 11 | __all__ = [ 12 | "locally_linear_embedding", 13 | "LocallyLinearEmbedding", 14 | "Isomap", 15 | "MDS", 16 | "smacof", 17 | "SpectralEmbedding", 18 | "spectral_embedding", 19 | "TSNE", 20 | "trustworthiness", 21 | ] 22 | -------------------------------------------------------------------------------- /sklearn/manifold/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy 4 | 5 | 6 | def configuration(parent_package="", top_path=None): 7 | from numpy.distutils.misc_util import Configuration 8 | 9 | config = Configuration("manifold", parent_package, top_path) 10 | 11 | libraries = [] 12 | if os.name == "posix": 13 | libraries.append("m") 14 | 15 | config.add_extension( 16 | "_utils", 17 | 
sources=["_utils.pyx"], 18 | include_dirs=[numpy.get_include()], 19 | libraries=libraries, 20 | extra_compile_args=["-O3"], 21 | ) 22 | 23 | config.add_extension( 24 | "_barnes_hut_tsne", 25 | sources=["_barnes_hut_tsne.pyx"], 26 | include_dirs=[numpy.get_include()], 27 | libraries=libraries, 28 | extra_compile_args=["-O3"], 29 | ) 30 | 31 | config.add_subpackage("tests") 32 | 33 | return config 34 | 35 | 36 | if __name__ == "__main__": 37 | from numpy.distutils.core import setup 38 | 39 | setup(**configuration().todict()) 40 | -------------------------------------------------------------------------------- /sklearn/manifold/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/manifold/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/metrics/_plot/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/metrics/_plot/__init__.py -------------------------------------------------------------------------------- /sklearn/metrics/_plot/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/metrics/_plot/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/metrics/cluster/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy 4 | from numpy.distutils.misc_util import Configuration 5 | 6 | 7 | def configuration(parent_package="", top_path=None): 8 | config = Configuration("cluster", parent_package, top_path) 9 | libraries = [] 10 | if os.name == "posix": 11 | 
libraries.append("m") 12 | config.add_extension( 13 | "_expected_mutual_info_fast", 14 | sources=["_expected_mutual_info_fast.pyx"], 15 | include_dirs=[numpy.get_include()], 16 | libraries=libraries, 17 | ) 18 | 19 | config.add_subpackage("tests") 20 | 21 | return config 22 | 23 | 24 | if __name__ == "__main__": 25 | from numpy.distutils.core import setup 26 | 27 | setup(**configuration().todict()) 28 | -------------------------------------------------------------------------------- /sklearn/metrics/cluster/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/metrics/cluster/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/metrics/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/metrics/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/mixture/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.mixture` module implements mixture modeling algorithms. 
3 | """ 4 | 5 | from ._gaussian_mixture import GaussianMixture 6 | from ._bayesian_mixture import BayesianGaussianMixture 7 | 8 | 9 | __all__ = ["GaussianMixture", "BayesianGaussianMixture"] 10 | -------------------------------------------------------------------------------- /sklearn/mixture/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/mixture/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/mixture/tests/test_mixture.py: -------------------------------------------------------------------------------- 1 | # Author: Guillaume Lemaitre 2 | # License: BSD 3 clause 3 | 4 | import pytest 5 | import numpy as np 6 | 7 | from sklearn.mixture import GaussianMixture 8 | from sklearn.mixture import BayesianGaussianMixture 9 | 10 | 11 | @pytest.mark.parametrize("estimator", [GaussianMixture(), BayesianGaussianMixture()]) 12 | def test_gaussian_mixture_n_iter(estimator): 13 | # check that n_iter is the number of iterations performed.
14 | rng = np.random.RandomState(0) 15 | X = rng.rand(10, 5) 16 | max_iter = 1 17 | estimator.set_params(max_iter=max_iter) 18 | estimator.fit(X) 19 | assert estimator.n_iter_ == max_iter 20 | 21 | 22 | @pytest.mark.parametrize("estimator", [GaussianMixture(), BayesianGaussianMixture()]) 23 | def test_mixture_n_components_greater_than_n_samples_error(estimator): 24 | """Check error when n_components > n_samples""" 25 | rng = np.random.RandomState(0) 26 | X = rng.rand(10, 5) 27 | estimator.set_params(n_components=12) 28 | 29 | msg = "Expected n_samples >= n_components" 30 | with pytest.raises(ValueError, match=msg): 31 | estimator.fit(X) 32 | -------------------------------------------------------------------------------- /sklearn/model_selection/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/model_selection/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/model_selection/tests/common.py: -------------------------------------------------------------------------------- 1 | """ 2 | Common utilities for testing model selection.
3 | """ 4 | 5 | import numpy as np 6 | 7 | from sklearn.model_selection import KFold 8 | 9 | 10 | class OneTimeSplitter: 11 | """A wrapper to make KFold single entry cv iterator""" 12 | 13 | def __init__(self, n_splits=4, n_samples=99): 14 | self.n_splits = n_splits 15 | self.n_samples = n_samples 16 | self.indices = iter(KFold(n_splits=n_splits).split(np.ones(n_samples))) 17 | 18 | def split(self, X=None, y=None, groups=None): 19 | """Split can be called only once""" 20 | for index in self.indices: 21 | yield index 22 | 23 | def get_n_splits(self, X=None, y=None, groups=None): 24 | return self.n_splits 25 | -------------------------------------------------------------------------------- /sklearn/neighbors/_distance_metric.py: -------------------------------------------------------------------------------- 1 | # TODO: Remove this file in 1.3 2 | import warnings 3 | 4 | from ..metrics import DistanceMetric as _DistanceMetric 5 | 6 | 7 | class DistanceMetric(_DistanceMetric): 8 | @classmethod 9 | def _warn(cls): 10 | warnings.warn( 11 | "sklearn.neighbors.DistanceMetric has been moved " 12 | "to sklearn.metrics.DistanceMetric in 1.0. 
" 13 | "This import path will be removed in 1.3", 14 | category=FutureWarning, 15 | ) 16 | 17 | @classmethod 18 | def get_metric(cls, metric, **kwargs): 19 | DistanceMetric._warn() 20 | return _DistanceMetric.get_metric(metric, **kwargs) 21 | -------------------------------------------------------------------------------- /sklearn/neighbors/_partition_nodes.pxd: -------------------------------------------------------------------------------- 1 | from ..utils._typedefs cimport DTYPE_t, ITYPE_t 2 | 3 | cdef int partition_node_indices( 4 | DTYPE_t *data, 5 | ITYPE_t *node_indices, 6 | ITYPE_t split_dim, 7 | ITYPE_t split_index, 8 | ITYPE_t n_features, 9 | ITYPE_t n_points) except -1 10 | -------------------------------------------------------------------------------- /sklearn/neighbors/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def configuration(parent_package="", top_path=None): 5 | import numpy 6 | from numpy.distutils.misc_util import Configuration 7 | 8 | config = Configuration("neighbors", parent_package, top_path) 9 | libraries = [] 10 | if os.name == "posix": 11 | libraries.append("m") 12 | 13 | config.add_extension( 14 | "_ball_tree", 15 | sources=["_ball_tree.pyx"], 16 | include_dirs=[numpy.get_include()], 17 | libraries=libraries, 18 | ) 19 | 20 | config.add_extension( 21 | "_kd_tree", 22 | sources=["_kd_tree.pyx"], 23 | include_dirs=[numpy.get_include()], 24 | libraries=libraries, 25 | ) 26 | 27 | config.add_extension( 28 | "_partition_nodes", 29 | sources=["_partition_nodes.pyx"], 30 | include_dirs=[numpy.get_include()], 31 | language="c++", 32 | libraries=libraries, 33 | ) 34 | 35 | config.add_extension( 36 | "_quad_tree", 37 | sources=["_quad_tree.pyx"], 38 | include_dirs=[numpy.get_include()], 39 | libraries=libraries, 40 | ) 41 | 42 | config.add_subpackage("tests") 43 | 44 | return config 45 | -------------------------------------------------------------------------------- 
/sklearn/neighbors/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/neighbors/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/neighbors/tests/test_kd_tree.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from joblib import Parallel 4 | from sklearn.utils.fixes import delayed 5 | 6 | from sklearn.neighbors._kd_tree import KDTree 7 | 8 | DIMENSION = 3 9 | 10 | METRICS = {"euclidean": {}, "manhattan": {}, "chebyshev": {}, "minkowski": dict(p=3)} 11 | 12 | 13 | def test_array_object_type(): 14 | """Check that we do not accept object dtype array.""" 15 | X = np.array([(1, 2, 3), (2, 5), (5, 5, 1, 2)], dtype=object) 16 | with pytest.raises(ValueError, match="setting an array element with a sequence"): 17 | KDTree(X) 18 | 19 | 20 | def test_kdtree_picklable_with_joblib(): 21 | """Make sure that KDTree queries work when joblib memmaps. 22 | 23 | Non-regression test for #21685 and #21228.""" 24 | rng = np.random.RandomState(0) 25 | X = rng.random_sample((10, 3)) 26 | tree = KDTree(X, leaf_size=2) 27 | 28 | # Call Parallel with max_nbytes=1 to trigger readonly memory mapping that 29 | # used to raise "ValueError: buffer source array is read-only" in a previous 30 | # version of the Cython code. 31 | Parallel(n_jobs=2, max_nbytes=1)(delayed(tree.query)(data) for data in 2 * [X]) 32 | -------------------------------------------------------------------------------- /sklearn/neural_network/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.neural_network` module includes models based on neural 3 | networks.
4 | """ 5 | 6 | # License: BSD 3 clause 7 | 8 | from ._rbm import BernoulliRBM 9 | 10 | from ._multilayer_perceptron import MLPClassifier 11 | from ._multilayer_perceptron import MLPRegressor 12 | 13 | __all__ = ["BernoulliRBM", "MLPClassifier", "MLPRegressor"] 14 | -------------------------------------------------------------------------------- /sklearn/neural_network/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/neural_network/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/neural_network/tests/test_base.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from sklearn.neural_network._base import binary_log_loss 5 | from sklearn.neural_network._base import log_loss 6 | 7 | 8 | def test_binary_log_loss_1_prob_finite(): 9 | # y_proba is equal to one should result in a finite logloss 10 | y_true = np.array([[0, 0, 1]]).T 11 | y_prob = np.array([[0.9, 1.0, 1.0]]).T 12 | 13 | loss = binary_log_loss(y_true, y_prob) 14 | assert np.isfinite(loss) 15 | 16 | 17 | @pytest.mark.parametrize( 18 | "y_true, y_prob", 19 | [ 20 | ( 21 | np.array([[1, 0, 0], [0, 1, 0]]), 22 | np.array([[0.0, 1.0, 0.0], [0.9, 0.05, 0.05]]), 23 | ), 24 | (np.array([[0, 0, 1]]).T, np.array([[0.9, 1.0, 1.0]]).T), 25 | ], 26 | ) 27 | def test_log_loss_1_prob_finite(y_true, y_prob): 28 | # y_proba is equal to 1 should result in a finite logloss 29 | loss = log_loss(y_true, y_prob) 30 | assert np.isfinite(loss) 31 | -------------------------------------------------------------------------------- /sklearn/preprocessing/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def configuration(parent_package="", top_path=None): 5 | import numpy 6 | from 
numpy.distutils.misc_util import Configuration 7 | 8 | config = Configuration("preprocessing", parent_package, top_path) 9 | libraries = [] 10 | if os.name == "posix": 11 | libraries.append("m") 12 | 13 | config.add_extension( 14 | "_csr_polynomial_expansion", 15 | sources=["_csr_polynomial_expansion.pyx"], 16 | include_dirs=[numpy.get_include()], 17 | libraries=libraries, 18 | ) 19 | 20 | config.add_subpackage("tests") 21 | 22 | return config 23 | -------------------------------------------------------------------------------- /sklearn/preprocessing/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/preprocessing/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/semi_supervised/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.semi_supervised` module implements semi-supervised learning 3 | algorithms. These algorithms utilize small amounts of labeled data and large 4 | amounts of unlabeled data for classification tasks. This module includes Label 5 | Propagation. 
6 | """ 7 | 8 | from ._label_propagation import LabelPropagation, LabelSpreading 9 | from ._self_training import SelfTrainingClassifier 10 | 11 | __all__ = ["SelfTrainingClassifier", "LabelPropagation", "LabelSpreading"] 12 | -------------------------------------------------------------------------------- /sklearn/semi_supervised/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/semi_supervised/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/svm/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.svm` module includes Support Vector Machine algorithms. 3 | """ 4 | 5 | # See http://scikit-learn.sourceforge.net/modules/svm.html for complete 6 | # documentation. 7 | 8 | # Author: Fabian Pedregosa with help from 9 | # the scikit-learn community. LibSVM and LibLinear are copyright 10 | # of their respective owners. 
11 | # License: BSD 3 clause (C) INRIA 2010 12 | 13 | from ._classes import SVC, NuSVC, SVR, NuSVR, OneClassSVM, LinearSVC, LinearSVR 14 | from ._bounds import l1_min_c 15 | 16 | __all__ = [ 17 | "LinearSVC", 18 | "LinearSVR", 19 | "NuSVC", 20 | "NuSVR", 21 | "OneClassSVM", 22 | "SVC", 23 | "SVR", 24 | "l1_min_c", 25 | ] 26 | -------------------------------------------------------------------------------- /sklearn/svm/_newrand.pyx: -------------------------------------------------------------------------------- 1 | """Wrapper for newrand.h""" 2 | 3 | cdef extern from "newrand.h": 4 | void set_seed(unsigned int) 5 | unsigned int bounded_rand_int(unsigned int) 6 | 7 | def set_seed_wrap(unsigned int custom_seed): 8 | set_seed(custom_seed) 9 | 10 | def bounded_rand_int_wrap(unsigned int range_): 11 | return bounded_rand_int(range_) 12 | -------------------------------------------------------------------------------- /sklearn/svm/src/liblinear/_cython_blas_helpers.h: -------------------------------------------------------------------------------- 1 | #ifndef _CYTHON_BLAS_HELPERS_H 2 | #define _CYTHON_BLAS_HELPERS_H 3 | 4 | typedef double (*dot_func)(int, double*, int, double*, int); 5 | typedef void (*axpy_func)(int, double, double*, int, double*, int); 6 | typedef void (*scal_func)(int, double, double*, int); 7 | typedef double (*nrm2_func)(int, double*, int); 8 | 9 | typedef struct BlasFunctions{ 10 | dot_func dot; 11 | axpy_func axpy; 12 | scal_func scal; 13 | nrm2_func nrm2; 14 | } BlasFunctions; 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /sklearn/svm/src/liblinear/tron.h: -------------------------------------------------------------------------------- 1 | #ifndef _TRON_H 2 | #define _TRON_H 3 | 4 | #include "_cython_blas_helpers.h" 5 | 6 | class function 7 | { 8 | public: 9 | virtual double fun(double *w) = 0 ; 10 | virtual void grad(double *w, double *g) = 0 ; 11 | virtual void Hv(double *s, double *Hs) = 
0 ; 12 | 13 | virtual int get_nr_variable(void) = 0 ; 14 | virtual ~function(void){} 15 | }; 16 | 17 | class TRON 18 | { 19 | public: 20 | TRON(const function *fun_obj, double eps = 0.1, int max_iter = 1000, BlasFunctions *blas = 0); 21 | ~TRON(); 22 | 23 | int tron(double *w); 24 | void set_print_string(void (*i_print) (const char *buf)); 25 | 26 | private: 27 | int trcg(double delta, double *g, double *s, double *r); 28 | double norm_inf(int n, double *x); 29 | 30 | double eps; 31 | int max_iter; 32 | function *fun_obj; 33 | BlasFunctions *blas; 34 | void info(const char *fmt,...); 35 | void (*tron_print_string)(const char *buf); 36 | }; 37 | #endif 38 | -------------------------------------------------------------------------------- /sklearn/svm/src/libsvm/LIBSVM_CHANGES: -------------------------------------------------------------------------------- 1 | Changes to Libsvm 2 | 3 | This is here mainly as checklist for incorporation of new versions of libsvm. 4 | 5 | * Add copyright to files svm.cpp and svm.h 6 | * Add random_seed support and call to srand in fit function 7 | * Improved random number generator (fix on windows, enhancement on other 8 | platforms). See 9 | * invoke scipy blas api for svm kernel function to improve performance with speedup rate of 1.5X to 2X for dense data only. See 10 | * Expose the number of iterations run in optimization. 
See 11 | The changes made with respect to upstream are detailed in the heading of svm.cpp 12 | -------------------------------------------------------------------------------- /sklearn/svm/src/libsvm/_svm_cython_blas_helpers.h: -------------------------------------------------------------------------------- 1 | #ifndef _SVM_CYTHON_BLAS_HELPERS_H 2 | #define _SVM_CYTHON_BLAS_HELPERS_H 3 | 4 | typedef double (*dot_func)(int, double*, int, double*, int); 5 | typedef struct BlasFunctions{ 6 | dot_func dot; 7 | } BlasFunctions; 8 | 9 | #endif 10 | -------------------------------------------------------------------------------- /sklearn/svm/src/libsvm/libsvm_template.cpp: -------------------------------------------------------------------------------- 1 | 2 | /* this is a hack to generate libsvm with both sparse and dense 3 | methods in the same binary*/ 4 | 5 | #define _DENSE_REP 6 | #include "svm.cpp" 7 | #undef _DENSE_REP 8 | #include "svm.cpp" 9 | -------------------------------------------------------------------------------- /sklearn/svm/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/svm/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/tests/test_check_build.py: -------------------------------------------------------------------------------- 1 | """ 2 | Smoke Test the check_build module 3 | """ 4 | 5 | # Author: G Varoquaux 6 | # License: BSD 3 clause 7 | 8 | import pytest 9 | 10 | from sklearn.__check_build import raise_build_error 11 | 12 | 13 | 
def test_raise_build_error(): 14 | with pytest.raises(ImportError): 15 | raise_build_error(ImportError()) 16 | -------------------------------------------------------------------------------- /sklearn/tests/test_init.py: -------------------------------------------------------------------------------- 1 | # Basic unittests to test functioning of module's top-level 2 | 3 | 4 | __author__ = "Yaroslav Halchenko" 5 | __license__ = "BSD" 6 | 7 | 8 | try: 9 | from sklearn import * # noqa 10 | 11 | _top_import_error = None 12 | except Exception as e: 13 | _top_import_error = e 14 | 15 | 16 | def test_import_skl(): 17 | # Test whether the above import has failed for some reason 18 | # "import *" is discouraged outside of the module level, hence we 19 | # rely on setting up the variable above 20 | assert _top_import_error is None 21 | -------------------------------------------------------------------------------- /sklearn/tree/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.tree` module includes decision tree-based models for 3 | classification and regression. 
4 | """ 5 | 6 | from ._classes import BaseDecisionTree 7 | from ._classes import DecisionTreeClassifier 8 | from ._classes import DecisionTreeRegressor 9 | from ._classes import ExtraTreeClassifier 10 | from ._classes import ExtraTreeRegressor 11 | from ._export import export_graphviz, plot_tree, export_text 12 | 13 | __all__ = [ 14 | "BaseDecisionTree", 15 | "DecisionTreeClassifier", 16 | "DecisionTreeRegressor", 17 | "ExtraTreeClassifier", 18 | "ExtraTreeRegressor", 19 | "export_graphviz", 20 | "plot_tree", 21 | "export_text", 22 | ] 23 | -------------------------------------------------------------------------------- /sklearn/tree/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/tree/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/utils/_arpack.py: -------------------------------------------------------------------------------- 1 | from .validation import check_random_state 2 | 3 | 4 | def _init_arpack_v0(size, random_state): 5 | """Initialize the starting vector for iteration in ARPACK functions. 6 | 7 | Initialize a ndarray with values sampled from the uniform distribution on 8 | [-1, 1]. This initialization model has been chosen to be consistent with 9 | the ARPACK one as another initialization can lead to convergence issues. 10 | 11 | Parameters 12 | ---------- 13 | size : int 14 | The size of the eigenvalue vector to be initialized. 15 | 16 | random_state : int, RandomState instance or None, default=None 17 | The seed of the pseudo random number generator used to generate a 18 | uniform distribution. If int, random_state is the seed used by the 19 | random number generator; If RandomState instance, random_state is the 20 | random number generator; If None, the random number generator is the 21 | RandomState instance used by `np.random`. 
22 | 23 | Returns 24 | ------- 25 | v0 : ndarray of shape (size,) 26 | The initialized vector. 27 | """ 28 | random_state = check_random_state(random_state) 29 | v0 = random_state.uniform(-1, 1, size) 30 | return v0 31 | -------------------------------------------------------------------------------- /sklearn/utils/_fast_dict.pxd: -------------------------------------------------------------------------------- 1 | # Author: Gael Varoquaux 2 | # License: BSD 3 | """ 4 | Uses C++ map containers for fast dict-like behavior with keys being 5 | integers, and values float. 6 | """ 7 | 8 | from libcpp.map cimport map as cpp_map 9 | 10 | # Import the C-level symbols of numpy 11 | cimport numpy as np 12 | 13 | ctypedef np.float64_t DTYPE_t 14 | 15 | ctypedef np.intp_t ITYPE_t 16 | 17 | ############################################################################### 18 | # An object to be used in Python 19 | 20 | cdef class IntFloatDict: 21 | cdef cpp_map[ITYPE_t, DTYPE_t] my_map 22 | cdef _to_arrays(self, ITYPE_t [:] keys, DTYPE_t [:] values) 23 | -------------------------------------------------------------------------------- /sklearn/utils/_heap.pxd: -------------------------------------------------------------------------------- 1 | # Heap routines, used in various Cython implementations. 
2 | 3 | from cython cimport floating 4 | 5 | from ._typedefs cimport ITYPE_t 6 | 7 | cdef int simultaneous_sort( 8 | floating* dist, 9 | ITYPE_t* idx, 10 | ITYPE_t size 11 | ) nogil 12 | 13 | cdef int heap_push( 14 | floating* values, 15 | ITYPE_t* indices, 16 | ITYPE_t size, 17 | floating val, 18 | ITYPE_t val_idx, 19 | ) nogil 20 | -------------------------------------------------------------------------------- /sklearn/utils/_joblib.py: -------------------------------------------------------------------------------- 1 | import warnings as _warnings 2 | 3 | with _warnings.catch_warnings(): 4 | _warnings.simplefilter("ignore") 5 | # joblib imports may raise DeprecationWarning on certain Python 6 | # versions 7 | import joblib 8 | from joblib import logger 9 | from joblib import dump, load 10 | from joblib import __version__ 11 | from joblib import effective_n_jobs 12 | from joblib import hash 13 | from joblib import cpu_count, Parallel, Memory, delayed 14 | from joblib import parallel_backend, register_parallel_backend 15 | 16 | 17 | __all__ = [ 18 | "parallel_backend", 19 | "register_parallel_backend", 20 | "cpu_count", 21 | "Parallel", 22 | "Memory", 23 | "delayed", 24 | "effective_n_jobs", 25 | "hash", 26 | "logger", 27 | "dump", 28 | "load", 29 | "joblib", 30 | "__version__", 31 | ] 32 | -------------------------------------------------------------------------------- /sklearn/utils/_logistic_sigmoid.pyx: -------------------------------------------------------------------------------- 1 | from libc.math cimport log, exp 2 | 3 | import numpy as np 4 | cimport numpy as np 5 | 6 | np.import_array() 7 | ctypedef np.float64_t DTYPE_t 8 | 9 | 10 | cdef inline DTYPE_t _inner_log_logistic_sigmoid(const DTYPE_t x): 11 | """Log of the logistic sigmoid function log(1 / (1 + e ** -x))""" 12 | if x > 0: 13 | return -log(1. + exp(-x)) 14 | else: 15 | return x - log(1. 
+ exp(x)) 16 | 17 | 18 | def _log_logistic_sigmoid(unsigned int n_samples, 19 | unsigned int n_features, 20 | DTYPE_t[:, :] X, 21 | DTYPE_t[:, :] out): 22 | cdef: 23 | unsigned int i 24 | unsigned int j 25 | 26 | for i in range(n_samples): 27 | for j in range(n_features): 28 | out[i, j] = _inner_log_logistic_sigmoid(X[i, j]) 29 | return out 30 | -------------------------------------------------------------------------------- /sklearn/utils/_openmp_helpers.pxd: -------------------------------------------------------------------------------- 1 | # Helpers to access OpenMP threads information 2 | # 3 | # Those interfaces act as indirections which allows the non-support of OpenMP 4 | # for implementations which have been written for it. 5 | 6 | cdef int _openmp_thread_num() nogil 7 | -------------------------------------------------------------------------------- /sklearn/utils/_typedefs.pxd: -------------------------------------------------------------------------------- 1 | #!python 2 | cimport numpy as np 3 | 4 | # Floating point/data type 5 | ctypedef np.float64_t DTYPE_t # WARNING: should match DTYPE in typedefs.pyx 6 | 7 | cdef enum: 8 | DTYPECODE = np.NPY_FLOAT64 9 | ITYPECODE = np.NPY_INTP 10 | 11 | # Index/integer type. 12 | # WARNING: ITYPE_t must be a signed integer type or you will have a bad time! 13 | ctypedef np.intp_t ITYPE_t # WARNING: should match ITYPE in typedefs.pyx 14 | -------------------------------------------------------------------------------- /sklearn/utils/_typedefs.pyx: -------------------------------------------------------------------------------- 1 | #!python 2 | 3 | import numpy as np 4 | cimport numpy as np 5 | from libc.math cimport sqrt 6 | 7 | np.import_array() 8 | 9 | 10 | # use a hack to determine the associated numpy data types 11 | # NOTE: the following requires the buffer interface, only available in 12 | # numpy 1.5+. We'll choose the DTYPE by hand instead. 
13 | #cdef ITYPE_t idummy 14 | #cdef ITYPE_t[:] idummy_view = &idummy 15 | #ITYPE = np.asarray(idummy_view).dtype 16 | ITYPE = np.intp # WARNING: this should match ITYPE_t in typedefs.pxd 17 | 18 | #cdef DTYPE_t ddummy 19 | #cdef DTYPE_t[:] ddummy_view = &ddummy 20 | #DTYPE = np.asarray(ddummy_view).dtype 21 | DTYPE = np.float64 # WARNING: this should match DTYPE_t in typedefs.pxd 22 | 23 | # some handy constants 24 | cdef DTYPE_t INF = np.inf 25 | cdef DTYPE_t PI = np.pi 26 | cdef DTYPE_t ROOT_2PI = sqrt(2 * PI) 27 | -------------------------------------------------------------------------------- /sklearn/utils/murmurhash.pxd: -------------------------------------------------------------------------------- 1 | """Export fast murmurhash C/C++ routines + cython wrappers""" 2 | 3 | cimport numpy as np 4 | 5 | # The C API is disabled for now, since it requires -I flags to get 6 | # compilation to work even when these functions are not used. 7 | #cdef extern from "MurmurHash3.h": 8 | # void MurmurHash3_x86_32(void* key, int len, unsigned int seed, 9 | # void* out) 10 | # 11 | # void MurmurHash3_x86_128(void* key, int len, unsigned int seed, 12 | # void* out) 13 | # 14 | # void MurmurHash3_x64_128(void* key, int len, unsigned int seed, 15 | # void* out) 16 | 17 | 18 | cpdef np.uint32_t murmurhash3_int_u32(int key, unsigned int seed) 19 | cpdef np.int32_t murmurhash3_int_s32(int key, unsigned int seed) 20 | cpdef np.uint32_t murmurhash3_bytes_u32(bytes key, unsigned int seed) 21 | cpdef np.int32_t murmurhash3_bytes_s32(bytes key, unsigned int seed) 22 | -------------------------------------------------------------------------------- /sklearn/utils/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ageron/scikit-learn/5afd5e160bb731a0445c960fd94740080a44ebd7/sklearn/utils/tests/__init__.py -------------------------------------------------------------------------------- 
/sklearn/utils/tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import sklearn 4 | 5 | 6 | @pytest.fixture 7 | def print_changed_only_false(): 8 | sklearn.set_config(print_changed_only=False) 9 | yield 10 | sklearn.set_config(print_changed_only=True) # reset to default 11 | -------------------------------------------------------------------------------- /sklearn/utils/tests/test_arpack.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from numpy.testing import assert_allclose 3 | 4 | from sklearn.utils import check_random_state 5 | from sklearn.utils._arpack import _init_arpack_v0 6 | 7 | 8 | @pytest.mark.parametrize("seed", range(100)) 9 | def test_init_arpack_v0(seed): 10 | # check that the initialization is a sampling from a uniform distribution 11 | # where we can fix the random state 12 | size = 1000 13 | v0 = _init_arpack_v0(size, seed) 14 | 15 | rng = check_random_state(seed) 16 | assert_allclose(v0, rng.uniform(-1, 1, size=size)) 17 | -------------------------------------------------------------------------------- /sklearn/utils/tests/test_arrayfuncs.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from sklearn.utils._testing import assert_allclose 5 | from sklearn.utils.arrayfuncs import min_pos 6 | 7 | 8 | def test_min_pos(): 9 | # Check that min_pos returns a positive value and that it's consistent 10 | # between float and double 11 | X = np.random.RandomState(0).randn(100) 12 | 13 | min_double = min_pos(X) 14 | min_float = min_pos(X.astype(np.float32)) 15 | 16 | assert_allclose(min_double, min_float) 17 | assert min_double >= 0 18 | 19 | 20 | @pytest.mark.parametrize("dtype", [np.float32, np.float64]) 21 | def test_min_pos_no_positive(dtype): 22 | # Check that the return value of min_pos is the maximum representable 23 | # value of the 
input dtype when all input elements are <= 0 (#19328) 24 | X = np.full(100, -1.0).astype(dtype, copy=False) 25 | 26 | assert min_pos(X) == np.finfo(dtype).max 27 | -------------------------------------------------------------------------------- /sklearn/utils/tests/test_cython_templating.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import pytest 3 | import sklearn 4 | 5 | 6 | def test_files_generated_by_templates_are_git_ignored(): 7 | """Check the consistence of the files generated from template files.""" 8 | gitignore_file = pathlib.Path(sklearn.__file__).parent.parent / ".gitignore" 9 | if not gitignore_file.exists(): 10 | pytest.skip("Tests are not run from the source folder") 11 | 12 | base_dir = pathlib.Path(sklearn.__file__).parent 13 | ignored_files = gitignore_file.read_text().split("\n") 14 | ignored_files = [pathlib.Path(line) for line in ignored_files] 15 | 16 | for filename in base_dir.glob("**/*.tp"): 17 | filename = filename.relative_to(base_dir.parent) 18 | # From "path/to/template.p??.tp" to "path/to/template.p??" 19 | filename_wo_tempita_suffix = filename.with_suffix("") 20 | assert filename_wo_tempita_suffix in ignored_files 21 | -------------------------------------------------------------------------------- /sklearn/utils/tests/test_fast_dict.py: -------------------------------------------------------------------------------- 1 | """ Test fast_dict. 
2 | """ 3 | import numpy as np 4 | 5 | from sklearn.utils._fast_dict import IntFloatDict, argmin 6 | 7 | 8 | def test_int_float_dict(): 9 | rng = np.random.RandomState(0) 10 | keys = np.unique(rng.randint(100, size=10).astype(np.intp)) 11 | values = rng.rand(len(keys)) 12 | 13 | d = IntFloatDict(keys, values) 14 | for key, value in zip(keys, values): 15 | assert d[key] == value 16 | assert len(d) == len(keys) 17 | 18 | d.append(120, 3.0) 19 | assert d[120] == 3.0 20 | assert len(d) == len(keys) + 1 21 | for i in range(2000): 22 | d.append(i + 1000, 4.0) 23 | assert d[1100] == 4.0 24 | 25 | 26 | def test_int_float_dict_argmin(): 27 | # Test the argmin implementation on the IntFloatDict 28 | keys = np.arange(100, dtype=np.intp) 29 | values = np.arange(100, dtype=np.float64) 30 | d = IntFloatDict(keys, values) 31 | assert argmin(d) == (0, 0) 32 | -------------------------------------------------------------------------------- /sklearn/utils/tests/test_optimize.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from sklearn.utils.optimize import _newton_cg 4 | from scipy.optimize import fmin_ncg 5 | 6 | from sklearn.utils._testing import assert_array_almost_equal 7 | 8 | 9 | def test_newton_cg(): 10 | # Test that newton_cg gives same result as scipy's fmin_ncg 11 | 12 | rng = np.random.RandomState(0) 13 | A = rng.normal(size=(10, 10)) 14 | x0 = np.ones(10) 15 | 16 | def func(x): 17 | Ax = A.dot(x) 18 | return 0.5 * (Ax).dot(Ax) 19 | 20 | def grad(x): 21 | return A.T.dot(A.dot(x)) 22 | 23 | def hess(x, p): 24 | return p.dot(A.T.dot(A.dot(x.all()))) 25 | 26 | def grad_hess(x): 27 | return grad(x), lambda x: A.T.dot(A.dot(x)) 28 | 29 | assert_array_almost_equal( 30 | _newton_cg(grad_hess, func, grad, x0, tol=1e-10)[0], 31 | fmin_ncg(f=func, x0=x0, fprime=grad, fhess_p=hess), 32 | ) 33 | -------------------------------------------------------------------------------- /sklearn/utils/tests/test_parallel.py: 
-------------------------------------------------------------------------------- 1 | import pytest 2 | from joblib import Parallel 3 | 4 | from numpy.testing import assert_array_equal 5 | 6 | from sklearn._config import config_context, get_config 7 | from sklearn.utils.fixes import delayed 8 | 9 | 10 | def get_working_memory(): 11 | return get_config()["working_memory"] 12 | 13 | 14 | @pytest.mark.parametrize("n_jobs", [1, 2]) 15 | @pytest.mark.parametrize("backend", ["loky", "threading", "multiprocessing"]) 16 | def test_configuration_passes_through_to_joblib(n_jobs, backend): 17 | # Tests that the global configuration is passed to joblib jobs 18 | 19 | with config_context(working_memory=123): 20 | results = Parallel(n_jobs=n_jobs, backend=backend)( 21 | delayed(get_working_memory)() for _ in range(2) 22 | ) 23 | 24 | assert_array_equal(results, [123] * 2) 25 | -------------------------------------------------------------------------------- /sklearn/utils/tests/test_show_versions.py: -------------------------------------------------------------------------------- 1 | from sklearn.utils.fixes import threadpool_info 2 | from sklearn.utils._show_versions import _get_sys_info 3 | from sklearn.utils._show_versions import _get_deps_info 4 | from sklearn.utils._show_versions import show_versions 5 | from sklearn.utils._testing import ignore_warnings 6 | 7 | 8 | def test_get_sys_info(): 9 | sys_info = _get_sys_info() 10 | 11 | assert "python" in sys_info 12 | assert "executable" in sys_info 13 | assert "machine" in sys_info 14 | 15 | 16 | def test_get_deps_info(): 17 | with ignore_warnings(): 18 | deps_info = _get_deps_info() 19 | 20 | assert "pip" in deps_info 21 | assert "setuptools" in deps_info 22 | assert "sklearn" in deps_info 23 | assert "numpy" in deps_info 24 | assert "scipy" in deps_info 25 | assert "Cython" in deps_info 26 | assert "pandas" in deps_info 27 | assert "matplotlib" in deps_info 28 | assert "joblib" in deps_info 29 | 30 | 31 | def 
test_show_versions(capsys): 32 | with ignore_warnings(): 33 | show_versions() 34 | out, err = capsys.readouterr() 35 | 36 | assert "python" in out 37 | assert "numpy" in out 38 | 39 | info = threadpool_info() 40 | if info: 41 | assert "threadpoolctl info:" in out 42 | -------------------------------------------------------------------------------- /sklearn/utils/tests/test_weight_vector.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from sklearn.utils._weight_vector import ( 4 | WeightVector32, 5 | WeightVector64, 6 | ) 7 | 8 | 9 | @pytest.mark.parametrize( 10 | "dtype, WeightVector", 11 | [ 12 | (np.float32, WeightVector32), 13 | (np.float64, WeightVector64), 14 | ], 15 | ) 16 | def test_type_invariance(dtype, WeightVector): 17 | """Check the `dtype` consistency of `WeightVector`.""" 18 | weights = np.random.rand(100).astype(dtype) 19 | average_weights = np.random.rand(100).astype(dtype) 20 | 21 | weight_vector = WeightVector(weights, average_weights) 22 | 23 | assert np.asarray(weight_vector.w).dtype is np.dtype(dtype) 24 | assert np.asarray(weight_vector.aw).dtype is np.dtype(dtype) 25 | --------------------------------------------------------------------------------