├── .binder ├── postBuild └── requirements.txt ├── .circleci ├── artifact_path └── config.yml ├── .codecov.yml ├── .coveragerc ├── .gitattributes ├── .gitignore ├── .landscape.yml ├── .mailmap ├── .travis.yml ├── CONTRIBUTING.md ├── COPYING ├── ISSUE_TEMPLATE.md ├── MANIFEST.in ├── Makefile ├── PULL_REQUEST_TEMPLATE.md ├── README.rst ├── azure-pipelines.yml ├── benchmarks ├── .gitignore ├── bench_20newsgroups.py ├── bench_covertype.py ├── bench_feature_expansions.py ├── bench_glm.py ├── bench_glmnet.py ├── bench_hist_gradient_boosting.py ├── bench_hist_gradient_boosting_higgsboson.py ├── bench_isolation_forest.py ├── bench_isotonic.py ├── bench_lasso.py ├── bench_lof.py ├── bench_mnist.py ├── bench_multilabel_metrics.py ├── bench_plot_fastkmeans.py ├── bench_plot_hierarchical.py ├── bench_plot_incremental_pca.py ├── bench_plot_lasso_path.py ├── bench_plot_neighbors.py ├── bench_plot_nmf.py ├── bench_plot_omp_lars.py ├── bench_plot_parallel_pairwise.py ├── bench_plot_randomized_svd.py ├── bench_plot_svd.py ├── bench_plot_ward.py ├── bench_random_projections.py ├── bench_rcv1_logreg_convergence.py ├── bench_saga.py ├── bench_sample_without_replacement.py ├── bench_sgd_regression.py ├── bench_sparsify.py ├── bench_text_vectorizers.py ├── bench_tree.py ├── bench_tsne_mnist.py └── plot_tsne_mnist.py ├── build_tools ├── Makefile ├── azure │ ├── install.cmd │ ├── install.sh │ ├── posix-32.yml │ ├── posix.yml │ ├── test_docs.sh │ ├── test_pytest_soft_dependency.sh │ ├── test_script.cmd │ ├── test_script.sh │ ├── upload_codecov.cmd │ ├── upload_codecov.sh │ └── windows.yml ├── circle │ ├── build_doc.sh │ ├── build_test_pypy.sh │ ├── checkout_merge_commit.sh │ ├── linting.sh │ ├── list_versions.py │ └── push_doc.sh ├── generate_authors_table.py └── travis │ ├── after_success.sh │ ├── install.sh │ ├── test_docs.sh │ ├── test_pytest_soft_dependency.sh │ ├── test_script.sh │ └── travis_fastfail.sh ├── conftest.py ├── doc ├── Makefile ├── README.md ├── about.rst ├── authors.rst 
├── authors_emeritus.rst ├── binder │ └── requirements.txt ├── conf.py ├── conftest.py ├── contents.rst ├── data_transforms.rst ├── datasets │ └── index.rst ├── developers │ ├── advanced_installation.rst │ ├── contributing.rst │ ├── develop.rst │ ├── index.rst │ ├── maintainer.rst │ ├── performance.rst │ ├── plotting.rst │ ├── tips.rst │ └── utilities.rst ├── faq.rst ├── getting_started.rst ├── glossary.rst ├── governance.rst ├── images │ ├── anaconda-small.png │ ├── anaconda.png │ ├── axa-small.png │ ├── axa.png │ ├── bcg-small.png │ ├── bcg.png │ ├── bnp-small.png │ ├── bnp.png │ ├── cds-logo.png │ ├── columbia-small.png │ ├── dataiku-small.png │ ├── dataiku.png │ ├── dysco.png │ ├── fnrs-logo-small.png │ ├── fujitsu-small.png │ ├── fujitsu.png │ ├── google-small.png │ ├── grid_search_cross_validation.png │ ├── grid_search_workflow.png │ ├── inria-logo.jpg │ ├── inria-small.png │ ├── intel-small.png │ ├── intel.png │ ├── iris.pdf │ ├── iris.svg │ ├── last_digit.png │ ├── lda_model_graph.png │ ├── microsoft-small.png │ ├── microsoft.png │ ├── ml_map.png │ ├── multilayerperceptron_network.png │ ├── no_image.png │ ├── nvidia-small.png │ ├── nvidia.png │ ├── nyu_short_color.png │ ├── plot_digits_classification.png │ ├── plot_face_recognition_1.png │ ├── plot_face_recognition_2.png │ ├── png-logo-inria-la-fondation.png │ ├── rbm_graph.png │ ├── scikit-learn-logo-notext.png │ ├── scikit-learn-logo-small.png │ ├── sloan_banner.png │ ├── sloan_logo-small.png │ ├── sydney-stacked-small.png │ └── telecom-small.png ├── includes │ ├── big_toc_css.rst │ └── bigger_toc_css.rst ├── inspection.rst ├── install.rst ├── logos │ ├── favicon.ico │ ├── identity.pdf │ ├── scikit-learn-logo-notext.png │ ├── scikit-learn-logo-small.png │ ├── scikit-learn-logo-thumb.png │ ├── scikit-learn-logo.bmp │ ├── scikit-learn-logo.png │ └── scikit-learn-logo.svg ├── make.bat ├── model_selection.rst ├── modules │ ├── biclustering.rst │ ├── calibration.rst │ ├── classes.rst │ ├── clustering.rst │ ├── 
compose.rst │ ├── computing.rst │ ├── covariance.rst │ ├── cross_decomposition.rst │ ├── cross_validation.rst │ ├── decomposition.rst │ ├── density.rst │ ├── ensemble.rst │ ├── feature_extraction.rst │ ├── feature_selection.rst │ ├── gaussian_process.rst │ ├── glm_data │ │ └── lasso_enet_coordinate_descent.png │ ├── grid_search.rst │ ├── impute.rst │ ├── isotonic.rst │ ├── kernel_approximation.rst │ ├── kernel_ridge.rst │ ├── label_propagation.rst │ ├── lda_qda.rst │ ├── learning_curve.rst │ ├── linear_model.rst │ ├── manifold.rst │ ├── metrics.rst │ ├── mixture.rst │ ├── model_evaluation.rst │ ├── model_persistence.rst │ ├── multiclass.rst │ ├── naive_bayes.rst │ ├── neighbors.rst │ ├── neural_networks_supervised.rst │ ├── neural_networks_unsupervised.rst │ ├── outlier_detection.rst │ ├── partial_dependence.rst │ ├── permutation_importance.rst │ ├── pipeline.rst │ ├── preprocessing.rst │ ├── preprocessing_targets.rst │ ├── random_projection.rst │ ├── sgd.rst │ ├── svm.rst │ ├── tree.rst │ └── unsupervised_reduction.rst ├── preface.rst ├── presentations.rst ├── related_projects.rst ├── roadmap.rst ├── sphinxext │ ├── MANIFEST.in │ ├── custom_references_resolver.py │ ├── github_link.py │ └── sphinx_issues.py ├── supervised_learning.rst ├── support.rst ├── templates │ ├── class.rst │ ├── class_with_call.rst │ ├── class_without_init.rst │ ├── deprecated_class.rst │ ├── deprecated_class_with_call.rst │ ├── deprecated_class_without_init.rst │ ├── deprecated_function.rst │ ├── documentation.html │ ├── function.rst │ ├── generate_deprecated.sh │ ├── index.html │ └── numpydoc_docstring.rst ├── testimonials │ ├── README.txt │ ├── images │ │ ├── Makefile │ │ ├── aweber.png │ │ ├── bestofmedia-logo.png │ │ ├── betaworks.png │ │ ├── birchbox.jpg │ │ ├── booking.png │ │ ├── change-logo.png │ │ ├── dataiku_logo.png │ │ ├── datapublica.png │ │ ├── datarobot.png │ │ ├── evernote.png │ │ ├── howaboutwe.png │ │ ├── huggingface.png │ │ ├── infonea.jpg │ │ ├── inria.png │ │ ├── 
jpmorgan.png │ │ ├── lovely.png │ │ ├── machinalis.png │ │ ├── mars.png │ │ ├── okcupid.png │ │ ├── ottogroup_logo.png │ │ ├── peerindex.png │ │ ├── phimeca.png │ │ ├── rangespan.png │ │ ├── solido_logo.png │ │ ├── spotify.png │ │ ├── telecomparistech.jpg │ │ ├── yhat.png │ │ └── zopa.png │ └── testimonials.rst ├── themes │ ├── scikit-learn-modern │ │ ├── javascript.html │ │ ├── layout.html │ │ ├── nav.html │ │ ├── search.html │ │ ├── static │ │ │ ├── css │ │ │ │ ├── theme.css │ │ │ │ └── vendor │ │ │ │ │ └── bootstrap.min.css │ │ │ └── js │ │ │ │ ├── searchtools.js │ │ │ │ └── vendor │ │ │ │ └── bootstrap.min.js │ │ └── theme.conf │ └── scikit-learn │ │ ├── layout.html │ │ ├── static │ │ ├── ML_MAPS_README.rst │ │ ├── css │ │ │ ├── bootstrap-responsive.css │ │ │ ├── bootstrap-responsive.min.css │ │ │ ├── bootstrap.css │ │ │ ├── bootstrap.min.css │ │ │ └── examples.css │ │ ├── img │ │ │ ├── FNRS-logo.png │ │ │ ├── columbia.png │ │ │ ├── digicosme.png │ │ │ ├── forkme.png │ │ │ ├── glyphicons-halflings-white.png │ │ │ ├── glyphicons-halflings.png │ │ │ ├── google.png │ │ │ ├── inria-small.jpg │ │ │ ├── inria-small.png │ │ │ ├── nyu_short_color.png │ │ │ ├── plot_classifier_comparison_1.png │ │ │ ├── plot_manifold_sphere_1.png │ │ │ ├── scikit-learn-logo-notext.png │ │ │ ├── scikit-learn-logo-small.png │ │ │ ├── scikit-learn-logo.png │ │ │ ├── scikit-learn-logo.svg │ │ │ ├── sloan_logo.jpg │ │ │ ├── sydney-primary.jpeg │ │ │ ├── sydney-stacked.jpeg │ │ │ └── telecom.png │ │ ├── jquery.js │ │ ├── jquery.maphilight.js │ │ ├── jquery.maphilight.min.js │ │ ├── js │ │ │ ├── bootstrap.js │ │ │ ├── bootstrap.min.js │ │ │ ├── copybutton.js │ │ │ └── extra.js │ │ └── nature.css_t │ │ └── theme.conf ├── tune_toc.rst ├── tutorial │ ├── basic │ │ └── tutorial.rst │ ├── common_includes │ │ └── info.txt │ ├── index.rst │ ├── machine_learning_map │ │ ├── ML_MAPS_README.txt │ │ ├── index.rst │ │ ├── parse_path.py │ │ ├── pyparsing.py │ │ └── svg2imagemap.py │ ├── 
statistical_inference │ │ ├── finding_help.rst │ │ ├── index.rst │ │ ├── model_selection.rst │ │ ├── putting_together.rst │ │ ├── settings.rst │ │ ├── supervised_learning.rst │ │ └── unsupervised_learning.rst │ └── text_analytics │ │ ├── .gitignore │ │ ├── data │ │ ├── languages │ │ │ └── fetch_data.py │ │ ├── movie_reviews │ │ │ └── fetch_data.py │ │ └── twenty_newsgroups │ │ │ └── fetch_data.py │ │ ├── skeletons │ │ ├── exercise_01_language_train_model.py │ │ └── exercise_02_sentiment.py │ │ ├── solutions │ │ ├── exercise_01_language_train_model.py │ │ ├── exercise_02_sentiment.py │ │ └── generate_skeletons.py │ │ └── working_with_text_data.rst ├── unsupervised_learning.rst ├── user_guide.rst ├── visualizations.rst ├── whats_new.rst └── whats_new │ ├── _contributors.rst │ ├── changelog_legend.inc │ ├── older_versions.rst │ ├── v0.13.rst │ ├── v0.14.rst │ ├── v0.15.rst │ ├── v0.16.rst │ ├── v0.17.rst │ ├── v0.18.rst │ ├── v0.19.rst │ ├── v0.20.rst │ ├── v0.21.rst │ ├── v0.22.rst │ └── v0.23.rst ├── examples ├── .flake8 ├── README.txt ├── applications │ ├── README.txt │ ├── plot_face_recognition.py │ ├── plot_model_complexity_influence.py │ ├── plot_out_of_core_classification.py │ ├── plot_outlier_detection_housing.py │ ├── plot_prediction_latency.py │ ├── plot_species_distribution_modeling.py │ ├── plot_stock_market.py │ ├── plot_tomography_l1_reconstruction.py │ ├── plot_topics_extraction_with_nmf_lda.py │ ├── svm_gui.py │ └── wikipedia_principal_eigenvector.py ├── bicluster │ ├── README.txt │ ├── plot_bicluster_newsgroups.py │ ├── plot_spectral_biclustering.py │ └── plot_spectral_coclustering.py ├── calibration │ ├── README.txt │ ├── plot_calibration.py │ ├── plot_calibration_curve.py │ ├── plot_calibration_multiclass.py │ └── plot_compare_calibration.py ├── classification │ ├── README.txt │ ├── plot_classification_probability.py │ ├── plot_classifier_comparison.py │ ├── plot_digits_classification.py │ ├── plot_lda.py │ └── plot_lda_qda.py ├── cluster │ ├── 
README.txt │ ├── plot_adjusted_for_chance_measures.py │ ├── plot_affinity_propagation.py │ ├── plot_agglomerative_clustering.py │ ├── plot_agglomerative_clustering_metrics.py │ ├── plot_agglomerative_dendrogram.py │ ├── plot_birch_vs_minibatchkmeans.py │ ├── plot_cluster_comparison.py │ ├── plot_cluster_iris.py │ ├── plot_coin_segmentation.py │ ├── plot_coin_ward_segmentation.py │ ├── plot_color_quantization.py │ ├── plot_dbscan.py │ ├── plot_dict_face_patches.py │ ├── plot_digits_agglomeration.py │ ├── plot_digits_linkage.py │ ├── plot_face_compress.py │ ├── plot_feature_agglomeration_vs_univariate_selection.py │ ├── plot_inductive_clustering.py │ ├── plot_kmeans_assumptions.py │ ├── plot_kmeans_digits.py │ ├── plot_kmeans_silhouette_analysis.py │ ├── plot_kmeans_stability_low_dim_dense.py │ ├── plot_linkage_comparison.py │ ├── plot_mean_shift.py │ ├── plot_mini_batch_kmeans.py │ ├── plot_optics.py │ ├── plot_segmentation_toy.py │ └── plot_ward_structured_vs_unstructured.py ├── compose │ ├── README.txt │ ├── plot_column_transformer.py │ ├── plot_column_transformer_mixed_types.py │ ├── plot_compare_reduction.py │ ├── plot_digits_pipe.py │ ├── plot_feature_union.py │ └── plot_transformed_target.py ├── covariance │ ├── README.txt │ ├── plot_covariance_estimation.py │ ├── plot_lw_vs_oas.py │ ├── plot_mahalanobis_distances.py │ ├── plot_robust_vs_empirical_covariance.py │ └── plot_sparse_cov.py ├── cross_decomposition │ ├── README.txt │ └── plot_compare_cross_decomposition.py ├── datasets │ ├── README.txt │ ├── plot_digits_last_image.py │ ├── plot_iris_dataset.py │ ├── plot_random_dataset.py │ └── plot_random_multilabel_dataset.py ├── decomposition │ ├── README.txt │ ├── plot_beta_divergence.py │ ├── plot_faces_decomposition.py │ ├── plot_ica_blind_source_separation.py │ ├── plot_ica_vs_pca.py │ ├── plot_image_denoising.py │ ├── plot_incremental_pca.py │ ├── plot_kernel_pca.py │ ├── plot_pca_3d.py │ ├── plot_pca_iris.py │ ├── plot_pca_vs_fa_model_selection.py │ ├── 
plot_pca_vs_lda.py │ └── plot_sparse_coding.py ├── ensemble │ ├── README.txt │ ├── plot_adaboost_hastie_10_2.py │ ├── plot_adaboost_multiclass.py │ ├── plot_adaboost_regression.py │ ├── plot_adaboost_twoclass.py │ ├── plot_bias_variance.py │ ├── plot_ensemble_oob.py │ ├── plot_feature_transformation.py │ ├── plot_forest_importances.py │ ├── plot_forest_importances_faces.py │ ├── plot_forest_iris.py │ ├── plot_gradient_boosting_early_stopping.py │ ├── plot_gradient_boosting_oob.py │ ├── plot_gradient_boosting_quantile.py │ ├── plot_gradient_boosting_regression.py │ ├── plot_gradient_boosting_regularization.py │ ├── plot_isolation_forest.py │ ├── plot_random_forest_embedding.py │ ├── plot_random_forest_regression_multioutput.py │ ├── plot_stack_predictors.py │ ├── plot_voting_decision_regions.py │ ├── plot_voting_probas.py │ └── plot_voting_regressor.py ├── exercises │ ├── README.txt │ ├── plot_cv_diabetes.py │ ├── plot_cv_digits.py │ ├── plot_digits_classification_exercise.py │ └── plot_iris_exercise.py ├── feature_selection │ ├── README.txt │ ├── plot_f_test_vs_mi.py │ ├── plot_feature_selection.py │ ├── plot_feature_selection_pipeline.py │ ├── plot_permutation_test_for_classification.py │ ├── plot_rfe_digits.py │ ├── plot_rfe_with_cross_validation.py │ └── plot_select_from_model_boston.py ├── gaussian_process │ ├── README.txt │ ├── plot_compare_gpr_krr.py │ ├── plot_gpc.py │ ├── plot_gpc_iris.py │ ├── plot_gpc_isoprobability.py │ ├── plot_gpc_xor.py │ ├── plot_gpr_co2.py │ ├── plot_gpr_noisy.py │ ├── plot_gpr_noisy_targets.py │ ├── plot_gpr_on_structured_data.py │ └── plot_gpr_prior_posterior.py ├── impute │ ├── README.txt │ ├── plot_iterative_imputer_variants_comparison.py │ └── plot_missing_values.py ├── inspection │ ├── README.txt │ ├── plot_partial_dependence.py │ ├── plot_permutation_importance.py │ └── plot_permutation_importance_multicollinear.py ├── linear_model │ ├── README.txt │ ├── plot_ard.py │ ├── plot_bayesian_ridge.py │ ├── 
plot_bayesian_ridge_curvefit.py │ ├── plot_huber_vs_ridge.py │ ├── plot_iris_logistic.py │ ├── plot_lasso_and_elasticnet.py │ ├── plot_lasso_coordinate_descent_path.py │ ├── plot_lasso_dense_vs_sparse_data.py │ ├── plot_lasso_lars.py │ ├── plot_lasso_model_selection.py │ ├── plot_logistic.py │ ├── plot_logistic_l1_l2_sparsity.py │ ├── plot_logistic_multinomial.py │ ├── plot_logistic_path.py │ ├── plot_multi_task_lasso_support.py │ ├── plot_ols.py │ ├── plot_ols_3d.py │ ├── plot_ols_ridge_variance.py │ ├── plot_omp.py │ ├── plot_polynomial_interpolation.py │ ├── plot_ransac.py │ ├── plot_ridge_coeffs.py │ ├── plot_ridge_path.py │ ├── plot_robust_fit.py │ ├── plot_sgd_comparison.py │ ├── plot_sgd_early_stopping.py │ ├── plot_sgd_iris.py │ ├── plot_sgd_loss_functions.py │ ├── plot_sgd_penalties.py │ ├── plot_sgd_separating_hyperplane.py │ ├── plot_sgd_weighted_samples.py │ ├── plot_sparse_logistic_regression_20newsgroups.py │ ├── plot_sparse_logistic_regression_mnist.py │ └── plot_theilsen.py ├── manifold │ ├── README.txt │ ├── plot_compare_methods.py │ ├── plot_lle_digits.py │ ├── plot_manifold_sphere.py │ ├── plot_mds.py │ ├── plot_swissroll.py │ └── plot_t_sne_perplexity.py ├── mixture │ ├── README.txt │ ├── plot_concentration_prior.py │ ├── plot_gmm.py │ ├── plot_gmm_covariances.py │ ├── plot_gmm_pdf.py │ ├── plot_gmm_selection.py │ └── plot_gmm_sin.py ├── model_selection │ ├── README.txt │ ├── grid_search_text_feature_extraction.py │ ├── plot_confusion_matrix.py │ ├── plot_cv_indices.py │ ├── plot_cv_predict.py │ ├── plot_grid_search_digits.py │ ├── plot_grid_search_refit_callable.py │ ├── plot_learning_curve.py │ ├── plot_multi_metric_evaluation.py │ ├── plot_nested_cross_validation_iris.py │ ├── plot_precision_recall.py │ ├── plot_randomized_search.py │ ├── plot_roc.py │ ├── plot_roc_crossval.py │ ├── plot_train_error_vs_test_error.py │ ├── plot_underfitting_overfitting.py │ └── plot_validation_curve.py ├── multioutput │ ├── README.txt │ └── 
plot_classifier_chain_yeast.py ├── neighbors │ ├── README.txt │ ├── approximate_nearest_neighbors.py │ ├── plot_caching_nearest_neighbors.py │ ├── plot_classification.py │ ├── plot_digits_kde_sampling.py │ ├── plot_kde_1d.py │ ├── plot_lof_novelty_detection.py │ ├── plot_lof_outlier_detection.py │ ├── plot_nca_classification.py │ ├── plot_nca_dim_reduction.py │ ├── plot_nca_illustration.py │ ├── plot_nearest_centroid.py │ ├── plot_regression.py │ └── plot_species_kde.py ├── neural_networks │ ├── README.txt │ ├── plot_mlp_alpha.py │ ├── plot_mlp_training_curves.py │ ├── plot_mnist_filters.py │ └── plot_rbm_logistic_classification.py ├── plot_anomaly_comparison.py ├── plot_changed_only_pprint_parameter.py ├── plot_isotonic_regression.py ├── plot_johnson_lindenstrauss_bound.py ├── plot_kernel_approximation.py ├── plot_kernel_ridge_regression.py ├── plot_multilabel.py ├── plot_multioutput_face_completion.py ├── plot_partial_dependence_visualization_api.py ├── plot_roc_curve_visualization_api.py ├── preprocessing │ ├── README.txt │ ├── plot_all_scaling.py │ ├── plot_discretization.py │ ├── plot_discretization_classification.py │ ├── plot_discretization_strategies.py │ ├── plot_function_transformer.py │ ├── plot_map_data_to_normal.py │ └── plot_scaling_importance.py ├── release_highlights │ ├── README.txt │ └── plot_release_highlights_0_22_0.py ├── semi_supervised │ ├── README.txt │ ├── plot_label_propagation_digits.py │ ├── plot_label_propagation_digits_active_learning.py │ ├── plot_label_propagation_structure.py │ └── plot_label_propagation_versus_svm_iris.py ├── svm │ ├── README.txt │ ├── plot_custom_kernel.py │ ├── plot_iris_svc.py │ ├── plot_linearsvc_support_vectors.py │ ├── plot_oneclass.py │ ├── plot_rbf_parameters.py │ ├── plot_separating_hyperplane.py │ ├── plot_separating_hyperplane_unbalanced.py │ ├── plot_svm_anova.py │ ├── plot_svm_kernels.py │ ├── plot_svm_margin.py │ ├── plot_svm_nonlinear.py │ ├── plot_svm_regression.py │ ├── plot_svm_scale_c.py │ ├── 
plot_svm_tie_breaking.py │ └── plot_weighted_samples.py ├── text │ ├── README.txt │ ├── plot_document_classification_20newsgroups.py │ ├── plot_document_clustering.py │ └── plot_hashing_vs_dict_vectorizer.py └── tree │ ├── README.txt │ ├── plot_cost_complexity_pruning.py │ ├── plot_iris_dtc.py │ ├── plot_tree_regression.py │ ├── plot_tree_regression_multioutput.py │ └── plot_unveil_tree_structure.py ├── lgtm.yml ├── maint_tools ├── sort_whats_new.py ├── test_docstrings.py └── whats_missing.sh ├── setup.cfg ├── setup.py ├── site.cfg └── sklearn ├── __check_build ├── __init__.py ├── _check_build.pyx └── setup.py ├── __init__.py ├── _build_utils ├── __init__.py ├── deprecated_modules.py ├── openmp_helpers.py └── pre_build_helpers.py ├── _config.py ├── _distributor_init.py ├── _isotonic.pyx ├── base.py ├── calibration.py ├── cluster ├── __init__.py ├── _affinity_propagation.py ├── _bicluster.py ├── _birch.py ├── _dbscan.py ├── _dbscan_inner.pyx ├── _feature_agglomeration.py ├── _hierarchical.py ├── _hierarchical_fast.pyx ├── _k_means.py ├── _k_means_elkan.pyx ├── _k_means_fast.pyx ├── _mean_shift.py ├── _optics.py ├── _spectral.py ├── setup.py └── tests │ ├── __init__.py │ ├── common.py │ ├── test_affinity_propagation.py │ ├── test_bicluster.py │ ├── test_birch.py │ ├── test_dbscan.py │ ├── test_feature_agglomeration.py │ ├── test_hierarchical.py │ ├── test_k_means.py │ ├── test_mean_shift.py │ ├── test_optics.py │ └── test_spectral.py ├── compose ├── __init__.py ├── _column_transformer.py ├── _target.py └── tests │ ├── __init__.py │ ├── test_column_transformer.py │ └── test_target.py ├── conftest.py ├── covariance ├── __init__.py ├── _elliptic_envelope.py ├── _empirical_covariance.py ├── _graph_lasso.py ├── _robust_covariance.py ├── _shrunk_covariance.py └── tests │ ├── __init__.py │ ├── test_covariance.py │ ├── test_elliptic_envelope.py │ ├── test_graphical_lasso.py │ └── test_robust_covariance.py ├── cross_decomposition ├── __init__.py ├── _cca.py ├── _pls.py └── 
tests │ ├── __init__.py │ └── test_pls.py ├── datasets ├── __init__.py ├── _base.py ├── _california_housing.py ├── _covtype.py ├── _kddcup99.py ├── _lfw.py ├── _olivetti_faces.py ├── _openml.py ├── _rcv1.py ├── _samples_generator.py ├── _species_distributions.py ├── _svmlight_format.py ├── _svmlight_format_fast.pyx ├── _twenty_newsgroups.py ├── data │ ├── boston_house_prices.csv │ ├── breast_cancer.csv │ ├── diabetes_data.csv.gz │ ├── diabetes_target.csv.gz │ ├── digits.csv.gz │ ├── iris.csv │ ├── linnerud_exercise.csv │ ├── linnerud_physiological.csv │ └── wine_data.csv ├── descr │ ├── boston_house_prices.rst │ ├── breast_cancer.rst │ ├── california_housing.rst │ ├── covtype.rst │ ├── diabetes.rst │ ├── digits.rst │ ├── iris.rst │ ├── kddcup99.rst │ ├── lfw.rst │ ├── linnerud.rst │ ├── olivetti_faces.rst │ ├── rcv1.rst │ ├── twenty_newsgroups.rst │ └── wine_data.rst ├── images │ ├── README.txt │ ├── china.jpg │ └── flower.jpg ├── setup.py └── tests │ ├── __init__.py │ ├── data │ ├── openml │ │ ├── 1 │ │ │ ├── api-v1-json-data-1.json.gz │ │ │ ├── api-v1-json-data-features-1.json.gz │ │ │ ├── api-v1-json-data-qualities-1.json.gz │ │ │ └── data-v1-download-1.arff.gz │ │ ├── 2 │ │ │ ├── api-v1-json-data-2.json.gz │ │ │ ├── api-v1-json-data-features-2.json.gz │ │ │ ├── api-v1-json-data-list-data_name-anneal-limit-2-data_version-1.json.gz │ │ │ ├── api-v1-json-data-list-data_name-anneal-limit-2-status-active-.json.gz │ │ │ ├── api-v1-json-data-qualities-2.json.gz │ │ │ └── data-v1-download-1666876.arff.gz │ │ ├── 3 │ │ │ ├── api-v1-json-data-3.json.gz │ │ │ ├── api-v1-json-data-features-3.json.gz │ │ │ ├── api-v1-json-data-qualities-3.json.gz │ │ │ └── data-v1-download-3.arff.gz │ │ ├── 61 │ │ │ ├── api-v1-json-data-61.json.gz │ │ │ ├── api-v1-json-data-features-61.json.gz │ │ │ ├── api-v1-json-data-list-data_name-iris-limit-2-data_version-1.json.gz │ │ │ ├── api-v1-json-data-list-data_name-iris-limit-2-status-active-.json.gz │ │ │ ├── 
api-v1-json-data-qualities-61.json.gz │ │ │ └── data-v1-download-61.arff.gz │ │ ├── 62 │ │ │ ├── api-v1-json-data-62.json.gz │ │ │ ├── api-v1-json-data-features-62.json.gz │ │ │ ├── api-v1-json-data-qualities-62.json.gz │ │ │ └── data-v1-download-52352.arff.gz │ │ ├── 292 │ │ │ ├── api-v1-json-data-292.json.gz │ │ │ ├── api-v1-json-data-40981.json.gz │ │ │ ├── api-v1-json-data-features-292.json.gz │ │ │ ├── api-v1-json-data-features-40981.json.gz │ │ │ ├── api-v1-json-data-list-data_name-australian-limit-2-data_version-1-status-deactivated.json.gz │ │ │ ├── api-v1-json-data-list-data_name-australian-limit-2-data_version-1.json.gz │ │ │ ├── api-v1-json-data-list-data_name-australian-limit-2-status-active-.json.gz │ │ │ └── data-v1-download-49822.arff.gz │ │ ├── 561 │ │ │ ├── api-v1-json-data-561.json.gz │ │ │ ├── api-v1-json-data-features-561.json.gz │ │ │ ├── api-v1-json-data-list-data_name-cpu-limit-2-data_version-1.json.gz │ │ │ ├── api-v1-json-data-list-data_name-cpu-limit-2-status-active-.json.gz │ │ │ ├── api-v1-json-data-qualities-561.json.gz │ │ │ └── data-v1-download-52739.arff.gz │ │ ├── 1119 │ │ │ ├── api-v1-json-data-1119.json.gz │ │ │ ├── api-v1-json-data-features-1119.json.gz │ │ │ ├── api-v1-json-data-list-data_name-adult-census-limit-2-data_version-1.json.gz │ │ │ ├── api-v1-json-data-list-data_name-adult-census-limit-2-status-active-.json.gz │ │ │ ├── api-v1-json-data-qualities-1119.json.gz │ │ │ └── data-v1-download-54002.arff.gz │ │ ├── 40589 │ │ │ ├── api-v1-json-data-40589.json.gz │ │ │ ├── api-v1-json-data-features-40589.json.gz │ │ │ ├── api-v1-json-data-list-data_name-emotions-limit-2-data_version-3.json.gz │ │ │ ├── api-v1-json-data-list-data_name-emotions-limit-2-status-active-.json.gz │ │ │ ├── api-v1-json-data-qualities-40589.json.gz │ │ │ └── data-v1-download-4644182.arff.gz │ │ ├── 40675 │ │ │ ├── api-v1-json-data-40675.json.gz │ │ │ ├── api-v1-json-data-features-40675.json.gz │ │ │ ├── 
api-v1-json-data-list-data_name-glass2-limit-2-data_version-1-status-deactivated.json.gz │ │ │ ├── api-v1-json-data-list-data_name-glass2-limit-2-data_version-1.json.gz │ │ │ ├── api-v1-json-data-list-data_name-glass2-limit-2-status-active-.json.gz │ │ │ ├── api-v1-json-data-qualities-40675.json.gz │ │ │ └── data-v1-download-4965250.arff.gz │ │ ├── 40945 │ │ │ ├── api-v1-json-data-40945.json.gz │ │ │ ├── api-v1-json-data-features-40945.json.gz │ │ │ ├── api-v1-json-data-qualities-40945.json.gz │ │ │ └── data-v1-download-16826755.arff.gz │ │ └── 40966 │ │ │ ├── api-v1-json-data-40966.json.gz │ │ │ ├── api-v1-json-data-features-40966.json.gz │ │ │ ├── api-v1-json-data-list-data_name-miceprotein-limit-2-data_version-4.json.gz │ │ │ ├── api-v1-json-data-list-data_name-miceprotein-limit-2-status-active-.json.gz │ │ │ ├── api-v1-json-data-qualities-40966.json.gz │ │ │ └── data-v1-download-17928620.arff.gz │ ├── svmlight_classification.txt │ ├── svmlight_invalid.txt │ ├── svmlight_invalid_order.txt │ └── svmlight_multilabel.txt │ ├── test_20news.py │ ├── test_base.py │ ├── test_california_housing.py │ ├── test_common.py │ ├── test_covtype.py │ ├── test_kddcup99.py │ ├── test_lfw.py │ ├── test_olivetti_faces.py │ ├── test_openml.py │ ├── test_rcv1.py │ ├── test_samples_generator.py │ └── test_svmlight_format.py ├── decomposition ├── __init__.py ├── _base.py ├── _cdnmf_fast.pyx ├── _dict_learning.py ├── _factor_analysis.py ├── _fastica.py ├── _incremental_pca.py ├── _kernel_pca.py ├── _nmf.py ├── _online_lda.py ├── _online_lda_fast.pyx ├── _pca.py ├── _sparse_pca.py ├── _truncated_svd.py ├── setup.py └── tests │ ├── __init__.py │ ├── test_dict_learning.py │ ├── test_factor_analysis.py │ ├── test_fastica.py │ ├── test_incremental_pca.py │ ├── test_kernel_pca.py │ ├── test_nmf.py │ ├── test_online_lda.py │ ├── test_pca.py │ ├── test_sparse_pca.py │ └── test_truncated_svd.py ├── discriminant_analysis.py ├── dummy.py ├── ensemble ├── __init__.py ├── _bagging.py ├── _base.py ├── 
_forest.py ├── _gb.py ├── _gb_losses.py ├── _gradient_boosting.pyx ├── _hist_gradient_boosting │ ├── __init__.py │ ├── _binning.pyx │ ├── _gradient_boosting.pyx │ ├── _loss.pyx │ ├── _predictor.pyx │ ├── binning.py │ ├── common.pxd │ ├── common.pyx │ ├── gradient_boosting.py │ ├── grower.py │ ├── histogram.pyx │ ├── loss.py │ ├── predictor.py │ ├── splitting.pyx │ ├── tests │ │ ├── __init__.py │ │ ├── test_binning.py │ │ ├── test_compare_lightgbm.py │ │ ├── test_gradient_boosting.py │ │ ├── test_grower.py │ │ ├── test_histogram.py │ │ ├── test_loss.py │ │ ├── test_predictor.py │ │ ├── test_splitting.py │ │ └── test_warm_start.py │ └── utils.pyx ├── _iforest.py ├── _stacking.py ├── _voting.py ├── _weight_boosting.py ├── partial_dependence.py ├── setup.py └── tests │ ├── __init__.py │ ├── test_bagging.py │ ├── test_base.py │ ├── test_common.py │ ├── test_forest.py │ ├── test_gradient_boosting.py │ ├── test_gradient_boosting_loss_functions.py │ ├── test_iforest.py │ ├── test_partial_dependence.py │ ├── test_stacking.py │ ├── test_voting.py │ └── test_weight_boosting.py ├── exceptions.py ├── experimental ├── __init__.py ├── enable_hist_gradient_boosting.py ├── enable_iterative_imputer.py └── tests │ ├── __init__.py │ ├── test_enable_hist_gradient_boosting.py │ └── test_enable_iterative_imputer.py ├── externals ├── README ├── __init__.py ├── _arff.py ├── _lobpcg.py ├── _pep562.py ├── _pilutil.py ├── _scipy_linalg.py ├── conftest.py ├── joblib │ ├── __init__.py │ └── numpy_pickle.py ├── setup.py └── six.py ├── feature_extraction ├── __init__.py ├── _dict_vectorizer.py ├── _hashing.py ├── _hashing_fast.pyx ├── _stop_words.py ├── image.py ├── setup.py ├── tests │ ├── __init__.py │ ├── test_dict_vectorizer.py │ ├── test_feature_hasher.py │ ├── test_image.py │ └── test_text.py └── text.py ├── feature_selection ├── __init__.py ├── _base.py ├── _from_model.py ├── _mutual_info.py ├── _rfe.py ├── _univariate_selection.py ├── _variance_threshold.py └── tests │ ├── __init__.py │ 
├── test_base.py │ ├── test_chi2.py │ ├── test_feature_select.py │ ├── test_from_model.py │ ├── test_mutual_info.py │ ├── test_rfe.py │ └── test_variance_threshold.py ├── gaussian_process ├── __init__.py ├── _gpc.py ├── _gpr.py ├── kernels.py └── tests │ ├── __init__.py │ ├── _mini_sequence_kernel.py │ ├── test_gpc.py │ ├── test_gpr.py │ └── test_kernels.py ├── impute ├── __init__.py ├── _base.py ├── _iterative.py ├── _knn.py └── tests │ ├── __init__.py │ ├── test_base.py │ ├── test_common.py │ ├── test_impute.py │ └── test_knn.py ├── inspection ├── __init__.py ├── _partial_dependence.py ├── _permutation_importance.py └── tests │ ├── __init__.py │ ├── test_partial_dependence.py │ ├── test_permutation_importance.py │ └── test_plot_partial_dependence.py ├── isotonic.py ├── kernel_approximation.py ├── kernel_ridge.py ├── linear_model ├── __init__.py ├── _base.py ├── _bayes.py ├── _cd_fast.pyx ├── _coordinate_descent.py ├── _huber.py ├── _least_angle.py ├── _logistic.py ├── _omp.py ├── _passive_aggressive.py ├── _perceptron.py ├── _ransac.py ├── _ridge.py ├── _sag.py ├── _sag_fast.pyx.tp ├── _sgd_fast.pxd ├── _sgd_fast.pyx ├── _sgd_fast_helpers.h ├── _stochastic_gradient.py ├── _theil_sen.py ├── setup.py └── tests │ ├── __init__.py │ ├── test_base.py │ ├── test_bayes.py │ ├── test_coordinate_descent.py │ ├── test_huber.py │ ├── test_least_angle.py │ ├── test_logistic.py │ ├── test_omp.py │ ├── test_passive_aggressive.py │ ├── test_perceptron.py │ ├── test_ransac.py │ ├── test_ridge.py │ ├── test_sag.py │ ├── test_sgd.py │ ├── test_sparse_coordinate_descent.py │ └── test_theil_sen.py ├── manifold ├── __init__.py ├── _barnes_hut_tsne.pyx ├── _isomap.py ├── _locally_linear.py ├── _mds.py ├── _spectral_embedding.py ├── _t_sne.py ├── _utils.pyx ├── setup.py └── tests │ ├── __init__.py │ ├── test_isomap.py │ ├── test_locally_linear.py │ ├── test_mds.py │ ├── test_spectral_embedding.py │ └── test_t_sne.py ├── metrics ├── __init__.py ├── _base.py ├── _classification.py ├── 
_pairwise_fast.pyx ├── _plot │ ├── __init__.py │ ├── base.py │ ├── confusion_matrix.py │ ├── precision_recall_curve.py │ ├── roc_curve.py │ └── tests │ │ ├── __init__.py │ │ ├── test_plot_confusion_matrix.py │ │ ├── test_plot_precision_recall.py │ │ └── test_plot_roc_curve.py ├── _ranking.py ├── _regression.py ├── _scorer.py ├── cluster │ ├── __init__.py │ ├── _bicluster.py │ ├── _expected_mutual_info_fast.pyx │ ├── _supervised.py │ ├── _unsupervised.py │ ├── setup.py │ └── tests │ │ ├── __init__.py │ │ ├── test_bicluster.py │ │ ├── test_common.py │ │ ├── test_supervised.py │ │ └── test_unsupervised.py ├── pairwise.py ├── setup.py └── tests │ ├── __init__.py │ ├── test_classification.py │ ├── test_common.py │ ├── test_pairwise.py │ ├── test_ranking.py │ ├── test_regression.py │ └── test_score_objects.py ├── mixture ├── __init__.py ├── _base.py ├── _bayesian_mixture.py ├── _gaussian_mixture.py └── tests │ ├── __init__.py │ ├── test_bayesian_mixture.py │ ├── test_gaussian_mixture.py │ └── test_mixture.py ├── model_selection ├── __init__.py ├── _search.py ├── _split.py ├── _validation.py └── tests │ ├── __init__.py │ ├── common.py │ ├── test_search.py │ ├── test_split.py │ └── test_validation.py ├── multiclass.py ├── multioutput.py ├── naive_bayes.py ├── neighbors ├── __init__.py ├── _ball_tree.pyx ├── _base.py ├── _binary_tree.pxi ├── _classification.py ├── _dist_metrics.pxd ├── _dist_metrics.pyx ├── _graph.py ├── _kd_tree.pyx ├── _kde.py ├── _lof.py ├── _nca.py ├── _nearest_centroid.py ├── _quad_tree.pxd ├── _quad_tree.pyx ├── _regression.py ├── _typedefs.pxd ├── _typedefs.pyx ├── _unsupervised.py ├── setup.py └── tests │ ├── __init__.py │ ├── test_ball_tree.py │ ├── test_dist_metrics.py │ ├── test_graph.py │ ├── test_kd_tree.py │ ├── test_kde.py │ ├── test_lof.py │ ├── test_nca.py │ ├── test_nearest_centroid.py │ ├── test_neighbors.py │ ├── test_neighbors_pipeline.py │ ├── test_neighbors_tree.py │ └── test_quad_tree.py ├── neural_network ├── __init__.py ├── 
_base.py ├── _multilayer_perceptron.py ├── _rbm.py ├── _stochastic_optimizers.py └── tests │ ├── __init__.py │ ├── test_mlp.py │ ├── test_rbm.py │ └── test_stochastic_optimizers.py ├── pipeline.py ├── preprocessing ├── __init__.py ├── _csr_polynomial_expansion.pyx ├── _data.py ├── _discretization.py ├── _encoders.py ├── _function_transformer.py ├── _label.py ├── setup.py └── tests │ ├── __init__.py │ ├── test_common.py │ ├── test_data.py │ ├── test_discretization.py │ ├── test_encoders.py │ ├── test_function_transformer.py │ └── test_label.py ├── random_projection.py ├── semi_supervised ├── __init__.py ├── _label_propagation.py └── tests │ ├── __init__.py │ └── test_label_propagation.py ├── setup.py ├── svm ├── __init__.py ├── _base.py ├── _bounds.py ├── _classes.py ├── _liblinear.pxd ├── _liblinear.pyx ├── _libsvm.pxd ├── _libsvm.pyx ├── _libsvm_sparse.pyx ├── setup.py ├── src │ ├── liblinear │ │ ├── COPYRIGHT │ │ ├── _cython_blas_helpers.h │ │ ├── liblinear_helper.c │ │ ├── linear.cpp │ │ ├── linear.h │ │ ├── tron.cpp │ │ └── tron.h │ └── libsvm │ │ ├── LIBSVM_CHANGES │ │ ├── libsvm_helper.c │ │ ├── libsvm_sparse_helper.c │ │ ├── libsvm_template.cpp │ │ ├── svm.cpp │ │ └── svm.h └── tests │ ├── __init__.py │ ├── test_bounds.py │ ├── test_sparse.py │ └── test_svm.py ├── tests ├── __init__.py ├── test_base.py ├── test_build.py ├── test_calibration.py ├── test_check_build.py ├── test_common.py ├── test_config.py ├── test_discriminant_analysis.py ├── test_docstring_parameters.py ├── test_dummy.py ├── test_import_deprecations.py ├── test_init.py ├── test_isotonic.py ├── test_kernel_approximation.py ├── test_kernel_ridge.py ├── test_metaestimators.py ├── test_multiclass.py ├── test_multioutput.py ├── test_naive_bayes.py ├── test_pipeline.py ├── test_random_projection.py └── test_site_joblib.py ├── tree ├── __init__.py ├── _classes.py ├── _criterion.pxd ├── _criterion.pyx ├── _export.py ├── _reingold_tilford.py ├── _splitter.pxd ├── _splitter.pyx ├── _tree.pxd ├── 
_tree.pyx ├── _utils.pxd ├── _utils.pyx ├── setup.py └── tests │ ├── __init__.py │ ├── test_export.py │ ├── test_reingold_tilford.py │ └── test_tree.py └── utils ├── __init__.py ├── _cython_blas.pxd ├── _cython_blas.pyx ├── _fast_dict.pxd ├── _fast_dict.pyx ├── _joblib.py ├── _logistic_sigmoid.pyx ├── _mask.py ├── _mocking.py ├── _openmp_helpers.pyx ├── _pprint.py ├── _random.pxd ├── _random.pyx ├── _seq_dataset.pxd.tp ├── _seq_dataset.pyx.tp ├── _show_versions.py ├── _testing.py ├── _weight_vector.pxd ├── _weight_vector.pyx ├── arrayfuncs.pyx ├── class_weight.py ├── deprecation.py ├── estimator_checks.py ├── extmath.py ├── fixes.py ├── graph.py ├── graph_shortest_path.pyx ├── linear_assignment_.py ├── metaestimators.py ├── multiclass.py ├── murmurhash.pxd ├── murmurhash.pyx ├── optimize.py ├── random.py ├── setup.py ├── sparsefuncs.py ├── sparsefuncs_fast.pyx ├── src ├── MurmurHash3.cpp └── MurmurHash3.h ├── stats.py ├── tests ├── __init__.py ├── test_class_weight.py ├── test_cython_blas.py ├── test_deprecated_utils.py ├── test_deprecation.py ├── test_estimator_checks.py ├── test_extmath.py ├── test_fast_dict.py ├── test_fixes.py ├── test_linear_assignment.py ├── test_metaestimators.py ├── test_multiclass.py ├── test_murmurhash.py ├── test_optimize.py ├── test_pprint.py ├── test_random.py ├── test_seq_dataset.py ├── test_shortest_path.py ├── test_show_versions.py ├── test_sparsefuncs.py ├── test_testing.py ├── test_utils.py └── test_validation.py └── validation.py /.binder/requirements.txt: -------------------------------------------------------------------------------- 1 | --find-links https://sklearn-nightly.scdn8.secure.raxcdn.com scikit-learn 2 | --pre 3 | matplotlib 4 | scikit-image 5 | pandas 6 | sphinx-gallery 7 | scikit-learn 8 | 9 | -------------------------------------------------------------------------------- /.circleci/artifact_path: -------------------------------------------------------------------------------- 1 | 0/doc/_changed.html 2 | 
-------------------------------------------------------------------------------- /.codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | # Commits pushed to master should not make the overall 8 | # project coverage decrease by more than 1%: 9 | target: auto 10 | threshold: 1% 11 | patch: 12 | default: 13 | # Be tolerant on slight code coverage diff on PRs to limit 14 | # noisy red coverage status on github PRs. 15 | # Note The coverage stats are still uploaded 16 | # to codecov so that PR reviewers can see uncovered lines 17 | # in the github diff if they install the codecov browser 18 | # extension: 19 | # https://github.com/codecov/browser-extension 20 | target: auto 21 | threshold: 1% 22 | 23 | ignore: 24 | - "sklearn/externals" 25 | - "sklearn/_build_utils" 26 | - "**/setup.py" 27 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | source = sklearn 4 | parallel = True 5 | omit = 6 | */sklearn/externals/* 7 | */sklearn/_build_utils/* 8 | */benchmarks/* 9 | **/setup.py 10 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | /doc/whats_new.rst merge=union 2 | -------------------------------------------------------------------------------- /.landscape.yml: -------------------------------------------------------------------------------- 1 | pylint: 2 | disable: 3 | - unpacking-non-sequence 4 | ignore-paths: 5 | - sklearn/externals 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # make it explicit that we favor the new 
container-based travis workers 2 | language: python 3 | 4 | cache: 5 | apt: true 6 | directories: 7 | - $HOME/.cache/pip 8 | - $HOME/.ccache 9 | 10 | dist: xenial 11 | 12 | env: 13 | global: 14 | # Directory where tests are run from 15 | - TEST_DIR=/tmp/sklearn 16 | - OMP_NUM_THREADS=4 17 | - OPENBLAS_NUM_THREADS=4 18 | 19 | matrix: 20 | include: 21 | # Linux environment to test scikit-learn against numpy and scipy master 22 | # installed from their CI wheels in a virtualenv with the Python 23 | # interpreter provided by travis. 24 | - python: 3.7 25 | env: CHECK_WARNINGS="true" 26 | if: type = cron OR commit_message =~ /\[scipy-dev\]/ 27 | 28 | install: source build_tools/travis/install.sh 29 | script: 30 | - bash build_tools/travis/test_script.sh 31 | - bash build_tools/travis/test_docs.sh 32 | - bash build_tools/travis/test_pytest_soft_dependency.sh 33 | after_success: source build_tools/travis/after_success.sh 34 | notifications: 35 | webhooks: 36 | urls: 37 | - https://webhooks.gitter.im/e/4ffabb4df010b70cd624 38 | on_success: change # options: [always|never|change] default: always 39 | on_failure: always # options: [always|never|change] default: always 40 | on_start: never # options: [always|never|change] default: always 41 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.rst 2 | recursive-include doc * 3 | recursive-include examples * 4 | recursive-include sklearn *.c *.h *.pyx *.pxd *.pxi *.tp 5 | recursive-include sklearn/datasets *.csv *.csv.gz *.rst *.jpg *.txt *.arff.gz *.json.gz 6 | include COPYING 7 | include README.rst 8 | -------------------------------------------------------------------------------- /PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 5 | 6 | #### Reference Issues/PRs 7 | 13 | 14 | 15 | #### What does this implement/fix? 
Explain your changes. 16 | 17 | 18 | #### Any other comments? 19 | 20 | 21 | 33 | -------------------------------------------------------------------------------- /benchmarks/.gitignore: -------------------------------------------------------------------------------- 1 | /bhtsne 2 | *.npy 3 | *.json 4 | /mnist_tsne_output/ 5 | -------------------------------------------------------------------------------- /benchmarks/bench_plot_parallel_pairwise.py: -------------------------------------------------------------------------------- 1 | # Author: Mathieu Blondel 2 | # License: BSD 3 clause 3 | import time 4 | 5 | import matplotlib.pyplot as plt 6 | 7 | from sklearn.utils import check_random_state 8 | from sklearn.metrics.pairwise import pairwise_distances 9 | from sklearn.metrics.pairwise import pairwise_kernels 10 | 11 | def plot(func): 12 | random_state = check_random_state(0) 13 | one_core = [] 14 | multi_core = [] 15 | sample_sizes = range(1000, 6000, 1000) 16 | 17 | for n_samples in sample_sizes: 18 | X = random_state.rand(n_samples, 300) 19 | 20 | start = time.time() 21 | func(X, n_jobs=1) 22 | one_core.append(time.time() - start) 23 | 24 | start = time.time() 25 | func(X, n_jobs=-1) 26 | multi_core.append(time.time() - start) 27 | 28 | plt.figure('scikit-learn parallel %s benchmark results' % func.__name__) 29 | plt.plot(sample_sizes, one_core, label="one core") 30 | plt.plot(sample_sizes, multi_core, label="multi core") 31 | plt.xlabel('n_samples') 32 | plt.ylabel('Time (s)') 33 | plt.title('Parallel %s' % func.__name__) 34 | plt.legend() 35 | 36 | 37 | def euclidean_distances(X, n_jobs): 38 | return pairwise_distances(X, metric="euclidean", n_jobs=n_jobs) 39 | 40 | 41 | def rbf_kernels(X, n_jobs): 42 | return pairwise_kernels(X, metric="rbf", n_jobs=n_jobs, gamma=0.1) 43 | 44 | plot(euclidean_distances) 45 | plot(rbf_kernels) 46 | plt.show() 47 | -------------------------------------------------------------------------------- /benchmarks/bench_plot_ward.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | Benchmark scikit-learn's Ward implement compared to SciPy's 3 | """ 4 | 5 | import time 6 | 7 | import numpy as np 8 | from scipy.cluster import hierarchy 9 | import matplotlib.pyplot as plt 10 | 11 | from sklearn.cluster import AgglomerativeClustering 12 | 13 | ward = AgglomerativeClustering(n_clusters=3, linkage='ward') 14 | 15 | n_samples = np.logspace(.5, 3, 9) 16 | n_features = np.logspace(1, 3.5, 7) 17 | N_samples, N_features = np.meshgrid(n_samples, 18 | n_features) 19 | scikits_time = np.zeros(N_samples.shape) 20 | scipy_time = np.zeros(N_samples.shape) 21 | 22 | for i, n in enumerate(n_samples): 23 | for j, p in enumerate(n_features): 24 | X = np.random.normal(size=(n, p)) 25 | t0 = time.time() 26 | ward.fit(X) 27 | scikits_time[j, i] = time.time() - t0 28 | t0 = time.time() 29 | hierarchy.ward(X) 30 | scipy_time[j, i] = time.time() - t0 31 | 32 | ratio = scikits_time / scipy_time 33 | 34 | plt.figure("scikit-learn Ward's method benchmark results") 35 | plt.imshow(np.log(ratio), aspect='auto', origin="lower") 36 | plt.colorbar() 37 | plt.contour(ratio, levels=[1, ], colors='k') 38 | plt.yticks(range(len(n_features)), n_features.astype(np.int)) 39 | plt.ylabel('N features') 40 | plt.xticks(range(len(n_samples)), n_samples.astype(np.int)) 41 | plt.xlabel('N samples') 42 | plt.title("Scikit's time, in units of scipy time (log)") 43 | plt.show() 44 | -------------------------------------------------------------------------------- /benchmarks/plot_tsne_mnist.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import os.path as op 4 | 5 | import argparse 6 | 7 | 8 | LOG_DIR = "mnist_tsne_output" 9 | 10 | 11 | if __name__ == "__main__": 12 | parser = argparse.ArgumentParser('Plot benchmark results for t-SNE') 13 | parser.add_argument( 14 | '--labels', type=str, 15 | 
default=op.join(LOG_DIR, 'mnist_original_labels_10000.npy'), 16 | help='1D integer numpy array for labels') 17 | parser.add_argument( 18 | '--embedding', type=str, 19 | default=op.join(LOG_DIR, 'mnist_sklearn_TSNE_10000.npy'), 20 | help='2D float numpy array for embedded data') 21 | args = parser.parse_args() 22 | 23 | X = np.load(args.embedding) 24 | y = np.load(args.labels) 25 | 26 | for i in np.unique(y): 27 | mask = y == i 28 | plt.scatter(X[mask, 0], X[mask, 1], alpha=0.2, label=int(i)) 29 | plt.legend(loc='best') 30 | plt.show() 31 | -------------------------------------------------------------------------------- /build_tools/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for maintenance tools 2 | 3 | authors: 4 | python generate_authors_table.py 5 | -------------------------------------------------------------------------------- /build_tools/azure/install.cmd: -------------------------------------------------------------------------------- 1 | @rem https://github.com/numba/numba/blob/master/buildscripts/incremental/setup_conda_environment.cmd 2 | @rem The cmd /C hack circumvents a regression where conda installs a conda.bat 3 | @rem script in non-root environments. 
4 | set CONDA_INSTALL=cmd /C conda install -q -y 5 | set PIP_INSTALL=pip install -q 6 | 7 | @echo on 8 | 9 | IF "%PYTHON_ARCH%"=="64" ( 10 | @rem Deactivate any environment 11 | call deactivate 12 | @rem Clean up any left-over from a previous build 13 | conda remove --all -q -y -n %VIRTUALENV% 14 | conda create -n %VIRTUALENV% -q -y python=%PYTHON_VERSION% numpy scipy cython matplotlib wheel pillow joblib 15 | 16 | call activate %VIRTUALENV% 17 | 18 | IF "%PYTEST_VERSION%"=="*" ( 19 | pip install pytest 20 | ) else ( 21 | pip install pytest==%PYTEST_VERSION% 22 | ) 23 | pip install pytest-xdist 24 | ) else ( 25 | pip install numpy scipy cython pytest wheel pillow joblib 26 | ) 27 | if "%COVERAGE%" == "true" ( 28 | pip install coverage codecov pytest-cov 29 | ) 30 | python --version 31 | pip --version 32 | 33 | @rem Install the build and runtime dependencies of the project. 34 | python setup.py bdist_wheel bdist_wininst -b doc\logos\scikit-learn-logo.bmp 35 | 36 | @rem Install the generated wheel package to test it 37 | pip install --pre --no-index --find-links dist\ scikit-learn 38 | 39 | if %errorlevel% neq 0 exit /b %errorlevel% 40 | -------------------------------------------------------------------------------- /build_tools/azure/test_docs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | if [[ "$DISTRIB" =~ ^conda.* ]]; then 6 | source activate $VIRTUALENV 7 | elif [[ "$DISTRIB" == "ubuntu" ]]; then 8 | source $VIRTUALENV/bin/activate 9 | fi 10 | 11 | make test-doc 12 | -------------------------------------------------------------------------------- /build_tools/azure/test_pytest_soft_dependency.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # called when DISTRIB=="conda" 6 | source activate $VIRTUALENV 7 | conda remove -y py pytest || pip uninstall -y py pytest 8 | 9 | if [[ "$COVERAGE" == "true" ]]; then 10 | 
# conda may remove coverage when uninstalling pytest and py 11 | pip install coverage 12 | # Need to append the coverage to the existing .coverage generated by 13 | # running the tests. Make sure to reuse the same coverage 14 | # configuration as the one used by the main pytest run to be 15 | # able to combine the results. 16 | CMD="coverage run --rcfile=$BUILD_SOURCESDIRECTORY/.coveragerc" 17 | else 18 | CMD="python" 19 | fi 20 | 21 | # .coverage from running the tests is in TEST_DIR 22 | pushd $TEST_DIR 23 | $CMD -m sklearn.utils.tests.test_estimator_checks 24 | popd 25 | -------------------------------------------------------------------------------- /build_tools/azure/test_script.cmd: -------------------------------------------------------------------------------- 1 | @echo on 2 | 3 | @rem Only 64 bit uses conda and uses a python newer than 3.5 4 | IF "%PYTHON_ARCH%"=="64" ( 5 | call activate %VIRTUALENV% 6 | set PYTEST_ARGS=%PYTEST_ARGS% -n2 7 | ) 8 | 9 | mkdir %TMP_FOLDER% 10 | cd %TMP_FOLDER% 11 | 12 | if "%CHECK_WARNINGS%" == "true" ( 13 | set PYTEST_ARGS=%PYTEST_ARGS% -Werror::DeprecationWarning -Werror::FutureWarning 14 | ) 15 | 16 | if "%COVERAGE%" == "true" ( 17 | set PYTEST_ARGS=%PYTEST_ARGS% --cov sklearn 18 | ) 19 | 20 | pytest --junitxml=%JUNITXML% --showlocals --durations=20 %PYTEST_ARGS% --pyargs sklearn 21 | -------------------------------------------------------------------------------- /build_tools/azure/test_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | if [[ "$DISTRIB" =~ ^conda.* ]]; then 6 | source activate $VIRTUALENV 7 | elif [[ "$DISTRIB" == "ubuntu" ]] || [[ "$DISTRIB" == "ubuntu-32" ]]; then 8 | source $VIRTUALENV/bin/activate 9 | fi 10 | 11 | python --version 12 | python -c "import numpy; print('numpy %s' % numpy.__version__)" 13 | python -c "import scipy; print('scipy %s' % scipy.__version__)" 14 | python -c "\ 15 | try: 16 | import pandas 17 | 
print('pandas %s' % pandas.__version__) 18 | except ImportError: 19 | print('pandas not installed') 20 | " 21 | python -c "import multiprocessing as mp; print('%d CPUs' % mp.cpu_count())" 22 | pip list 23 | 24 | TEST_CMD="python -m pytest --showlocals --durations=20 --junitxml=$JUNITXML" 25 | 26 | if [[ "$COVERAGE" == "true" ]]; then 27 | export COVERAGE_PROCESS_START="$BUILD_SOURCESDIRECTORY/.coveragerc" 28 | TEST_CMD="$TEST_CMD --cov-config=$COVERAGE_PROCESS_START --cov sklearn" 29 | fi 30 | 31 | if [[ -n "$CHECK_WARNINGS" ]]; then 32 | TEST_CMD="$TEST_CMD -Werror::DeprecationWarning -Werror::FutureWarning" 33 | fi 34 | 35 | if [[ "$PYTHON_VERSION" == "*" ]]; then 36 | TEST_CMD="$TEST_CMD -n2" 37 | fi 38 | 39 | mkdir -p $TEST_DIR 40 | cp setup.cfg $TEST_DIR 41 | cd $TEST_DIR 42 | 43 | set -x 44 | $TEST_CMD --pyargs sklearn 45 | set +x 46 | -------------------------------------------------------------------------------- /build_tools/azure/upload_codecov.cmd: -------------------------------------------------------------------------------- 1 | @echo on 2 | 3 | @rem Only 64 bit uses conda 4 | IF "%PYTHON_ARCH%"=="64" ( 5 | call activate %VIRTUALENV% 6 | ) 7 | 8 | copy %TMP_FOLDER%\.coverage %BUILD_REPOSITORY_LOCALPATH% 9 | 10 | codecov --root %BUILD_REPOSITORY_LOCALPATH% -t %CODECOV_TOKEN% 11 | -------------------------------------------------------------------------------- /build_tools/azure/upload_codecov.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # called when COVERAGE=="true" and DISTRIB=="conda" 6 | export PATH=$HOME/miniconda3/bin:$PATH 7 | source activate $VIRTUALENV 8 | 9 | # Need to run codecov from a git checkout, so we copy .coverage 10 | # from TEST_DIR where pytest has been run 11 | pushd $TEST_DIR 12 | coverage combine --append 13 | popd 14 | cp $TEST_DIR/.coverage $BUILD_REPOSITORY_LOCALPATH 15 | 16 | codecov --root $BUILD_REPOSITORY_LOCALPATH -t $CODECOV_TOKEN || echo 
"codecov upload failed" 17 | -------------------------------------------------------------------------------- /build_tools/circle/build_test_pypy.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -x 3 | set -e 4 | 5 | apt-get -yq update 6 | apt-get -yq install libatlas-dev libatlas-base-dev liblapack-dev gfortran ccache libopenblas-dev 7 | 8 | pip install virtualenv 9 | 10 | if command -v pypy3; then 11 | virtualenv -p $(command -v pypy3) pypy-env 12 | elif command -v pypy; then 13 | virtualenv -p $(command -v pypy) pypy-env 14 | fi 15 | 16 | source pypy-env/bin/activate 17 | 18 | python --version 19 | which python 20 | 21 | # XXX: numpy version pinning can be reverted once PyPy 22 | # compatibility is resolved for numpy v1.6.x. For instance, 23 | # when PyPy3 >6.0 is released (see numpy/numpy#12740) 24 | pip install --extra-index https://antocuni.github.io/pypy-wheels/ubuntu numpy Cython pytest 25 | pip install scipy sphinx numpydoc docutils joblib pillow 26 | 27 | ccache -M 512M 28 | export CCACHE_COMPRESS=1 29 | export PATH=/usr/lib/ccache:$PATH 30 | export LOKY_MAX_CPU_COUNT="2" 31 | export OMP_NUM_THREADS="1" 32 | 33 | python setup.py build_ext --inplace -j 3 34 | pip install -e . 35 | 36 | # Check that Python implementation is PyPy 37 | python - << EOL 38 | import platform 39 | from sklearn.utils import IS_PYPY 40 | assert IS_PYPY is True, "platform={}!=PyPy".format(platform.python_implementation()) 41 | EOL 42 | 43 | python -m pytest sklearn/ 44 | python -m pytest doc/sphinxext/ 45 | python -m pytest $(find doc -name '*.rst' | sort) 46 | -------------------------------------------------------------------------------- /build_tools/circle/checkout_merge_commit.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | # Add `master` branch to the update list. 5 | # Otherwise CircleCI will give us a cached one. 
6 | FETCH_REFS="+master:master" 7 | 8 | # Update PR refs for testing. 9 | if [[ -n "${CIRCLE_PR_NUMBER}" ]] 10 | then 11 | FETCH_REFS="${FETCH_REFS} +refs/pull/${CIRCLE_PR_NUMBER}/head:pr/${CIRCLE_PR_NUMBER}/head" 12 | FETCH_REFS="${FETCH_REFS} +refs/pull/${CIRCLE_PR_NUMBER}/merge:pr/${CIRCLE_PR_NUMBER}/merge" 13 | fi 14 | 15 | # Retrieve the refs. 16 | git fetch -u origin ${FETCH_REFS} 17 | 18 | # Checkout the PR merge ref. 19 | if [[ -n "${CIRCLE_PR_NUMBER}" ]] 20 | then 21 | git checkout -qf "pr/${CIRCLE_PR_NUMBER}/merge" || ( 22 | echo Could not fetch merge commit. >&2 23 | echo There may be conflicts in merging PR \#${CIRCLE_PR_NUMBER} with master. >&2; 24 | exit 1) 25 | fi 26 | 27 | # Check for merge conflicts. 28 | if [[ -n "${CIRCLE_PR_NUMBER}" ]] 29 | then 30 | git branch --merged | grep master > /dev/null 31 | git branch --merged | grep "pr/${CIRCLE_PR_NUMBER}/head" > /dev/null 32 | fi 33 | -------------------------------------------------------------------------------- /build_tools/travis/after_success.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This script is meant to be called by the "after_success" step defined in 3 | # .travis.yml. See https://docs.travis-ci.com/ for more details. 4 | 5 | # License: 3-clause BSD 6 | 7 | set -e 8 | 9 | if [[ "$COVERAGE" == "true" ]]; then 10 | # Need to run codecov from a git checkout, so we copy .coverage 11 | # from TEST_DIR where pytest has been run 12 | cp $TEST_DIR/.coverage $TRAVIS_BUILD_DIR 13 | 14 | # Ignore codecov failures as the codecov server is not 15 | # very reliable but we don't want travis to report a failure 16 | # in the github UI just because the coverage report failed to 17 | # be published. 
18 | codecov --root $TRAVIS_BUILD_DIR || echo "codecov upload failed" 19 | fi 20 | -------------------------------------------------------------------------------- /build_tools/travis/test_docs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -x 5 | 6 | make test-doc 7 | -------------------------------------------------------------------------------- /build_tools/travis/test_pytest_soft_dependency.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | if [[ "$CHECK_PYTEST_SOFT_DEPENDENCY" == "true" ]]; then 6 | conda remove -y py pytest || pip uninstall -y py pytest 7 | if [[ "$COVERAGE" == "true" ]]; then 8 | # Need to append the coverage to the existing .coverage generated by 9 | # running the tests 10 | CMD="coverage run --append" 11 | else 12 | CMD="python" 13 | fi 14 | # .coverage from running the tests is in TEST_DIR 15 | cd $TEST_DIR 16 | $CMD -m sklearn.utils.tests.test_estimator_checks 17 | cd $OLDPWD 18 | fi 19 | -------------------------------------------------------------------------------- /build_tools/travis/travis_fastfail.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # This file is a part of Julia. License is MIT: https://julialang.org/license 3 | 4 | curlhdr="Accept: application/vnd.travis-ci.2+json" 5 | endpoint="https://api.travis-ci.org/repos/$TRAVIS_REPO_SLUG" 6 | 7 | # Fail fast for superseded builds to PR's 8 | if [ "$TRAVIS_PULL_REQUEST" != "false" ]; then 9 | newestbuildforthisPR=$(curl -H "$curlhdr" $endpoint/builds?event_type=pull_request | \ 10 | jq ".builds | map(select(.pull_request_number == $TRAVIS_PULL_REQUEST))[0].number") 11 | if [ $newestbuildforthisPR != null -a $newestbuildforthisPR != \"$TRAVIS_BUILD_NUMBER\" ]; then 12 | echo "There are newer queued builds for this pull request, failing early." 
13 | exit 1 14 | fi 15 | else 16 | # And for non-latest push builds in branches other than master or release* 17 | case $TRAVIS_BRANCH in 18 | master | release*) 19 | ;; 20 | *) 21 | if [ \"$TRAVIS_BUILD_NUMBER\" != $(curl -H "$curlhdr" \ 22 | $endpoint/branches/$TRAVIS_BRANCH | jq ".branch.number") ]; then 23 | echo "There are newer queued builds for this branch, failing early." 24 | exit 1 25 | fi 26 | ;; 27 | esac 28 | fi 29 | -------------------------------------------------------------------------------- /doc/README.md: -------------------------------------------------------------------------------- 1 | # Documentation for scikit-learn 2 | 3 | This directory contains the full manual and web site as displayed at 4 | http://scikit-learn.org. See 5 | http://scikit-learn.org/dev/developers/contributing.html#documentation for 6 | detailed information about the documentation. 7 | -------------------------------------------------------------------------------- /doc/authors_emeritus.rst: -------------------------------------------------------------------------------- 1 | - Mathieu Blondel 2 | - Matthieu Brucher 3 | - Lars Buitinck 4 | - David Cournapeau 5 | - Noel Dawe 6 | - Shiqiao Du 7 | - Vincent Dubourg 8 | - Edouard Duchesnay 9 | - Alexander Fabisch 10 | - Virgile Fritsch 11 | - Satrajit Ghosh 12 | - Angel Soler Gollonet 13 | - Chris Gorgolewski 14 | - Jaques Grobler 15 | - Brian Holt 16 | - Arnaud Joly 17 | - Thouis (Ray) Jones 18 | - Kyle Kastner 19 | - manoj kumar 20 | - Robert Layton 21 | - Wei Li 22 | - Paolo Losi 23 | - Gilles Louppe 24 | - Vincent Michel 25 | - Jarrod Millman 26 | - Alexandre Passos 27 | - Fabian Pedregosa 28 | - Peter Prettenhofer 29 | - (Venkat) Raghav, Rajagopalan 30 | - Jacob Schreiber 31 | - Jake Vanderplas 32 | - David Warde-Farley 33 | - Ron Weiss -------------------------------------------------------------------------------- /doc/binder/requirements.txt: 
-------------------------------------------------------------------------------- 1 | # A binder requirement file is required by sphinx-gallery. We don't really need 2 | # one since the binder requirement files live in the 3 | # scikit-learn/binder-examples repo and not in the scikit-learn.github.io repo 4 | # that comes from the scikit-learn doc build. This file can be removed if 5 | # 'dependencies' is made an optional key for binder in sphinx-gallery. 6 | -------------------------------------------------------------------------------- /doc/contents.rst: -------------------------------------------------------------------------------- 1 | .. include:: includes/big_toc_css.rst 2 | .. include:: tune_toc.rst 3 | 4 | .. Places global toc into the sidebar 5 | 6 | :globalsidebartoc: True 7 | 8 | ================= 9 | Table Of Contents 10 | ================= 11 | 12 | .. Define an order for the Table of Contents: 13 | 14 | .. toctree:: 15 | :maxdepth: 2 16 | 17 | preface 18 | tutorial/index 19 | getting_started 20 | user_guide 21 | glossary 22 | auto_examples/index 23 | modules/classes 24 | developers/index 25 | -------------------------------------------------------------------------------- /doc/data_transforms.rst: -------------------------------------------------------------------------------- 1 | .. include:: includes/big_toc_css.rst 2 | 3 | .. _data-transforms: 4 | 5 | Dataset transformations 6 | ----------------------- 7 | 8 | scikit-learn provides a library of transformers, which may clean (see 9 | :ref:`preprocessing`), reduce (see :ref:`data_reduction`), expand (see 10 | :ref:`kernel_approximation`) or generate (see :ref:`feature_extraction`) 11 | feature representations. 12 | 13 | Like other estimators, these are represented by classes with a ``fit`` method, 14 | which learns model parameters (e.g. mean and standard deviation for 15 | normalization) from a training set, and a ``transform`` method which applies 16 | this transformation model to unseen data. 
``fit_transform`` may be more 17 | convenient and efficient for modelling and transforming the training data 18 | simultaneously. 19 | 20 | Combining such transformers, either in parallel or in series, is covered in 21 | :ref:`combining_estimators`. :ref:`metrics` covers transforming feature 22 | spaces into affinity matrices, while :ref:`preprocessing_targets` considers 23 | transformations of the target space (e.g. categorical labels) for use in 24 | scikit-learn. 25 | 26 | .. toctree:: 27 | :maxdepth: 2 28 | 29 | modules/compose 30 | modules/feature_extraction 31 | modules/preprocessing 32 | modules/impute 33 | modules/unsupervised_reduction 34 | modules/random_projection 35 | modules/kernel_approximation 36 | modules/metrics 37 | modules/preprocessing_targets 38 | -------------------------------------------------------------------------------- /doc/developers/index.rst: -------------------------------------------------------------------------------- 1 | .. Places global toc into the sidebar 2 | 3 | :globalsidebartoc: True 4 | 5 | .. _developers_guide: 6 | 7 | ================= 8 | Developer's Guide 9 | ================= 10 | 11 | .. include:: ../includes/big_toc_css.rst 12 | .. include:: ../tune_toc.rst 13 | 14 | .. 
toctree:: 15 | 16 | contributing 17 | develop 18 | tips 19 | utilities 20 | performance 21 | advanced_installation 22 | maintainer 23 | plotting 24 | -------------------------------------------------------------------------------- /doc/images/anaconda-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/anaconda-small.png -------------------------------------------------------------------------------- /doc/images/anaconda.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/anaconda.png -------------------------------------------------------------------------------- /doc/images/axa-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/axa-small.png -------------------------------------------------------------------------------- /doc/images/axa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/axa.png -------------------------------------------------------------------------------- /doc/images/bcg-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/bcg-small.png -------------------------------------------------------------------------------- /doc/images/bcg.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/bcg.png -------------------------------------------------------------------------------- /doc/images/bnp-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/bnp-small.png -------------------------------------------------------------------------------- /doc/images/bnp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/bnp.png -------------------------------------------------------------------------------- /doc/images/cds-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/cds-logo.png -------------------------------------------------------------------------------- /doc/images/columbia-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/columbia-small.png -------------------------------------------------------------------------------- /doc/images/dataiku-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/dataiku-small.png -------------------------------------------------------------------------------- /doc/images/dataiku.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/dataiku.png 
-------------------------------------------------------------------------------- /doc/images/dysco.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/dysco.png -------------------------------------------------------------------------------- /doc/images/fnrs-logo-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/fnrs-logo-small.png -------------------------------------------------------------------------------- /doc/images/fujitsu-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/fujitsu-small.png -------------------------------------------------------------------------------- /doc/images/fujitsu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/fujitsu.png -------------------------------------------------------------------------------- /doc/images/google-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/google-small.png -------------------------------------------------------------------------------- /doc/images/grid_search_cross_validation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/grid_search_cross_validation.png 
-------------------------------------------------------------------------------- /doc/images/grid_search_workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/grid_search_workflow.png -------------------------------------------------------------------------------- /doc/images/inria-logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/inria-logo.jpg -------------------------------------------------------------------------------- /doc/images/inria-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/inria-small.png -------------------------------------------------------------------------------- /doc/images/intel-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/intel-small.png -------------------------------------------------------------------------------- /doc/images/intel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/intel.png -------------------------------------------------------------------------------- /doc/images/iris.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/iris.pdf -------------------------------------------------------------------------------- /doc/images/last_digit.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/last_digit.png -------------------------------------------------------------------------------- /doc/images/lda_model_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/lda_model_graph.png -------------------------------------------------------------------------------- /doc/images/microsoft-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/microsoft-small.png -------------------------------------------------------------------------------- /doc/images/microsoft.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/microsoft.png -------------------------------------------------------------------------------- /doc/images/ml_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/ml_map.png -------------------------------------------------------------------------------- /doc/images/multilayerperceptron_network.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/multilayerperceptron_network.png -------------------------------------------------------------------------------- /doc/images/no_image.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/no_image.png -------------------------------------------------------------------------------- /doc/images/nvidia-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/nvidia-small.png -------------------------------------------------------------------------------- /doc/images/nvidia.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/nvidia.png -------------------------------------------------------------------------------- /doc/images/nyu_short_color.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/nyu_short_color.png -------------------------------------------------------------------------------- /doc/images/plot_digits_classification.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/plot_digits_classification.png -------------------------------------------------------------------------------- /doc/images/plot_face_recognition_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/plot_face_recognition_1.png -------------------------------------------------------------------------------- /doc/images/plot_face_recognition_2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/plot_face_recognition_2.png -------------------------------------------------------------------------------- /doc/images/png-logo-inria-la-fondation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/png-logo-inria-la-fondation.png -------------------------------------------------------------------------------- /doc/images/rbm_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/rbm_graph.png -------------------------------------------------------------------------------- /doc/images/scikit-learn-logo-notext.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/scikit-learn-logo-notext.png -------------------------------------------------------------------------------- /doc/images/scikit-learn-logo-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/scikit-learn-logo-small.png -------------------------------------------------------------------------------- /doc/images/sloan_banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/sloan_banner.png -------------------------------------------------------------------------------- /doc/images/sloan_logo-small.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/sloan_logo-small.png -------------------------------------------------------------------------------- /doc/images/sydney-stacked-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/sydney-stacked-small.png -------------------------------------------------------------------------------- /doc/images/telecom-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/images/telecom-small.png -------------------------------------------------------------------------------- /doc/includes/big_toc_css.rst: -------------------------------------------------------------------------------- 1 | .. 2 | File to ..include in a document with a big table of content, to give 3 | it 'style' 4 | 5 | .. raw:: html 6 | 7 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /doc/includes/bigger_toc_css.rst: -------------------------------------------------------------------------------- 1 | .. 2 | File to ..include in a document with a very big table of content, to 3 | give it 'style' 4 | 5 | .. raw:: html 6 | 7 | 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /doc/inspection.rst: -------------------------------------------------------------------------------- 1 | .. include:: includes/big_toc_css.rst 2 | 3 | .. _inspection: 4 | 5 | Inspection 6 | ---------- 7 | 8 | Predictive performance is often the main goal of developing machine learning 9 | models. 
Yet summarising performance with an evaluation metric is often 10 | insufficient: it assumes that the evaluation metric and test dataset 11 | perfectly reflect the target domain, which is rarely true. In certain domains, 12 | a model needs a certain level of interpretability before it can be deployed. 13 | A model that is exhibiting performance issues needs to be debugged for one to 14 | understand the model's underlying issue. The 15 | :mod:`sklearn.inspection` module provides tools to help understand the 16 | predictions from a model and what affects them. This can be used to 17 | evaluate assumptions and biases of a model, design a better model, or 18 | to diagnose issues with model performance. 19 | 20 | .. toctree:: 21 | 22 | modules/partial_dependence 23 | modules/permutation_importance 24 | -------------------------------------------------------------------------------- /doc/logos/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/logos/favicon.ico -------------------------------------------------------------------------------- /doc/logos/identity.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/logos/identity.pdf -------------------------------------------------------------------------------- /doc/logos/scikit-learn-logo-notext.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/logos/scikit-learn-logo-notext.png -------------------------------------------------------------------------------- /doc/logos/scikit-learn-logo-small.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/logos/scikit-learn-logo-small.png -------------------------------------------------------------------------------- /doc/logos/scikit-learn-logo-thumb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/logos/scikit-learn-logo-thumb.png -------------------------------------------------------------------------------- /doc/logos/scikit-learn-logo.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/logos/scikit-learn-logo.bmp -------------------------------------------------------------------------------- /doc/logos/scikit-learn-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/logos/scikit-learn-logo.png -------------------------------------------------------------------------------- /doc/model_selection.rst: -------------------------------------------------------------------------------- 1 | .. include:: includes/big_toc_css.rst 2 | 3 | .. _model_selection: 4 | 5 | Model selection and evaluation 6 | ------------------------------ 7 | 8 | .. 
toctree:: 9 | :maxdepth: 2 10 | 11 | modules/cross_validation 12 | modules/grid_search 13 | modules/model_evaluation 14 | modules/model_persistence 15 | modules/learning_curve 16 | -------------------------------------------------------------------------------- /doc/modules/glm_data/lasso_enet_coordinate_descent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/modules/glm_data/lasso_enet_coordinate_descent.png -------------------------------------------------------------------------------- /doc/modules/isotonic.rst: -------------------------------------------------------------------------------- 1 | .. _isotonic: 2 | 3 | =================== 4 | Isotonic regression 5 | =================== 6 | 7 | .. currentmodule:: sklearn.isotonic 8 | 9 | The class :class:`IsotonicRegression` fits a non-decreasing function to data. 10 | It solves the following problem: 11 | 12 | minimize :math:`\sum_i w_i (y_i - \hat{y}_i)^2` 13 | 14 | subject to :math:`\hat{y}_{min} = \hat{y}_1 \le \hat{y}_2 ... \le \hat{y}_n = \hat{y}_{max}` 15 | 16 | where each :math:`w_i` is strictly positive and each :math:`y_i` is an 17 | arbitrary real number. It yields the vector of non-decreasing 18 | elements that is closest to the data in terms of mean squared error. In practice this list 19 | of elements forms a function that is piecewise linear. 20 | 21 | .. figure:: ../auto_examples/images/sphx_glr_plot_isotonic_regression_001.png 22 | :target: ../auto_examples/plot_isotonic_regression.html 23 | :align: center 24 | -------------------------------------------------------------------------------- /doc/modules/pipeline.rst: -------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | .. raw:: html 4 | 5 | 6 | 9 | 10 | This content is now at :ref:`combining_estimators`. 
11 | -------------------------------------------------------------------------------- /doc/preface.rst: -------------------------------------------------------------------------------- 1 | .. This helps define the TOC ordering for "about us" sections. Particularly 2 | useful for PDF output as this section is not linked from elsewhere. 3 | 4 | .. Places global toc into the sidebar 5 | 6 | :globalsidebartoc: True 7 | 8 | .. _preface_menu: 9 | 10 | .. include:: includes/big_toc_css.rst 11 | .. include:: tune_toc.rst 12 | 13 | ======================= 14 | Welcome to scikit-learn 15 | ======================= 16 | 17 | | 18 | 19 | .. toctree:: 20 | :maxdepth: 2 21 | 22 | install 23 | faq 24 | support 25 | related_projects 26 | about 27 | testimonials/testimonials 28 | whats_new 29 | roadmap 30 | governance 31 | 32 | | 33 | -------------------------------------------------------------------------------- /doc/sphinxext/MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include tests *.py 2 | include *.txt 3 | -------------------------------------------------------------------------------- /doc/supervised_learning.rst: -------------------------------------------------------------------------------- 1 | .. include:: includes/big_toc_css.rst 2 | 3 | .. _supervised-learning: 4 | 5 | Supervised learning 6 | ----------------------- 7 | 8 | .. 
toctree:: 9 | :maxdepth: 2 10 | 11 | modules/linear_model 12 | modules/lda_qda.rst 13 | modules/kernel_ridge.rst 14 | modules/svm 15 | modules/sgd 16 | modules/neighbors 17 | modules/gaussian_process 18 | modules/cross_decomposition.rst 19 | modules/naive_bayes 20 | modules/tree 21 | modules/ensemble 22 | modules/multiclass 23 | modules/feature_selection.rst 24 | modules/label_propagation.rst 25 | modules/isotonic.rst 26 | modules/calibration.rst 27 | modules/neural_networks_supervised 28 | -------------------------------------------------------------------------------- /doc/templates/class.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}============== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | {% block methods %} 9 | .. automethod:: __init__ 10 | {% endblock %} 11 | 12 | .. include:: {{module}}.{{objname}}.examples 13 | 14 | .. raw:: html 15 | 16 |
17 | -------------------------------------------------------------------------------- /doc/templates/class_with_call.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}=============== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | {% block methods %} 9 | .. automethod:: __init__ 10 | .. automethod:: __call__ 11 | {% endblock %} 12 | 13 | .. include:: {{module}}.{{objname}}.examples 14 | 15 | .. raw:: html 16 | 17 |
18 | -------------------------------------------------------------------------------- /doc/templates/class_without_init.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}============== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | .. include:: {{module}}.{{objname}}.examples 9 | 10 | .. raw:: html 11 | 12 |
13 | -------------------------------------------------------------------------------- /doc/templates/deprecated_class.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}============== 3 | 4 | .. meta:: 5 | :robots: noindex 6 | 7 | .. warning:: 8 | **DEPRECATED** 9 | 10 | 11 | .. currentmodule:: {{ module }} 12 | 13 | .. autoclass:: {{ objname }} 14 | 15 | {% block methods %} 16 | .. automethod:: __init__ 17 | {% endblock %} 18 | 19 | .. include:: {{module}}.{{objname}}.examples 20 | 21 | .. raw:: html 22 | 23 |
24 | -------------------------------------------------------------------------------- /doc/templates/deprecated_class_with_call.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}=============== 3 | 4 | .. meta:: 5 | :robots: noindex 6 | 7 | .. warning:: 8 | **DEPRECATED** 9 | 10 | 11 | .. currentmodule:: {{ module }} 12 | 13 | .. autoclass:: {{ objname }} 14 | 15 | {% block methods %} 16 | .. automethod:: __init__ 17 | .. automethod:: __call__ 18 | {% endblock %} 19 | 20 | .. include:: {{module}}.{{objname}}.examples 21 | 22 | .. raw:: html 23 | 24 |
25 | -------------------------------------------------------------------------------- /doc/templates/deprecated_class_without_init.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}============== 3 | 4 | .. meta:: 5 | :robots: noindex 6 | 7 | .. warning:: 8 | **DEPRECATED** 9 | 10 | 11 | .. currentmodule:: {{ module }} 12 | 13 | .. autoclass:: {{ objname }} 14 | 15 | .. include:: {{module}}.{{objname}}.examples 16 | 17 | .. raw:: html 18 | 19 |
20 | -------------------------------------------------------------------------------- /doc/templates/deprecated_function.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}==================== 3 | 4 | .. meta:: 5 | :robots: noindex 6 | 7 | .. warning:: 8 | **DEPRECATED** 9 | 10 | 11 | .. currentmodule:: {{ module }} 12 | 13 | .. autofunction:: {{ objname }} 14 | 15 | .. include:: {{module}}.{{objname}}.examples 16 | 17 | .. raw:: html 18 | 19 |
20 | -------------------------------------------------------------------------------- /doc/templates/documentation.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | scikit-learn: machine learning in Python 10 | 11 | 12 |

You will be automatically redirected to the main page.

13 | 14 | 15 | -------------------------------------------------------------------------------- /doc/templates/function.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}==================== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autofunction:: {{ objname }} 7 | 8 | .. include:: {{module}}.{{objname}}.examples 9 | 10 | .. raw:: html 11 | 12 |
13 | -------------------------------------------------------------------------------- /doc/templates/generate_deprecated.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | for f in [^d]*; do (head -n2 < $f; echo ' 3 | .. meta:: 4 | :robots: noindex 5 | 6 | .. warning:: 7 | **DEPRECATED** 8 | '; tail -n+3 $f) > deprecated_$f; done 9 | -------------------------------------------------------------------------------- /doc/templates/numpydoc_docstring.rst: -------------------------------------------------------------------------------- 1 | {{index}} 2 | {{summary}} 3 | {{extended_summary}} 4 | {{parameters}} 5 | {{returns}} 6 | {{yields}} 7 | {{other_parameters}} 8 | {{attributes}} 9 | {{raises}} 10 | {{warns}} 11 | {{warnings}} 12 | {{see_also}} 13 | {{notes}} 14 | {{references}} 15 | {{examples}} 16 | {{methods}} 17 | -------------------------------------------------------------------------------- /doc/testimonials/README.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | To find the list of people we contacted, see: 4 | https://docs.google.com/spreadsheet/ccc?key=0AhGnAxuBDhjmdDYwNzlZVE5SMkFsMjNBbGlaWkpNZ1E&usp=sharing 5 | 6 | To obtain access to this file, send an email to: 7 | nelle dot varoquaux at gmail dot com 8 | 9 | -------------------------------------------------------------------------------- /doc/testimonials/images/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/testimonials/images/Makefile -------------------------------------------------------------------------------- /doc/testimonials/images/aweber.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/testimonials/images/aweber.png -------------------------------------------------------------------------------- /doc/testimonials/images/bestofmedia-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/testimonials/images/bestofmedia-logo.png -------------------------------------------------------------------------------- /doc/testimonials/images/betaworks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/testimonials/images/betaworks.png -------------------------------------------------------------------------------- /doc/testimonials/images/birchbox.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/testimonials/images/birchbox.jpg -------------------------------------------------------------------------------- /doc/testimonials/images/booking.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/testimonials/images/booking.png -------------------------------------------------------------------------------- /doc/testimonials/images/change-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/testimonials/images/change-logo.png -------------------------------------------------------------------------------- /doc/testimonials/images/dataiku_logo.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/testimonials/images/dataiku_logo.png -------------------------------------------------------------------------------- /doc/testimonials/images/datapublica.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/testimonials/images/datapublica.png -------------------------------------------------------------------------------- /doc/testimonials/images/datarobot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/testimonials/images/datarobot.png -------------------------------------------------------------------------------- /doc/testimonials/images/evernote.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/testimonials/images/evernote.png -------------------------------------------------------------------------------- /doc/testimonials/images/howaboutwe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/testimonials/images/howaboutwe.png -------------------------------------------------------------------------------- /doc/testimonials/images/huggingface.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/testimonials/images/huggingface.png -------------------------------------------------------------------------------- 
/doc/testimonials/images/infonea.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/testimonials/images/infonea.jpg -------------------------------------------------------------------------------- /doc/testimonials/images/inria.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/testimonials/images/inria.png -------------------------------------------------------------------------------- /doc/testimonials/images/jpmorgan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/testimonials/images/jpmorgan.png -------------------------------------------------------------------------------- /doc/testimonials/images/lovely.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/testimonials/images/lovely.png -------------------------------------------------------------------------------- /doc/testimonials/images/machinalis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/testimonials/images/machinalis.png -------------------------------------------------------------------------------- /doc/testimonials/images/mars.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/testimonials/images/mars.png -------------------------------------------------------------------------------- 
/doc/testimonials/images/okcupid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/testimonials/images/okcupid.png -------------------------------------------------------------------------------- /doc/testimonials/images/ottogroup_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/testimonials/images/ottogroup_logo.png -------------------------------------------------------------------------------- /doc/testimonials/images/peerindex.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/testimonials/images/peerindex.png -------------------------------------------------------------------------------- /doc/testimonials/images/phimeca.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/testimonials/images/phimeca.png -------------------------------------------------------------------------------- /doc/testimonials/images/rangespan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/testimonials/images/rangespan.png -------------------------------------------------------------------------------- /doc/testimonials/images/solido_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/testimonials/images/solido_logo.png 
-------------------------------------------------------------------------------- /doc/testimonials/images/spotify.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/testimonials/images/spotify.png -------------------------------------------------------------------------------- /doc/testimonials/images/telecomparistech.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/testimonials/images/telecomparistech.jpg -------------------------------------------------------------------------------- /doc/testimonials/images/yhat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/testimonials/images/yhat.png -------------------------------------------------------------------------------- /doc/testimonials/images/zopa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/testimonials/images/zopa.png -------------------------------------------------------------------------------- /doc/themes/scikit-learn-modern/search.html: -------------------------------------------------------------------------------- 1 | {%- extends "basic/search.html" %} 2 | {% block extrahead %} 3 | 4 | 5 | 6 | 7 | 8 | {% endblock %} 9 | -------------------------------------------------------------------------------- /doc/themes/scikit-learn-modern/theme.conf: -------------------------------------------------------------------------------- 1 | [theme] 2 | inherit = basic 3 | pygments_style = default 4 | stylesheet = css/theme.css 5 | 6 | [options] 7 | google_analytics = 
true 8 | mathjax_path = 9 | -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/css/examples.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/themes/scikit-learn/static/css/examples.css -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/FNRS-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/themes/scikit-learn/static/img/FNRS-logo.png -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/columbia.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/themes/scikit-learn/static/img/columbia.png -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/digicosme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/themes/scikit-learn/static/img/digicosme.png -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/forkme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/themes/scikit-learn/static/img/forkme.png -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/glyphicons-halflings-white.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/themes/scikit-learn/static/img/glyphicons-halflings-white.png -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/glyphicons-halflings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/themes/scikit-learn/static/img/glyphicons-halflings.png -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/google.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/themes/scikit-learn/static/img/google.png -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/inria-small.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/themes/scikit-learn/static/img/inria-small.jpg -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/inria-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/themes/scikit-learn/static/img/inria-small.png -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/nyu_short_color.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/themes/scikit-learn/static/img/nyu_short_color.png -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/plot_classifier_comparison_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/themes/scikit-learn/static/img/plot_classifier_comparison_1.png -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/plot_manifold_sphere_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/themes/scikit-learn/static/img/plot_manifold_sphere_1.png -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/scikit-learn-logo-notext.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/themes/scikit-learn/static/img/scikit-learn-logo-notext.png -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/scikit-learn-logo-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/themes/scikit-learn/static/img/scikit-learn-logo-small.png -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/scikit-learn-logo.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/themes/scikit-learn/static/img/scikit-learn-logo.png -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/sloan_logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/themes/scikit-learn/static/img/sloan_logo.jpg -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/sydney-primary.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/themes/scikit-learn/static/img/sydney-primary.jpeg -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/sydney-stacked.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/themes/scikit-learn/static/img/sydney-stacked.jpeg -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/telecom.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/doc/themes/scikit-learn/static/img/telecom.png -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/js/extra.js: -------------------------------------------------------------------------------- 1 | // Miscellaneous enhancements to doc display 2 | 3 | 4 | $(document).ready(function() { 5 | /*** Add permalink buttons next to glossary terms ***/ 6 | 7 | $('dl.glossary > 
dt[id]').append(function() { 8 | return (''); 11 | }) 12 | }); 13 | -------------------------------------------------------------------------------- /doc/themes/scikit-learn/theme.conf: -------------------------------------------------------------------------------- 1 | [theme] 2 | inherit = basic 3 | stylesheet = nature.css 4 | pygments_style = tango 5 | 6 | [options] 7 | oldversion = False 8 | collapsiblesidebar = True 9 | google_analytics = True 10 | surveybanner = False 11 | sprintbanner = True 12 | -------------------------------------------------------------------------------- /doc/tutorial/common_includes/info.txt: -------------------------------------------------------------------------------- 1 | Meant to share common RST file snippets that we want to reuse by inclusion 2 | in the real tutorial in order to lower the maintenance burden 3 | of redundant sections. 4 | -------------------------------------------------------------------------------- /doc/tutorial/index.rst: -------------------------------------------------------------------------------- 1 | .. Places global toc into the sidebar 2 | 3 | :globalsidebartoc: True 4 | 5 | .. _tutorial_menu: 6 | 7 | 8 | .. include:: ../includes/big_toc_css.rst 9 | .. include:: ../tune_toc.rst 10 | 11 | ====================== 12 | scikit-learn Tutorials 13 | ====================== 14 | 15 | | 16 | 17 | .. toctree:: 18 | :maxdepth: 2 19 | 20 | basic/tutorial.rst 21 | statistical_inference/index.rst 22 | text_analytics/working_with_text_data.rst 23 | machine_learning_map/index 24 | ../presentations 25 | 26 | | 27 | 28 | .. note:: **Doctest Mode** 29 | 30 | The code-examples in the above tutorials are written in a 31 | *python-console* format. If you wish to easily execute these examples 32 | in **IPython**, use:: 33 | 34 | %doctest_mode 35 | 36 | in the IPython-console. You can then simply copy and paste the examples 37 | directly into IPython without having to worry about removing the **>>>** 38 | manually. 
39 | -------------------------------------------------------------------------------- /doc/tutorial/statistical_inference/finding_help.rst: -------------------------------------------------------------------------------- 1 | Finding help 2 | ============ 3 | 4 | 5 | The project mailing list 6 | ------------------------ 7 | 8 | If you encounter a bug with ``scikit-learn`` or something that needs 9 | clarification in the docstring or the online documentation, please feel free to 10 | ask on the `Mailing List `_ 11 | 12 | 13 | Q&A communities with Machine Learning practitioners 14 | ---------------------------------------------------- 15 | 16 | :Quora.com: 17 | 18 | Quora has a topic for Machine Learning related questions that 19 | also features some interesting discussions: 20 | https://www.quora.com/topic/Machine-Learning 21 | 22 | :Stack Exchange: 23 | 24 | The Stack Exchange family of sites hosts `multiple subdomains for Machine Learning questions`_. 25 | 26 | .. _`How do I learn machine learning?`: https://www.quora.com/How-do-I-learn-machine-learning-1 27 | 28 | .. 
_`multiple subdomains for Machine Learning questions`: https://meta.stackexchange.com/q/130524 29 | 30 | -- _'An excellent free online course for Machine Learning taught by Professor Andrew Ng of Stanford': https://www.coursera.org/learn/machine-learning 31 | 32 | -- _'Another excellent free online course that takes a more general approach to Artificial Intelligence': https://www.udacity.com/course/intro-to-artificial-intelligence--cs271 33 | -------------------------------------------------------------------------------- /doc/tutorial/text_analytics/.gitignore: -------------------------------------------------------------------------------- 1 | # cruft 2 | .*.swp 3 | *.pyc 4 | .DS_Store 5 | *.pdf 6 | 7 | # folder to be used for working on the exercises 8 | workspace 9 | 10 | # output of the sphinx build of the documentation 11 | tutorial/_build 12 | 13 | # datasets to be fetched from the web and cached locally 14 | data/twenty_newsgroups/20news-bydate.tar.gz 15 | data/twenty_newsgroups/20news-bydate-train 16 | data/twenty_newsgroups/20news-bydate-test 17 | 18 | data/movie_reviews/txt_sentoken 19 | data/movie_reviews/poldata.README.2.0 20 | 21 | data/languages/paragraphs 22 | data/languages/short_paragraphs 23 | data/languages/html 24 | 25 | data/labeled_faces_wild/lfw_preprocessed/ 26 | -------------------------------------------------------------------------------- /doc/tutorial/text_analytics/data/movie_reviews/fetch_data.py: -------------------------------------------------------------------------------- 1 | """Script to download the movie review dataset""" 2 | 3 | import os 4 | import tarfile 5 | from contextlib import closing 6 | from urllib.request import urlopen 7 | 8 | 9 | URL = ("http://www.cs.cornell.edu/people/pabo/" 10 | "movie-review-data/review_polarity.tar.gz") 11 | 12 | ARCHIVE_NAME = URL.rsplit('/', 1)[1] 13 | DATA_FOLDER = "txt_sentoken" 14 | 15 | 16 | if not os.path.exists(DATA_FOLDER): 17 | 18 | if not os.path.exists(ARCHIVE_NAME): 19 | 
print("Downloading dataset from %s (3 MB)" % URL) 20 | opener = urlopen(URL) 21 | with open(ARCHIVE_NAME, 'wb') as archive: 22 | archive.write(opener.read()) 23 | 24 | print("Decompressing %s" % ARCHIVE_NAME) 25 | with closing(tarfile.open(ARCHIVE_NAME, "r:gz")) as archive: 26 | archive.extractall(path='.') 27 | os.remove(ARCHIVE_NAME) 28 | -------------------------------------------------------------------------------- /doc/tutorial/text_analytics/data/twenty_newsgroups/fetch_data.py: -------------------------------------------------------------------------------- 1 | """Script to download the 20 newsgroups text classification set""" 2 | 3 | import os 4 | import tarfile 5 | from contextlib import closing 6 | from urllib.request import urlopen 7 | 8 | URL = ("http://people.csail.mit.edu/jrennie/" 9 | "20Newsgroups/20news-bydate.tar.gz") 10 | 11 | ARCHIVE_NAME = URL.rsplit('/', 1)[1] 12 | TRAIN_FOLDER = "20news-bydate-train" 13 | TEST_FOLDER = "20news-bydate-test" 14 | 15 | 16 | if not os.path.exists(TRAIN_FOLDER) or not os.path.exists(TEST_FOLDER): 17 | 18 | if not os.path.exists(ARCHIVE_NAME): 19 | print("Downloading dataset from %s (14 MB)" % URL) 20 | opener = urlopen(URL) 21 | with open(ARCHIVE_NAME, 'wb') as archive: 22 | archive.write(opener.read()) 23 | 24 | print("Decompressing %s" % ARCHIVE_NAME) 25 | with closing(tarfile.open(ARCHIVE_NAME, "r:gz")) as archive: 26 | archive.extractall(path='.') 27 | os.remove(ARCHIVE_NAME) 28 | -------------------------------------------------------------------------------- /doc/tutorial/text_analytics/solutions/generate_skeletons.py: -------------------------------------------------------------------------------- 1 | """Generate skeletons from the example code""" 2 | import os 3 | 4 | exercise_dir = os.path.dirname(__file__) 5 | if exercise_dir == '': 6 | exercise_dir = '.' 
7 | 8 | skeleton_dir = os.path.abspath(os.path.join(exercise_dir, '..', 'skeletons')) 9 | if not os.path.exists(skeleton_dir): 10 | os.makedirs(skeleton_dir) 11 | 12 | solutions = os.listdir(exercise_dir) 13 | 14 | for f in solutions: 15 | if not f.endswith('.py'): 16 | continue 17 | 18 | if f == os.path.basename(__file__): 19 | continue 20 | 21 | print("Generating skeleton for %s" % f) 22 | 23 | input_file = open(os.path.join(exercise_dir, f)) 24 | output_file = open(os.path.join(skeleton_dir, f), 'w') 25 | 26 | in_exercise_region = False 27 | 28 | for line in input_file: 29 | linestrip = line.strip() 30 | if len(linestrip) == 0: 31 | in_exercise_region = False 32 | elif linestrip.startswith('# TASK:'): 33 | in_exercise_region = True 34 | 35 | if not in_exercise_region or linestrip.startswith('#'): 36 | output_file.write(line) 37 | 38 | output_file.close() 39 | -------------------------------------------------------------------------------- /doc/unsupervised_learning.rst: -------------------------------------------------------------------------------- 1 | .. include:: includes/big_toc_css.rst 2 | 3 | .. _unsupervised-learning: 4 | 5 | Unsupervised learning 6 | ----------------------- 7 | 8 | .. toctree:: 9 | :maxdepth: 2 10 | 11 | modules/mixture 12 | modules/manifold 13 | modules/clustering 14 | modules/biclustering 15 | modules/decomposition 16 | modules/covariance 17 | modules/outlier_detection 18 | modules/density 19 | modules/neural_networks_unsupervised 20 | -------------------------------------------------------------------------------- /doc/user_guide.rst: -------------------------------------------------------------------------------- 1 | .. Places global toc into the sidebar 2 | 3 | :globalsidebartoc: True 4 | 5 | .. title:: User guide: contents 6 | 7 | .. _user_guide: 8 | 9 | ========== 10 | User Guide 11 | ========== 12 | 13 | .. include:: includes/big_toc_css.rst 14 | 15 | .. nice layout in the toc 16 | 17 | .. include:: tune_toc.rst 18 | 19 | .. 
toctree:: 20 | :numbered: 21 | :maxdepth: 3 22 | 23 | supervised_learning.rst 24 | unsupervised_learning.rst 25 | model_selection.rst 26 | inspection.rst 27 | visualizations.rst 28 | data_transforms.rst 29 | Dataset loading utilities 30 | modules/computing.rst 31 | -------------------------------------------------------------------------------- /doc/whats_new.rst: -------------------------------------------------------------------------------- 1 | .. currentmodule:: sklearn 2 | .. include:: whats_new/_contributors.rst 3 | 4 | Release History 5 | =============== 6 | 7 | Release notes for all scikit-learn releases are linked in this page. 8 | 9 | **Tip:** `Subscribe to scikit-learn releases `__ 10 | on libraries.io to be notified when new versions are released. 11 | 12 | .. toctree:: 13 | :maxdepth: 1 14 | 15 | Version 0.23 16 | Version 0.22 17 | Version 0.21 18 | Version 0.20 19 | Version 0.19 20 | Version 0.18 21 | Version 0.17 22 | Version 0.16 23 | Version 0.15 24 | Version 0.14 25 | Version 0.13 26 | Older Versions 27 | -------------------------------------------------------------------------------- /doc/whats_new/changelog_legend.inc: -------------------------------------------------------------------------------- 1 | Legend for changelogs 2 | --------------------- 3 | 4 | - |MajorFeature|: something big that you couldn't do before. 5 | - |Feature|: something that you couldn't do before. 6 | - |Efficiency|: an existing feature now may not require as much computation or 7 | memory. 8 | - |Enhancement|: a miscellaneous minor improvement. 9 | - |Fix|: something that previously didn't work as documented -- or according 10 | to reasonable expectations -- should now work. 11 | - |API|: you will need to change your code to have the same effect in the 12 | future; or a feature will be removed in the future.
13 | -------------------------------------------------------------------------------- /examples/.flake8: -------------------------------------------------------------------------------- 1 | # Examples specific flake8 configuration 2 | 3 | [flake8] 4 | # Same ignore as project-wide plus E402 (imports not at top of file) 5 | ignore=E121,E123,E126,E24,E226,E704,W503,W504,E402 6 | -------------------------------------------------------------------------------- /examples/README.txt: -------------------------------------------------------------------------------- 1 | .. _general_examples: 2 | 3 | Examples 4 | ======== 5 | 6 | Miscellaneous examples 7 | ---------------------- 8 | 9 | Miscellaneous and introductory examples for scikit-learn. 10 | -------------------------------------------------------------------------------- /examples/applications/README.txt: -------------------------------------------------------------------------------- 1 | .. _realworld_examples: 2 | 3 | Examples based on real world datasets 4 | ------------------------------------- 5 | 6 | Applications to real-world problems with some medium-sized datasets or 7 | an interactive user interface. 8 | -------------------------------------------------------------------------------- /examples/bicluster/README.txt: -------------------------------------------------------------------------------- 1 | .. _bicluster_examples: 2 | 3 | Biclustering 4 | ------------ 5 | 6 | Examples concerning the :mod:`sklearn.cluster.bicluster` module. 7 | -------------------------------------------------------------------------------- /examples/calibration/README.txt: -------------------------------------------------------------------------------- 1 | .. _calibration_examples: 2 | 3 | Calibration 4 | ----------------------- 5 | 6 | Examples illustrating the calibration of predicted probabilities of classifiers.
7 | -------------------------------------------------------------------------------- /examples/classification/README.txt: -------------------------------------------------------------------------------- 1 | .. _classification_examples: 2 | 3 | Classification 4 | ----------------------- 5 | 6 | General examples about classification algorithms. 7 | -------------------------------------------------------------------------------- /examples/cluster/README.txt: -------------------------------------------------------------------------------- 1 | .. _cluster_examples: 2 | 3 | Clustering 4 | ---------- 5 | 6 | Examples concerning the :mod:`sklearn.cluster` module. 7 | -------------------------------------------------------------------------------- /examples/compose/README.txt: -------------------------------------------------------------------------------- 1 | .. _compose_examples: 2 | 3 | Pipelines and composite estimators 4 | ---------------------------------- 5 | 6 | Examples of how to compose transformers and pipelines from other estimators. See the :ref:`User Guide <combining_estimators>`. 7 | -------------------------------------------------------------------------------- /examples/covariance/README.txt: -------------------------------------------------------------------------------- 1 | .. _covariance_examples: 2 | 3 | Covariance estimation 4 | --------------------- 5 | 6 | Examples concerning the :mod:`sklearn.covariance` module. 7 | -------------------------------------------------------------------------------- /examples/cross_decomposition/README.txt: -------------------------------------------------------------------------------- 1 | .. _cross_decomposition_examples: 2 | 3 | Cross decomposition 4 | ------------------- 5 | 6 | Examples concerning the :mod:`sklearn.cross_decomposition` module.
7 | 8 | -------------------------------------------------------------------------------- /examples/datasets/README.txt: -------------------------------------------------------------------------------- 1 | .. _dataset_examples: 2 | 3 | Dataset examples 4 | ----------------------- 5 | 6 | Examples concerning the :mod:`sklearn.datasets` module. 7 | -------------------------------------------------------------------------------- /examples/datasets/plot_digits_last_image.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | ========================================================= 6 | The Digit Dataset 7 | ========================================================= 8 | 9 | This dataset is made up of 1797 8x8 images. Each image, 10 | like the one shown below, is of a hand-written digit. 11 | In order to utilize an 8x8 figure like this, we'd have to 12 | first transform it into a feature vector with length 64. 13 | 14 | See `here 15 | `_ 16 | for more information about this dataset. 17 | """ 18 | print(__doc__) 19 | 20 | 21 | # Code source: Gaël Varoquaux 22 | # Modified for documentation by Jaques Grobler 23 | # License: BSD 3 clause 24 | 25 | from sklearn import datasets 26 | 27 | import matplotlib.pyplot as plt 28 | 29 | # Load the digits dataset 30 | digits = datasets.load_digits() 31 | 32 | # Display the last digit 33 | plt.figure(1, figsize=(3, 3)) 34 | plt.imshow(digits.images[-1], cmap=plt.cm.gray_r, interpolation='nearest') 35 | plt.show() 36 | -------------------------------------------------------------------------------- /examples/decomposition/README.txt: -------------------------------------------------------------------------------- 1 | .. _decomposition_examples: 2 | 3 | Decomposition 4 | ------------- 5 | 6 | Examples concerning the :mod:`sklearn.decomposition` module.
7 | 8 | -------------------------------------------------------------------------------- /examples/decomposition/plot_beta_divergence.py: -------------------------------------------------------------------------------- 1 | """ 2 | ============================== 3 | Beta-divergence loss functions 4 | ============================== 5 | 6 | A plot that compares the various Beta-divergence loss functions supported by 7 | the Multiplicative-Update ('mu') solver in :class:`sklearn.decomposition.NMF`. 8 | """ 9 | import numpy as np 10 | import matplotlib.pyplot as plt 11 | from sklearn.decomposition._nmf import _beta_divergence 12 | 13 | print(__doc__) 14 | 15 | x = np.linspace(0.001, 4, 1000) 16 | y = np.zeros(x.shape) 17 | 18 | colors = 'mbgyr' 19 | for j, beta in enumerate((0., 0.5, 1., 1.5, 2.)): 20 | for i, xi in enumerate(x): 21 | y[i] = _beta_divergence(1, xi, 1, beta) 22 | name = "beta = %1.1f" % beta 23 | plt.plot(x, y, label=name, color=colors[j]) 24 | 25 | plt.xlabel("x") 26 | plt.title("beta-divergence(1, x)") 27 | plt.legend(loc=0) 28 | plt.axis([0, 4, 0, 3]) 29 | plt.show() 30 | -------------------------------------------------------------------------------- /examples/ensemble/README.txt: -------------------------------------------------------------------------------- 1 | .. _ensemble_examples: 2 | 3 | Ensemble methods 4 | ---------------- 5 | 6 | Examples concerning the :mod:`sklearn.ensemble` module. 
7 | -------------------------------------------------------------------------------- /examples/exercises/README.txt: -------------------------------------------------------------------------------- 1 | Tutorial exercises 2 | ------------------ 3 | 4 | Exercises for the tutorials 5 | -------------------------------------------------------------------------------- /examples/exercises/plot_cv_digits.py: -------------------------------------------------------------------------------- 1 | """ 2 | ============================================= 3 | Cross-validation on Digits Dataset Exercise 4 | ============================================= 5 | 6 | A tutorial exercise using Cross-validation with an SVM on the Digits dataset. 7 | 8 | This exercise is used in the :ref:`cv_generators_tut` part of the 9 | :ref:`model_selection_tut` section of the :ref:`stat_learn_tut_index`. 10 | """ 11 | print(__doc__) 12 | 13 | 14 | import numpy as np 15 | from sklearn.model_selection import cross_val_score 16 | from sklearn import datasets, svm 17 | 18 | X, y = datasets.load_digits(return_X_y=True) 19 | 20 | svc = svm.SVC(kernel='linear') 21 | C_s = np.logspace(-10, 0, 10) 22 | 23 | scores = list() 24 | scores_std = list() 25 | for C in C_s: 26 | svc.C = C 27 | this_scores = cross_val_score(svc, X, y, n_jobs=1) 28 | scores.append(np.mean(this_scores)) 29 | scores_std.append(np.std(this_scores)) 30 | 31 | # Do the plotting 32 | import matplotlib.pyplot as plt 33 | plt.figure() 34 | plt.semilogx(C_s, scores) 35 | plt.semilogx(C_s, np.array(scores) + np.array(scores_std), 'b--') 36 | plt.semilogx(C_s, np.array(scores) - np.array(scores_std), 'b--') 37 | locs, labels = plt.yticks() 38 | plt.yticks(locs, list(map(lambda x: "%g" % x, locs))) 39 | plt.ylabel('CV score') 40 | plt.xlabel('Parameter C') 41 | plt.ylim(0, 1.1) 42 | plt.show() 43 | -------------------------------------------------------------------------------- /examples/exercises/plot_digits_classification_exercise.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | ================================ 3 | Digits Classification Exercise 4 | ================================ 5 | 6 | A tutorial exercise regarding the use of classification techniques on 7 | the Digits dataset. 8 | 9 | This exercise is used in the :ref:`clf_tut` part of the 10 | :ref:`supervised_learning_tut` section of the 11 | :ref:`stat_learn_tut_index`. 12 | """ 13 | print(__doc__) 14 | 15 | from sklearn import datasets, neighbors, linear_model 16 | 17 | X_digits, y_digits = datasets.load_digits(return_X_y=True) 18 | X_digits = X_digits / X_digits.max() 19 | 20 | n_samples = len(X_digits) 21 | 22 | X_train = X_digits[:int(.9 * n_samples)] 23 | y_train = y_digits[:int(.9 * n_samples)] 24 | X_test = X_digits[int(.9 * n_samples):] 25 | y_test = y_digits[int(.9 * n_samples):] 26 | 27 | knn = neighbors.KNeighborsClassifier() 28 | logistic = linear_model.LogisticRegression(max_iter=1000) 29 | 30 | print('KNN score: %f' % knn.fit(X_train, y_train).score(X_test, y_test)) 31 | print('LogisticRegression score: %f' 32 | % logistic.fit(X_train, y_train).score(X_test, y_test)) 33 | -------------------------------------------------------------------------------- /examples/feature_selection/README.txt: -------------------------------------------------------------------------------- 1 | .. _feature_selection_examples: 2 | 3 | Feature Selection 4 | ----------------------- 5 | 6 | Examples concerning the :mod:`sklearn.feature_selection` module. 7 | -------------------------------------------------------------------------------- /examples/feature_selection/plot_feature_selection_pipeline.py: -------------------------------------------------------------------------------- 1 | """ 2 | ================== 3 | Pipeline Anova SVM 4 | ================== 5 | 6 | Simple usage of Pipeline that runs successively a univariate 7 | feature selection with anova and then a SVM of the selected features. 
8 | 9 | Using a sub-pipeline, the fitted coefficients can be mapped back into 10 | the original feature space. 11 | """ 12 | from sklearn import svm 13 | from sklearn.datasets import make_classification 14 | from sklearn.feature_selection import SelectKBest, f_regression 15 | from sklearn.pipeline import make_pipeline 16 | from sklearn.model_selection import train_test_split 17 | from sklearn.metrics import classification_report 18 | 19 | print(__doc__) 20 | 21 | # import some data to play with 22 | X, y = make_classification( 23 | n_features=20, n_informative=3, n_redundant=0, n_classes=4, 24 | n_clusters_per_class=2) 25 | 26 | X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42) 27 | 28 | # ANOVA SVM-C 29 | # 1) anova filter, take 3 best ranked features 30 | anova_filter = SelectKBest(f_regression, k=3) 31 | # 2) svm 32 | clf = svm.LinearSVC() 33 | 34 | anova_svm = make_pipeline(anova_filter, clf) 35 | anova_svm.fit(X_train, y_train) 36 | y_pred = anova_svm.predict(X_test) 37 | print(classification_report(y_test, y_pred)) 38 | 39 | coef = anova_svm[:-1].inverse_transform(anova_svm['linearsvc'].coef_) 40 | print(coef) 41 | -------------------------------------------------------------------------------- /examples/feature_selection/plot_rfe_digits.py: -------------------------------------------------------------------------------- 1 | """ 2 | ============================= 3 | Recursive feature elimination 4 | ============================= 5 | 6 | A recursive feature elimination example showing the relevance of pixels in 7 | a digit classification task. 8 | 9 | .. 
note:: 10 | 11 | See also :ref:`sphx_glr_auto_examples_feature_selection_plot_rfe_with_cross_validation.py` 12 | 13 | """ 14 | print(__doc__) 15 | 16 | from sklearn.svm import SVC 17 | from sklearn.datasets import load_digits 18 | from sklearn.feature_selection import RFE 19 | import matplotlib.pyplot as plt 20 | 21 | # Load the digits dataset 22 | digits = load_digits() 23 | X = digits.images.reshape((len(digits.images), -1)) 24 | y = digits.target 25 | 26 | # Create the RFE object and rank each pixel 27 | svc = SVC(kernel="linear", C=1) 28 | rfe = RFE(estimator=svc, n_features_to_select=1, step=1) 29 | rfe.fit(X, y) 30 | ranking = rfe.ranking_.reshape(digits.images[0].shape) 31 | 32 | # Plot pixel ranking 33 | plt.matshow(ranking, cmap=plt.cm.Blues) 34 | plt.colorbar() 35 | plt.title("Ranking of pixels with RFE") 36 | plt.show() 37 | -------------------------------------------------------------------------------- /examples/gaussian_process/README.txt: -------------------------------------------------------------------------------- 1 | .. _gaussian_process_examples: 2 | 3 | Gaussian Process for Machine Learning 4 | ------------------------------------- 5 | 6 | Examples concerning the :mod:`sklearn.gaussian_process` module. 7 | 8 | -------------------------------------------------------------------------------- /examples/impute/README.txt: -------------------------------------------------------------------------------- 1 | .. _impute_examples: 2 | 3 | Missing Value Imputation 4 | ------------------------ 5 | 6 | Examples concerning the :mod:`sklearn.impute` module. 7 | -------------------------------------------------------------------------------- /examples/inspection/README.txt: -------------------------------------------------------------------------------- 1 | .. _inspection_examples: 2 | 3 | Inspection 4 | ---------- 5 | 6 | Examples related to the :mod:`sklearn.inspection` module. 
7 | 8 | -------------------------------------------------------------------------------- /examples/linear_model/README.txt: -------------------------------------------------------------------------------- 1 | .. _linear_examples: 2 | 3 | Generalized Linear Models 4 | ------------------------- 5 | 6 | Examples concerning the :mod:`sklearn.linear_model` module. 7 | -------------------------------------------------------------------------------- /examples/linear_model/plot_lasso_lars.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | ===================== 4 | Lasso path using LARS 5 | ===================== 6 | 7 | Computes Lasso Path along the regularization parameter using the LARS 8 | algorithm on the diabetes dataset. Each color represents a different 9 | feature of the coefficient vector, and this is displayed as a function 10 | of the regularization parameter. 11 | 12 | """ 13 | print(__doc__) 14 | 15 | # Author: Fabian Pedregosa 16 | # Alexandre Gramfort 17 | # License: BSD 3 clause 18 | 19 | import numpy as np 20 | import matplotlib.pyplot as plt 21 | 22 | from sklearn import linear_model 23 | from sklearn import datasets 24 | 25 | X, y = datasets.load_diabetes(return_X_y=True) 26 | 27 | print("Computing regularization path using the LARS ...") 28 | _, _, coefs = linear_model.lars_path(X, y, method='lasso', verbose=True) 29 | 30 | xx = np.sum(np.abs(coefs.T), axis=1) 31 | xx /= xx[-1] 32 | 33 | plt.plot(xx, coefs.T) 34 | ymin, ymax = plt.ylim() 35 | plt.vlines(xx, ymin, ymax, linestyle='dashed') 36 | plt.xlabel('|coef| / max|coef|') 37 | plt.ylabel('Coefficients') 38 | plt.title('LASSO Path') 39 | plt.axis('tight') 40 | plt.show() 41 | -------------------------------------------------------------------------------- /examples/linear_model/plot_sgd_loss_functions.py: -------------------------------------------------------------------------------- 1 | """ 2 | ========================== 3 | 
def modified_huber_loss(y_true, y_pred):
    """Evaluate the modified Huber loss element-wise.

    With the margin ``z = y_true * y_pred`` the loss is::

        0             if z >= 1
        (1 - z) ** 2  if -1 <= z < 1
        -4 * z        if z < -1

    Parameters
    ----------
    y_true : array-like or scalar
        First factor of the margin.
    y_pred : array-like or scalar
        Second factor of the margin. The two arguments are broadcast
        against each other.

    Returns
    -------
    loss : ndarray
        Loss values, with the broadcast shape of the inputs.
    """
    # Convert up front so that lists and plain scalars are accepted too;
    # boolean-mask assignment on the product would require an ndarray.
    z = np.asarray(y_pred) * np.asarray(y_true)
    # Quadratic branch for z >= -1, linear branch below it ...
    loss = np.where(z >= -1, (1 - z) ** 2, -4 * z)
    # ... and the loss vanishes once the margin reaches 1.
    return np.where(z >= 1., 0, loss)
9 | """ 10 | print(__doc__) 11 | 12 | import numpy as np 13 | import matplotlib.pyplot as plt 14 | from sklearn.linear_model import SGDClassifier 15 | from sklearn.datasets import make_blobs 16 | 17 | # we create 50 separable points 18 | X, Y = make_blobs(n_samples=50, centers=2, random_state=0, cluster_std=0.60) 19 | 20 | # fit the model 21 | clf = SGDClassifier(loss="hinge", alpha=0.01, max_iter=200) 22 | 23 | clf.fit(X, Y) 24 | 25 | # plot the line, the points, and the nearest vectors to the plane 26 | xx = np.linspace(-1, 5, 10) 27 | yy = np.linspace(-1, 5, 10) 28 | 29 | X1, X2 = np.meshgrid(xx, yy) 30 | Z = np.empty(X1.shape) 31 | for (i, j), val in np.ndenumerate(X1): 32 | x1 = val 33 | x2 = X2[i, j] 34 | p = clf.decision_function([[x1, x2]]) 35 | Z[i, j] = p[0] 36 | levels = [-1.0, 0.0, 1.0] 37 | linestyles = ['dashed', 'solid', 'dashed'] 38 | colors = 'k' 39 | plt.contour(X1, X2, Z, levels, colors=colors, linestyles=linestyles) 40 | plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired, 41 | edgecolor='black', s=20) 42 | 43 | plt.axis('tight') 44 | plt.show() 45 | -------------------------------------------------------------------------------- /examples/manifold/README.txt: -------------------------------------------------------------------------------- 1 | .. _manifold_examples: 2 | 3 | Manifold learning 4 | ----------------------- 5 | 6 | Examples concerning the :mod:`sklearn.manifold` module. 
7 | 8 | -------------------------------------------------------------------------------- /examples/manifold/plot_swissroll.py: -------------------------------------------------------------------------------- 1 | """ 2 | =================================== 3 | Swiss Roll reduction with LLE 4 | =================================== 5 | 6 | An illustration of Swiss Roll reduction 7 | with locally linear embedding 8 | """ 9 | 10 | # Author: Fabian Pedregosa -- 11 | # License: BSD 3 clause (C) INRIA 2011 12 | 13 | print(__doc__) 14 | 15 | import matplotlib.pyplot as plt 16 | 17 | # This import is needed to modify the way figure behaves 18 | from mpl_toolkits.mplot3d import Axes3D 19 | Axes3D 20 | 21 | #---------------------------------------------------------------------- 22 | # Locally linear embedding of the swiss roll 23 | 24 | from sklearn import manifold, datasets 25 | X, color = datasets.make_swiss_roll(n_samples=1500) 26 | 27 | print("Computing LLE embedding") 28 | X_r, err = manifold.locally_linear_embedding(X, n_neighbors=12, 29 | n_components=2) 30 | print("Done. Reconstruction error: %g" % err) 31 | 32 | #---------------------------------------------------------------------- 33 | # Plot result 34 | 35 | fig = plt.figure() 36 | 37 | ax = fig.add_subplot(211, projection='3d') 38 | ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, cmap=plt.cm.Spectral) 39 | 40 | ax.set_title("Original data") 41 | ax = fig.add_subplot(212) 42 | ax.scatter(X_r[:, 0], X_r[:, 1], c=color, cmap=plt.cm.Spectral) 43 | plt.axis('tight') 44 | plt.xticks([]), plt.yticks([]) 45 | plt.title('Projected data') 46 | plt.show() 47 | -------------------------------------------------------------------------------- /examples/mixture/README.txt: -------------------------------------------------------------------------------- 1 | .. _mixture_examples: 2 | 3 | Gaussian Mixture Models 4 | ----------------------- 5 | 6 | Examples concerning the :mod:`sklearn.mixture` module. 
7 | -------------------------------------------------------------------------------- /examples/model_selection/README.txt: -------------------------------------------------------------------------------- 1 | .. _model_selection_examples: 2 | 3 | Model Selection 4 | ----------------------- 5 | 6 | Examples related to the :mod:`sklearn.model_selection` module. 7 | -------------------------------------------------------------------------------- /examples/model_selection/plot_cv_predict.py: -------------------------------------------------------------------------------- 1 | """ 2 | ==================================== 3 | Plotting Cross-Validated Predictions 4 | ==================================== 5 | 6 | This example shows how to use 7 | :func:`~sklearn.model_selection.cross_val_predict` to visualize prediction 8 | errors. 9 | 10 | """ 11 | from sklearn import datasets 12 | from sklearn.model_selection import cross_val_predict 13 | from sklearn import linear_model 14 | import matplotlib.pyplot as plt 15 | 16 | lr = linear_model.LinearRegression() 17 | X, y = datasets.load_boston(return_X_y=True) 18 | 19 | # cross_val_predict returns an array of the same size as `y` where each entry 20 | # is a prediction obtained by cross validation: 21 | predicted = cross_val_predict(lr, X, y, cv=10) 22 | 23 | fig, ax = plt.subplots() 24 | ax.scatter(y, predicted, edgecolors=(0, 0, 0)) 25 | ax.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=4) 26 | ax.set_xlabel('Measured') 27 | ax.set_ylabel('Predicted') 28 | plt.show() 29 | -------------------------------------------------------------------------------- /examples/multioutput/README.txt: -------------------------------------------------------------------------------- 1 | .. _multioutput_examples: 2 | 3 | Multioutput methods 4 | ------------------- 5 | 6 | Examples concerning the :mod:`sklearn.multioutput` module. 
7 | -------------------------------------------------------------------------------- /examples/neighbors/README.txt: -------------------------------------------------------------------------------- 1 | .. _neighbors_examples: 2 | 3 | Nearest Neighbors 4 | ----------------------- 5 | 6 | Examples concerning the :mod:`sklearn.neighbors` module. 7 | -------------------------------------------------------------------------------- /examples/neural_networks/README.txt: -------------------------------------------------------------------------------- 1 | .. _neural_network_examples: 2 | 3 | Neural Networks 4 | ----------------------- 5 | 6 | Examples concerning the :mod:`sklearn.neural_network` module. 7 | -------------------------------------------------------------------------------- /examples/plot_changed_only_pprint_parameter.py: -------------------------------------------------------------------------------- 1 | """ 2 | ================================= 3 | Compact estimator representations 4 | ================================= 5 | 6 | This example illustrates the use of the print_changed_only global parameter. 7 | 8 | Setting print_changed_only to True will alter the representation of 9 | estimators to only show the parameters that have been set to non-default 10 | values. This can be used to have more compact representations.
11 | """ 12 | print(__doc__) 13 | 14 | from sklearn.linear_model import LogisticRegression 15 | from sklearn import set_config 16 | 17 | 18 | lr = LogisticRegression(penalty='l1') 19 | print('Default representation:') 20 | print(lr) 21 | # LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True, 22 | # intercept_scaling=1, l1_ratio=None, max_iter=100, 23 | # multi_class='auto', n_jobs=None, penalty='l1', 24 | # random_state=None, solver='warn', tol=0.0001, verbose=0, 25 | # warm_start=False) 26 | 27 | set_config(print_changed_only=True) 28 | print('\nWith changed_only option:') 29 | print(lr) 30 | # LogisticRegression(penalty='l1') 31 | -------------------------------------------------------------------------------- /examples/preprocessing/README.txt: -------------------------------------------------------------------------------- 1 | .. _preprocessing_examples: 2 | 3 | Preprocessing 4 | ------------- 5 | 6 | Examples concerning the :mod:`sklearn.preprocessing` module. 7 | -------------------------------------------------------------------------------- /examples/release_highlights/README.txt: -------------------------------------------------------------------------------- 1 | .. _release_highlights_examples: 2 | 3 | Release Highlights 4 | ------------------ 5 | 6 | These examples illustrate the main features of the releases of scikit-learn. 7 | -------------------------------------------------------------------------------- /examples/semi_supervised/README.txt: -------------------------------------------------------------------------------- 1 | .. _semi_supervised_examples: 2 | 3 | Semi Supervised Classification 4 | ------------------------------ 5 | 6 | Examples concerning the :mod:`sklearn.semi_supervised` module. 7 | -------------------------------------------------------------------------------- /examples/svm/README.txt: -------------------------------------------------------------------------------- 1 | .. 
_svm_examples: 2 | 3 | Support Vector Machines 4 | ----------------------- 5 | 6 | Examples concerning the :mod:`sklearn.svm` module. 7 | -------------------------------------------------------------------------------- /examples/svm/plot_separating_hyperplane.py: -------------------------------------------------------------------------------- 1 | """ 2 | ========================================= 3 | SVM: Maximum margin separating hyperplane 4 | ========================================= 5 | 6 | Plot the maximum margin separating hyperplane within a two-class 7 | separable dataset using a Support Vector Machine classifier with 8 | linear kernel. 9 | """ 10 | print(__doc__) 11 | 12 | import numpy as np 13 | import matplotlib.pyplot as plt 14 | from sklearn import svm 15 | from sklearn.datasets import make_blobs 16 | 17 | 18 | # we create 40 separable points 19 | X, y = make_blobs(n_samples=40, centers=2, random_state=6) 20 | 21 | # fit the model, don't regularize for illustration purposes 22 | clf = svm.SVC(kernel='linear', C=1000) 23 | clf.fit(X, y) 24 | 25 | plt.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=plt.cm.Paired) 26 | 27 | # plot the decision function 28 | ax = plt.gca() 29 | xlim = ax.get_xlim() 30 | ylim = ax.get_ylim() 31 | 32 | # create grid to evaluate model 33 | xx = np.linspace(xlim[0], xlim[1], 30) 34 | yy = np.linspace(ylim[0], ylim[1], 30) 35 | YY, XX = np.meshgrid(yy, xx) 36 | xy = np.vstack([XX.ravel(), YY.ravel()]).T 37 | Z = clf.decision_function(xy).reshape(XX.shape) 38 | 39 | # plot decision boundary and margins 40 | ax.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], alpha=0.5, 41 | linestyles=['--', '-', '--']) 42 | # plot support vectors 43 | ax.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s=100, 44 | linewidth=1, facecolors='none', edgecolors='k') 45 | plt.show() 46 | -------------------------------------------------------------------------------- /examples/svm/plot_svm_nonlinear.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | ============== 3 | Non-linear SVM 4 | ============== 5 | 6 | Perform binary classification using non-linear SVC 7 | with RBF kernel. The target to predict is a XOR of the 8 | inputs. 9 | 10 | The color map illustrates the decision function learned by the SVC. 11 | """ 12 | print(__doc__) 13 | 14 | import numpy as np 15 | import matplotlib.pyplot as plt 16 | from sklearn import svm 17 | 18 | xx, yy = np.meshgrid(np.linspace(-3, 3, 500), 19 | np.linspace(-3, 3, 500)) 20 | np.random.seed(0) 21 | X = np.random.randn(300, 2) 22 | Y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0) 23 | 24 | # fit the model 25 | clf = svm.NuSVC(gamma='auto') 26 | clf.fit(X, Y) 27 | 28 | # plot the decision function for each datapoint on the grid 29 | Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()]) 30 | Z = Z.reshape(xx.shape) 31 | 32 | plt.imshow(Z, interpolation='nearest', 33 | extent=(xx.min(), xx.max(), yy.min(), yy.max()), aspect='auto', 34 | origin='lower', cmap=plt.cm.PuOr_r) 35 | contours = plt.contour(xx, yy, Z, levels=[0], linewidths=2, 36 | linestyles='dashed') 37 | plt.scatter(X[:, 0], X[:, 1], s=30, c=Y, cmap=plt.cm.Paired, 38 | edgecolors='k') 39 | plt.xticks(()) 40 | plt.yticks(()) 41 | plt.axis([-3, 3, -3, 3]) 42 | plt.show() 43 | -------------------------------------------------------------------------------- /examples/text/README.txt: -------------------------------------------------------------------------------- 1 | .. _text_examples: 2 | 3 | Working with text documents 4 | ---------------------------- 5 | 6 | Examples concerning the :mod:`sklearn.feature_extraction.text` module. 7 | -------------------------------------------------------------------------------- /examples/tree/README.txt: -------------------------------------------------------------------------------- 1 | .. 
def entry_sort_key(s):
    """Return the sort rank of a what's-new entry.

    An entry that starts with ``'- |'`` is ranked by the position in
    ``LABEL_ORDER`` of the text between its first two ``'|'`` characters.
    Any other entry gets ``-1`` and therefore sorts before labelled ones.

    Raises
    ------
    ValueError
        If the label is not listed in ``LABEL_ORDER``.
    """
    if not s.startswith('- |'):
        return -1
    label = s.split('|')[1]
    return LABEL_ORDER.index(label)
def check_build():
    """Do nothing and return ``None``.

    NOTE(review): presumably this exists only so that importing the
    compiled ``_check_build`` extension proves the build succeeded --
    confirm against the package's ``__init__``.
    """
    return None
def configuration(parent_package='', top_path=None):
    """Create the ``numpy.distutils`` configuration of ``__check_build``.

    Parameters
    ----------
    parent_package : str, default=''
        Forwarded to ``Configuration`` as the parent package name.
    top_path : str or None, default=None
        Forwarded to ``Configuration`` as the top-level path.

    Returns
    -------
    config : numpy.distutils.misc_util.Configuration
        Configuration declaring the ``_check_build`` extension, built from
        ``_check_build.pyx`` with the NumPy headers on the include path.
    """
    # Imported lazily, as in the other setup scripts of this kind.
    from numpy.distutils.misc_util import Configuration

    config = Configuration('__check_build', parent_package, top_path)
    extension_options = {
        'sources': ['_check_build.pyx'],
        'include_dirs': [numpy.get_include()],
    }
    config.add_extension('_check_build', **extension_options)
    return config
def generate_clustered_data(seed=0, n_clusters=3, n_features=2,
                            n_samples_per_cluster=20, std=.4):
    """Generate Gaussian blobs around fixed, off-origin centers.

    Parameters
    ----------
    seed : int, default=0
        Seed of the ``RandomState`` used to draw the noise.
    n_clusters : int, default=3
        Number of blobs; the center table below has four rows.
    n_features : int, default=2
        Number of leading coordinates of each center that are used.
    n_samples_per_cluster : int, default=20
        Number of points drawn around every center.
    std : float, default=.4
        Scale applied to the standard-normal noise.

    Returns
    -------
    X : ndarray of shape (n_clusters * n_samples_per_cluster, n_features)
        The samples, stacked cluster after cluster.
    """
    rng = np.random.RandomState(seed)

    # The centers are deliberately shifted away from zero (+10) to check
    # the robustness of clustering algorithms to non-centered data.
    centers = 10 + np.array([[1, 1, 1, 0],
                             [-1, -1, 0, 1],
                             [1, -1, 1, 1],
                             [-1, 1, 1, 0]])

    # Seed the list with an empty block so that n_clusters == 0 still
    # yields an array of shape (0, n_features).
    blocks = [np.empty((0, n_features))]
    for cluster_idx in range(n_clusters):
        noise = std * rng.randn(n_samples_per_cluster, n_features)
        blocks.append(centers[cluster_idx, :n_features] + noise)
    return np.concatenate(blocks)
@pytest.fixture(scope='function')
def pyplot():
    """Setup and teardown fixture for matplotlib.

    This fixture checks if we can import matplotlib. If not, the tests will be
    skipped. Otherwise, we setup matplotlib backend and close the figures
    after running the functions.

    Yields
    ------
    pyplot : module
        The ``matplotlib.pyplot`` module.
    """
    matplotlib = pytest.importorskip('matplotlib')
    # ``warn=False`` is intentionally not passed: the keyword was deprecated
    # in Matplotlib 3.1 and later removed, so passing it raises a TypeError
    # on newer releases. With ``force=True`` a failed backend switch raises
    # instead of warning, so the flag had no effect here anyway.
    matplotlib.use('agg', force=True)
    pyplot = pytest.importorskip('matplotlib.pyplot')
    yield pyplot
    # Teardown: release every figure the test may have left open.
    pyplot.close('all')
5 | Covariance estimation is closely related to the theory of Gaussian Graphical 6 | Models. 7 | """ 8 | 9 | from ._empirical_covariance import (empirical_covariance, 10 | EmpiricalCovariance, 11 | log_likelihood) 12 | from ._shrunk_covariance import (shrunk_covariance, ShrunkCovariance, 13 | ledoit_wolf, ledoit_wolf_shrinkage, 14 | LedoitWolf, oas, OAS) 15 | from ._robust_covariance import fast_mcd, MinCovDet 16 | from ._graph_lasso import graphical_lasso, GraphicalLasso, GraphicalLassoCV 17 | from ._elliptic_envelope import EllipticEnvelope 18 | 19 | 20 | __all__ = ['EllipticEnvelope', 21 | 'EmpiricalCovariance', 22 | 'GraphicalLasso', 23 | 'GraphicalLassoCV', 24 | 'LedoitWolf', 25 | 'MinCovDet', 26 | 'OAS', 27 | 'ShrunkCovariance', 28 | 'empirical_covariance', 29 | 'fast_mcd', 30 | 'graphical_lasso', 31 | 'ledoit_wolf', 32 | 'ledoit_wolf_shrinkage', 33 | 'log_likelihood', 34 | 'oas', 35 | 'shrunk_covariance'] 36 | -------------------------------------------------------------------------------- /sklearn/covariance/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/covariance/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/cross_decomposition/__init__.py: -------------------------------------------------------------------------------- 1 | from ._pls import PLSCanonical, PLSRegression, PLSSVD 2 | from ._cca import CCA 3 | 4 | __all__ = ['PLSCanonical', 'PLSRegression', 'PLSSVD', 'CCA'] 5 | -------------------------------------------------------------------------------- /sklearn/cross_decomposition/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/cross_decomposition/tests/__init__.py 
-------------------------------------------------------------------------------- /sklearn/datasets/data/diabetes_data.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/data/diabetes_data.csv.gz -------------------------------------------------------------------------------- /sklearn/datasets/data/diabetes_target.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/data/diabetes_target.csv.gz -------------------------------------------------------------------------------- /sklearn/datasets/data/digits.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/data/digits.csv.gz -------------------------------------------------------------------------------- /sklearn/datasets/data/linnerud_exercise.csv: -------------------------------------------------------------------------------- 1 | Chins Situps Jumps 2 | 5 162 60 3 | 2 110 60 4 | 12 101 101 5 | 12 105 37 6 | 13 155 58 7 | 4 101 42 8 | 8 101 38 9 | 6 125 40 10 | 15 200 40 11 | 17 251 250 12 | 17 120 38 13 | 13 210 115 14 | 14 215 105 15 | 1 50 50 16 | 6 70 31 17 | 12 210 120 18 | 4 60 25 19 | 11 230 80 20 | 15 225 73 21 | 2 110 43 22 | -------------------------------------------------------------------------------- /sklearn/datasets/data/linnerud_physiological.csv: -------------------------------------------------------------------------------- 1 | Weight Waist Pulse 2 | 191 36 50 3 | 189 37 52 4 | 193 38 58 5 | 162 35 62 6 | 189 35 46 7 | 182 36 56 8 | 211 38 56 9 | 167 34 60 10 | 176 31 74 11 | 154 33 56 12 | 169 34 50 13 | 166 33 52 14 | 154 34 64 15 | 247 46 50 16 | 
193 36 46 17 | 202 37 62 18 | 176 37 54 19 | 157 32 52 20 | 156 33 54 21 | 138 33 68 22 | -------------------------------------------------------------------------------- /sklearn/datasets/descr/california_housing.rst: -------------------------------------------------------------------------------- 1 | .. _california_housing_dataset: 2 | 3 | California Housing dataset 4 | -------------------------- 5 | 6 | **Data Set Characteristics:** 7 | 8 | :Number of Instances: 20640 9 | 10 | :Number of Attributes: 8 numeric, predictive attributes and the target 11 | 12 | :Attribute Information: 13 | - MedInc median income in block 14 | - HouseAge median house age in block 15 | - AveRooms average number of rooms 16 | - AveBedrms average number of bedrooms 17 | - Population block population 18 | - AveOccup average house occupancy 19 | - Latitude house block latitude 20 | - Longitude house block longitude 21 | 22 | :Missing Attribute Values: None 23 | 24 | This dataset was obtained from the StatLib repository. 25 | http://lib.stat.cmu.edu/datasets/ 26 | 27 | The target variable is the median house value for California districts. 28 | 29 | This dataset was derived from the 1990 U.S. census, using one row per census 30 | block group. A block group is the smallest geographical unit for which the U.S. 31 | Census Bureau publishes sample data (a block group typically has a population 32 | of 600 to 3,000 people). 33 | 34 | It can be downloaded/loaded using the 35 | :func:`sklearn.datasets.fetch_california_housing` function. 36 | 37 | .. topic:: References 38 | 39 | - Pace, R. Kelley and Ronald Barry, Sparse Spatial Autoregressions, 40 | Statistics and Probability Letters, 33 (1997) 291-297 41 | -------------------------------------------------------------------------------- /sklearn/datasets/descr/covtype.rst: -------------------------------------------------------------------------------- 1 | .. 
_covtype_dataset: 2 | 3 | Forest covertypes 4 | ----------------- 5 | 6 | The samples in this dataset correspond to 30×30m patches of forest in the US, 7 | collected for the task of predicting each patch's cover type, 8 | i.e. the dominant species of tree. 9 | There are seven covertypes, making this a multiclass classification problem. 10 | Each sample has 54 features, described on the 11 | `dataset's homepage <https://archive.ics.uci.edu/ml/datasets/Covertype>`__. 12 | Some of the features are boolean indicators, 13 | while others are discrete or continuous measurements. 14 | 15 | **Data Set Characteristics:** 16 | 17 | ================= ============ 18 | Classes 7 19 | Samples total 581012 20 | Dimensionality 54 21 | Features int 22 | ================= ============ 23 | 24 | :func:`sklearn.datasets.fetch_covtype` will load the covertype dataset; 25 | it returns a dictionary-like object 26 | with the feature matrix in the ``data`` member 27 | and the target values in ``target``. 28 | The dataset will be downloaded from the web if necessary. 29 | -------------------------------------------------------------------------------- /sklearn/datasets/descr/diabetes.rst: -------------------------------------------------------------------------------- 1 | .. _diabetes_dataset: 2 | 3 | Diabetes dataset 4 | ---------------- 5 | 6 | Ten baseline variables, age, sex, body mass index, average blood 7 | pressure, and six blood serum measurements were obtained for each of n = 8 | 442 diabetes patients, as well as the response of interest, a 9 | quantitative measure of disease progression one year after baseline.
10 | 11 | **Data Set Characteristics:** 12 | 13 | :Number of Instances: 442 14 | 15 | :Number of Attributes: First 10 columns are numeric predictive values 16 | 17 | :Target: Column 11 is a quantitative measure of disease progression one year after baseline 18 | 19 | :Attribute Information: 20 | - Age 21 | - Sex 22 | - Body mass index 23 | - Average blood pressure 24 | - S1 25 | - S2 26 | - S3 27 | - S4 28 | - S5 29 | - S6 30 | 31 | Note: Each of these 10 feature variables has been mean centered and scaled by the standard deviation times the square root of `n_samples` (i.e. the sum of squares of each column totals 1). 32 | 33 | Source URL: 34 | https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html 35 | 36 | For more information see: 37 | Bradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) "Least Angle Regression," Annals of Statistics (with discussion), 407-499. 38 | (https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf) -------------------------------------------------------------------------------- /sklearn/datasets/descr/linnerud.rst: -------------------------------------------------------------------------------- 1 | .. _linnerrud_dataset: 2 | 3 | Linnerud dataset 4 | ----------------- 5 | 6 | **Data Set Characteristics:** 7 | 8 | :Number of Instances: 20 9 | :Number of Attributes: 3 10 | :Missing Attribute Values: None 11 | 12 | The Linnerud dataset contains two small datasets: 13 | 14 | - *physiological* - CSV containing 20 observations on 3 physiological variables: 15 | Weight, Waist and Pulse. 16 | 17 | - *exercise* - CSV containing 20 observations on 3 exercise variables: 18 | Chins, Situps and Jumps. 19 | 20 | .. topic:: References 21 | 22 | * Tenenhaus, M. (1998). La regression PLS: theorie et pratique. Paris: Editions Technic.
def configuration(parent_package='', top_path=None):
    """Build the numpy.distutils configuration of the ``datasets`` package.

    Parameters
    ----------
    parent_package : str, default=''
        Forwarded unchanged to ``Configuration``.
    top_path : str or None, default=None
        Forwarded unchanged to ``Configuration``.

    Returns
    -------
    config : numpy.distutils.misc_util.Configuration
        Configuration with the data directories, the optional
        ``_svmlight_format_fast`` extension and the ``tests`` subpackage
        registered.
    """
    from numpy.distutils.misc_util import Configuration

    config = Configuration('datasets', parent_package, top_path)

    # Data directories are registered in a fixed order.
    data_dirs = ('data', 'descr', 'images', os.path.join('tests', 'data'))
    for data_dir in data_dirs:
        config.add_data_dir(data_dir)

    # The extension is registered on every implementation except PyPy.
    running_on_pypy = platform.python_implementation() == 'PyPy'
    if not running_on_pypy:
        config.add_extension(
            '_svmlight_format_fast',
            sources=['_svmlight_format_fast.pyx'],
            include_dirs=[numpy.get_include()],
        )

    config.add_subpackage('tests')
    return config


if __name__ == '__main__':
    from numpy.distutils.core import setup

    setup_kwargs = configuration(top_path='').todict()
    setup(**setup_kwargs)
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/1/api-v1-json-data-qualities-1.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/1/data-v1-download-1.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/1/data-v1-download-1.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/1119/api-v1-json-data-1119.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/1119/api-v1-json-data-1119.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/1119/api-v1-json-data-features-1119.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/1119/api-v1-json-data-features-1119.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/1119/api-v1-json-data-list-data_name-adult-census-limit-2-data_version-1.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/1119/api-v1-json-data-list-data_name-adult-census-limit-2-data_version-1.json.gz 
-------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/1119/api-v1-json-data-list-data_name-adult-census-limit-2-status-active-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/1119/api-v1-json-data-list-data_name-adult-census-limit-2-status-active-.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/1119/api-v1-json-data-qualities-1119.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/1119/api-v1-json-data-qualities-1119.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/1119/data-v1-download-54002.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/1119/data-v1-download-54002.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/2/api-v1-json-data-2.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/2/api-v1-json-data-2.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/2/api-v1-json-data-features-2.json.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/2/api-v1-json-data-features-2.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/2/api-v1-json-data-list-data_name-anneal-limit-2-data_version-1.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/2/api-v1-json-data-list-data_name-anneal-limit-2-data_version-1.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/2/api-v1-json-data-list-data_name-anneal-limit-2-status-active-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/2/api-v1-json-data-list-data_name-anneal-limit-2-status-active-.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/2/api-v1-json-data-qualities-2.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/2/api-v1-json-data-qualities-2.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/2/data-v1-download-1666876.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/2/data-v1-download-1666876.arff.gz 
-------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/292/api-v1-json-data-292.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/292/api-v1-json-data-292.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/292/api-v1-json-data-40981.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/292/api-v1-json-data-40981.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/292/api-v1-json-data-features-292.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/292/api-v1-json-data-features-292.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/292/api-v1-json-data-features-40981.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/292/api-v1-json-data-features-40981.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/292/api-v1-json-data-list-data_name-australian-limit-2-data_version-1-status-deactivated.json.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/292/api-v1-json-data-list-data_name-australian-limit-2-data_version-1-status-deactivated.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/292/api-v1-json-data-list-data_name-australian-limit-2-data_version-1.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/292/api-v1-json-data-list-data_name-australian-limit-2-data_version-1.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/292/api-v1-json-data-list-data_name-australian-limit-2-status-active-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/292/api-v1-json-data-list-data_name-australian-limit-2-status-active-.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/292/data-v1-download-49822.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/292/data-v1-download-49822.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/3/api-v1-json-data-3.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/3/api-v1-json-data-3.json.gz 
-------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/3/api-v1-json-data-features-3.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/3/api-v1-json-data-features-3.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/3/api-v1-json-data-qualities-3.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/3/api-v1-json-data-qualities-3.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/3/data-v1-download-3.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/3/data-v1-download-3.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40589/api-v1-json-data-40589.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/40589/api-v1-json-data-40589.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40589/api-v1-json-data-features-40589.json.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/40589/api-v1-json-data-features-40589.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40589/api-v1-json-data-list-data_name-emotions-limit-2-data_version-3.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/40589/api-v1-json-data-list-data_name-emotions-limit-2-data_version-3.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40589/api-v1-json-data-list-data_name-emotions-limit-2-status-active-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/40589/api-v1-json-data-list-data_name-emotions-limit-2-status-active-.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40589/api-v1-json-data-qualities-40589.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/40589/api-v1-json-data-qualities-40589.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40589/data-v1-download-4644182.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/40589/data-v1-download-4644182.arff.gz 
-------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40675/api-v1-json-data-40675.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/40675/api-v1-json-data-40675.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40675/api-v1-json-data-features-40675.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/40675/api-v1-json-data-features-40675.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40675/api-v1-json-data-list-data_name-glass2-limit-2-data_version-1-status-deactivated.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/40675/api-v1-json-data-list-data_name-glass2-limit-2-data_version-1-status-deactivated.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40675/api-v1-json-data-list-data_name-glass2-limit-2-data_version-1.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/40675/api-v1-json-data-list-data_name-glass2-limit-2-data_version-1.json.gz -------------------------------------------------------------------------------- 
/sklearn/datasets/tests/data/openml/40675/api-v1-json-data-list-data_name-glass2-limit-2-status-active-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/40675/api-v1-json-data-list-data_name-glass2-limit-2-status-active-.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40675/api-v1-json-data-qualities-40675.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/40675/api-v1-json-data-qualities-40675.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40675/data-v1-download-4965250.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/40675/data-v1-download-4965250.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40945/api-v1-json-data-40945.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/40945/api-v1-json-data-40945.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40945/api-v1-json-data-features-40945.json.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/40945/api-v1-json-data-features-40945.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40945/api-v1-json-data-qualities-40945.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/40945/api-v1-json-data-qualities-40945.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40945/data-v1-download-16826755.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/40945/data-v1-download-16826755.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40966/api-v1-json-data-40966.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/40966/api-v1-json-data-40966.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40966/api-v1-json-data-features-40966.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/40966/api-v1-json-data-features-40966.json.gz -------------------------------------------------------------------------------- 
/sklearn/datasets/tests/data/openml/40966/api-v1-json-data-list-data_name-miceprotein-limit-2-data_version-4.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/40966/api-v1-json-data-list-data_name-miceprotein-limit-2-data_version-4.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40966/api-v1-json-data-list-data_name-miceprotein-limit-2-status-active-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/40966/api-v1-json-data-list-data_name-miceprotein-limit-2-status-active-.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40966/api-v1-json-data-qualities-40966.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/40966/api-v1-json-data-qualities-40966.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40966/data-v1-download-17928620.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/40966/data-v1-download-17928620.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/561/api-v1-json-data-561.json.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/561/api-v1-json-data-561.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/561/api-v1-json-data-features-561.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/561/api-v1-json-data-features-561.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/561/api-v1-json-data-list-data_name-cpu-limit-2-data_version-1.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/561/api-v1-json-data-list-data_name-cpu-limit-2-data_version-1.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/561/api-v1-json-data-list-data_name-cpu-limit-2-status-active-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/561/api-v1-json-data-list-data_name-cpu-limit-2-status-active-.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/561/api-v1-json-data-qualities-561.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/561/api-v1-json-data-qualities-561.json.gz 
-------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/561/data-v1-download-52739.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/561/data-v1-download-52739.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/61/api-v1-json-data-61.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/61/api-v1-json-data-61.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/61/api-v1-json-data-features-61.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/61/api-v1-json-data-features-61.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/61/api-v1-json-data-list-data_name-iris-limit-2-data_version-1.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/61/api-v1-json-data-list-data_name-iris-limit-2-data_version-1.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/61/api-v1-json-data-list-data_name-iris-limit-2-status-active-.json.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/61/api-v1-json-data-list-data_name-iris-limit-2-status-active-.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/61/api-v1-json-data-qualities-61.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/61/api-v1-json-data-qualities-61.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/61/data-v1-download-61.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/61/data-v1-download-61.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/62/api-v1-json-data-62.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/62/api-v1-json-data-62.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/62/api-v1-json-data-features-62.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/62/api-v1-json-data-features-62.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/62/api-v1-json-data-qualities-62.json.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/62/api-v1-json-data-qualities-62.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/62/data-v1-download-52352.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/datasets/tests/data/openml/62/data-v1-download-52352.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/svmlight_classification.txt: -------------------------------------------------------------------------------- 1 | # comment 2 | # note: the next line contains a tab 3 | 1.0 3:2.5 11:-5.2 16:1.5 # and an inline comment 4 | 2.0 6:1.0 13:-3 5 | # another comment 6 | 3.0 21:27 7 | 4.0 2:1.234567890123456e10 # double precision value 8 | 1.0 # empty line, all zeros 9 | 2.0 3:0 # explicit zeros 10 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/svmlight_invalid.txt: -------------------------------------------------------------------------------- 1 | python 2:2.5 10:-5.2 15:1.5 2 | 2.0 5:1.0 12:-3 3 | 3.0 20:27 4 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/svmlight_invalid_order.txt: -------------------------------------------------------------------------------- 1 | -1 5:2.5 2:-5.2 15:1.5 2 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/svmlight_multilabel.txt: -------------------------------------------------------------------------------- 1 | # multilabel dataset in SVMlight format 2 | 1,0 2:2.5 10:-5.2 15:1.5 3 | 2 5:1.0 
12:-3 4 | 2:3.5 11:26 5 | 1,2 20:27 6 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/test_california_housing.py: -------------------------------------------------------------------------------- 1 | """Test the california_housing loader. 2 | 3 | Skipped if california_housing is not already downloaded to data_home. 4 | """ 5 | 6 | from sklearn.datasets import fetch_california_housing 7 | from sklearn.utils._testing import SkipTest 8 | from sklearn.datasets.tests.test_common import check_return_X_y 9 | from functools import partial 10 | 11 | 12 | def fetch(*args, **kwargs): 13 | return fetch_california_housing(*args, download_if_missing=False, **kwargs) 14 | 15 | 16 | def test_fetch(): 17 | try: 18 | data = fetch() 19 | except IOError: 20 | raise SkipTest("California housing dataset can not be loaded.") 21 | assert((20640, 8) == data.data.shape) 22 | assert((20640, ) == data.target.shape) 23 | 24 | # test return_X_y option 25 | fetch_func = partial(fetch) 26 | check_return_X_y(data, fetch_func) 27 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/test_common.py: -------------------------------------------------------------------------------- 1 | """Test loaders for common functionality. 2 | """ 3 | 4 | 5 | def check_return_X_y(bunch, fetch_func_partial): 6 | X_y_tuple = fetch_func_partial(return_X_y=True) 7 | assert isinstance(X_y_tuple, tuple) 8 | assert X_y_tuple[0].shape == bunch.data.shape 9 | assert X_y_tuple[1].shape == bunch.target.shape 10 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/test_covtype.py: -------------------------------------------------------------------------------- 1 | """Test the covtype loader. 2 | 3 | Skipped if covtype is not already downloaded to data_home. 
4 | """ 5 | 6 | from sklearn.datasets import fetch_covtype 7 | from sklearn.utils._testing import SkipTest 8 | from sklearn.datasets.tests.test_common import check_return_X_y 9 | from functools import partial 10 | 11 | 12 | def fetch(*args, **kwargs): 13 | return fetch_covtype(*args, download_if_missing=False, **kwargs) 14 | 15 | 16 | def test_fetch(): 17 | try: 18 | data1 = fetch(shuffle=True, random_state=42) 19 | except IOError: 20 | raise SkipTest("Covertype dataset can not be loaded.") 21 | 22 | data2 = fetch(shuffle=True, random_state=37) 23 | 24 | X1, X2 = data1['data'], data2['data'] 25 | assert (581012, 54) == X1.shape 26 | assert X1.shape == X2.shape 27 | 28 | assert X1.sum() == X2.sum() 29 | 30 | y1, y2 = data1['target'], data2['target'] 31 | assert (X1.shape[0],) == y1.shape 32 | assert (X1.shape[0],) == y2.shape 33 | 34 | # test return_X_y option 35 | fetch_func = partial(fetch) 36 | check_return_X_y(data1, fetch_func) 37 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/test_olivetti_faces.py: -------------------------------------------------------------------------------- 1 | """Test Olivetti faces fetcher, if the data is available.""" 2 | import pytest 3 | import numpy as np 4 | 5 | from sklearn import datasets 6 | from sklearn.utils import Bunch 7 | from sklearn.datasets.tests.test_common import check_return_X_y 8 | 9 | from sklearn.utils._testing import assert_array_equal 10 | 11 | 12 | def _is_olivetti_faces_not_available(): 13 | try: 14 | datasets.fetch_olivetti_faces(download_if_missing=False) 15 | return False 16 | except IOError: 17 | return True 18 | 19 | 20 | @pytest.mark.skipif( 21 | _is_olivetti_faces_not_available(), 22 | reason='Download Olivetti faces dataset to run this test' 23 | ) 24 | def test_olivetti_faces(): 25 | data = datasets.fetch_olivetti_faces(shuffle=True, random_state=0) 26 | 27 | assert isinstance(data, Bunch) 28 | for expected_keys in ('data', 'images', 'target', 
'DESCR'): 29 | assert expected_keys in data.keys() 30 | 31 | assert data.data.shape == (400, 4096) 32 | assert data.images.shape == (400, 64, 64) 33 | assert data.target.shape == (400,) 34 | assert_array_equal(np.unique(np.sort(data.target)), np.arange(40)) 35 | 36 | # test the return_X_y option 37 | check_return_X_y(data, datasets.fetch_olivetti_faces) 38 | -------------------------------------------------------------------------------- /sklearn/decomposition/_cdnmf_fast.pyx: -------------------------------------------------------------------------------- 1 | # cython: cdivision=True 2 | # cython: boundscheck=False 3 | # cython: wraparound=False 4 | 5 | # Author: Mathieu Blondel, Tom Dupre la Tour 6 | # License: BSD 3 clause 7 | 8 | cimport cython 9 | from libc.math cimport fabs 10 | 11 | 12 | def _update_cdnmf_fast(double[:, ::1] W, double[:, :] HHt, double[:, :] XHt, 13 | Py_ssize_t[::1] permutation): 14 | cdef double violation = 0 15 | cdef Py_ssize_t n_components = W.shape[1] 16 | cdef Py_ssize_t n_samples = W.shape[0] # n_features for H update 17 | cdef double grad, pg, hess 18 | cdef Py_ssize_t i, r, s, t 19 | 20 | with nogil: 21 | for s in range(n_components): 22 | t = permutation[s] 23 | 24 | for i in range(n_samples): 25 | # gradient = GW[t, i] where GW = np.dot(W, HHt) - XHt 26 | grad = -XHt[i, t] 27 | 28 | for r in range(n_components): 29 | grad += HHt[t, r] * W[i, r] 30 | 31 | # projected gradient 32 | pg = min(0., grad) if W[i, t] == 0 else grad 33 | violation += fabs(pg) 34 | 35 | # Hessian 36 | hess = HHt[t, t] 37 | 38 | if hess != 0: 39 | W[i, t] = max(W[i, t] - grad / hess, 0.) 
40 | 41 | return violation 42 | -------------------------------------------------------------------------------- /sklearn/decomposition/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy 3 | from numpy.distutils.misc_util import Configuration 4 | 5 | 6 | def configuration(parent_package="", top_path=None): 7 | config = Configuration("decomposition", parent_package, top_path) 8 | 9 | libraries = [] 10 | if os.name == 'posix': 11 | libraries.append('m') 12 | 13 | config.add_extension("_online_lda_fast", 14 | sources=["_online_lda_fast.pyx"], 15 | include_dirs=[numpy.get_include()], 16 | libraries=libraries) 17 | 18 | config.add_extension('_cdnmf_fast', 19 | sources=['_cdnmf_fast.pyx'], 20 | include_dirs=[numpy.get_include()], 21 | libraries=libraries) 22 | 23 | config.add_subpackage("tests") 24 | 25 | return config 26 | 27 | if __name__ == "__main__": 28 | from numpy.distutils.core import setup 29 | setup(**configuration().todict()) 30 | -------------------------------------------------------------------------------- /sklearn/decomposition/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/decomposition/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/ensemble/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.ensemble` module includes ensemble-based methods for 3 | classification, regression and anomaly detection. 
4 | """ 5 | 6 | from ._base import BaseEnsemble 7 | from ._forest import RandomForestClassifier 8 | from ._forest import RandomForestRegressor 9 | from ._forest import RandomTreesEmbedding 10 | from ._forest import ExtraTreesClassifier 11 | from ._forest import ExtraTreesRegressor 12 | from ._bagging import BaggingClassifier 13 | from ._bagging import BaggingRegressor 14 | from ._iforest import IsolationForest 15 | from ._weight_boosting import AdaBoostClassifier 16 | from ._weight_boosting import AdaBoostRegressor 17 | from ._gb import GradientBoostingClassifier 18 | from ._gb import GradientBoostingRegressor 19 | from ._voting import VotingClassifier 20 | from ._voting import VotingRegressor 21 | from ._stacking import StackingClassifier 22 | from ._stacking import StackingRegressor 23 | 24 | from . import partial_dependence 25 | 26 | __all__ = ["BaseEnsemble", 27 | "RandomForestClassifier", "RandomForestRegressor", 28 | "RandomTreesEmbedding", "ExtraTreesClassifier", 29 | "ExtraTreesRegressor", "BaggingClassifier", 30 | "BaggingRegressor", "IsolationForest", "GradientBoostingClassifier", 31 | "GradientBoostingRegressor", "AdaBoostClassifier", 32 | "AdaBoostRegressor", "VotingClassifier", "VotingRegressor", 33 | "StackingClassifier", "StackingRegressor", 34 | "partial_dependence"] 35 | -------------------------------------------------------------------------------- /sklearn/ensemble/_hist_gradient_boosting/__init__.py: -------------------------------------------------------------------------------- 1 | """This module implements histogram-based gradient boosting estimators. 2 | 3 | The implementation is a port from pygbm which is itself strongly inspired 4 | from LightGBM. 
5 | """ 6 | -------------------------------------------------------------------------------- /sklearn/ensemble/_hist_gradient_boosting/common.pxd: -------------------------------------------------------------------------------- 1 | # cython: language_level=3 2 | import numpy as np 3 | cimport numpy as np 4 | 5 | 6 | ctypedef np.npy_float64 X_DTYPE_C 7 | ctypedef np.npy_uint8 X_BINNED_DTYPE_C 8 | ctypedef np.npy_float64 Y_DTYPE_C 9 | ctypedef np.npy_float32 G_H_DTYPE_C 10 | 11 | cdef packed struct hist_struct: 12 | # Same as histogram dtype but we need a struct to declare views. It needs 13 | # to be packed since by default numpy dtypes aren't aligned 14 | Y_DTYPE_C sum_gradients 15 | Y_DTYPE_C sum_hessians 16 | unsigned int count 17 | 18 | 19 | cdef packed struct node_struct: 20 | # Equivalent struct to PREDICTOR_RECORD_DTYPE to use in memory views. It 21 | # needs to be packed since by default numpy dtypes aren't aligned 22 | Y_DTYPE_C value 23 | unsigned int count 24 | unsigned int feature_idx 25 | X_DTYPE_C threshold 26 | unsigned char missing_go_to_left 27 | unsigned int left 28 | unsigned int right 29 | Y_DTYPE_C gain 30 | unsigned int depth 31 | unsigned char is_leaf 32 | X_BINNED_DTYPE_C bin_threshold 33 | -------------------------------------------------------------------------------- /sklearn/ensemble/_hist_gradient_boosting/common.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # Y_DTYPE is the dtype to which the targets y are converted. This is also the 4 | # dtype for leaf values, gains, and sums of gradients / hessians. The gradients 5 | # and hessians arrays are stored as floats to avoid using too much memory. 
6 | Y_DTYPE = np.float64 7 | X_DTYPE = np.float64 8 | X_BINNED_DTYPE = np.uint8 # hence max_bins == 256 9 | # dtype for gradients and hessians arrays 10 | G_H_DTYPE = np.float32 11 | 12 | HISTOGRAM_DTYPE = np.dtype([ 13 | ('sum_gradients', Y_DTYPE), # sum of sample gradients in bin 14 | ('sum_hessians', Y_DTYPE), # sum of sample hessians in bin 15 | ('count', np.uint32), # number of samples in bin 16 | ]) 17 | 18 | PREDICTOR_RECORD_DTYPE = np.dtype([ 19 | ('value', Y_DTYPE), 20 | ('count', np.uint32), 21 | ('feature_idx', np.uint32), 22 | ('threshold', X_DTYPE), 23 | ('missing_go_to_left', np.uint8), 24 | ('left', np.uint32), 25 | ('right', np.uint32), 26 | ('gain', Y_DTYPE), 27 | ('depth', np.uint32), 28 | ('is_leaf', np.uint8), 29 | ('bin_threshold', X_BINNED_DTYPE), 30 | ]) 31 | 32 | ALMOST_INF = 1e300 # see LightGBM AvoidInf() 33 | -------------------------------------------------------------------------------- /sklearn/ensemble/_hist_gradient_boosting/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/ensemble/_hist_gradient_boosting/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/ensemble/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/ensemble/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/experimental/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.experimental` module provides importable modules that enable 3 | the use of experimental features or estimators. 4 | 5 | The features and estimators that are experimental aren't subject to 6 | deprecation cycles. 
Use them at your own risk! 7 | """ 8 | -------------------------------------------------------------------------------- /sklearn/experimental/enable_hist_gradient_boosting.py: -------------------------------------------------------------------------------- 1 | """Enables histogram-based gradient boosting estimators. 2 | 3 | The API and results of these estimators might change without any deprecation 4 | cycle. 5 | 6 | Importing this file dynamically sets the 7 | :class:`sklearn.ensemble.HistGradientBoostingClassifier` and 8 | :class:`sklearn.ensemble.HistGradientBoostingRegressor` as attributes of the 9 | ensemble module:: 10 | 11 | >>> # explicitly require this experimental feature 12 | >>> from sklearn.experimental import enable_hist_gradient_boosting # noqa 13 | >>> # now you can import normally from ensemble 14 | >>> from sklearn.ensemble import HistGradientBoostingClassifier 15 | >>> from sklearn.ensemble import HistGradientBoostingRegressor 16 | 17 | 18 | The ``# noqa`` comment can be removed: it just tells linters like 19 | flake8 to ignore the import, which appears as unused. 20 | """ 21 | 22 | from ..ensemble._hist_gradient_boosting.gradient_boosting import ( 23 | HistGradientBoostingClassifier, 24 | HistGradientBoostingRegressor 25 | ) 26 | 27 | from .. import ensemble 28 | 29 | ensemble.HistGradientBoostingClassifier = HistGradientBoostingClassifier 30 | ensemble.HistGradientBoostingRegressor = HistGradientBoostingRegressor 31 | ensemble.__all__ += ['HistGradientBoostingClassifier', 32 | 'HistGradientBoostingRegressor'] 33 | -------------------------------------------------------------------------------- /sklearn/experimental/enable_iterative_imputer.py: -------------------------------------------------------------------------------- 1 | """Enables IterativeImputer 2 | 3 | The API and results of this estimator might change without any deprecation 4 | cycle. 
5 | 6 | Importing this file dynamically sets :class:`sklearn.impute.IterativeImputer` 7 | as an attribute of the impute module:: 8 | 9 | >>> # explicitly require this experimental feature 10 | >>> from sklearn.experimental import enable_iterative_imputer # noqa 11 | >>> # now you can import normally from impute 12 | >>> from sklearn.impute import IterativeImputer 13 | """ 14 | 15 | from ..impute._iterative import IterativeImputer 16 | from .. import impute 17 | 18 | impute.IterativeImputer = IterativeImputer 19 | impute.__all__ += ['IterativeImputer'] 20 | -------------------------------------------------------------------------------- /sklearn/experimental/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/experimental/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/experimental/tests/test_enable_iterative_imputer.py: -------------------------------------------------------------------------------- 1 | """Tests for making sure experimental imports work as expected.""" 2 | 3 | import textwrap 4 | 5 | from sklearn.utils._testing import assert_run_python_script 6 | 7 | 8 | def test_imports_strategies(): 9 | # Make sure different import strategies work or fail as expected. 10 | 11 | # Since Python caches the imported modules, we need to run a child process 12 | # for every test case. Else, the tests would not be independent 13 | # (manually removing the imports from the cache (sys.modules) is not 14 | # recommended and can lead to many complications). 
15 | 16 | good_import = """ 17 | from sklearn.experimental import enable_iterative_imputer 18 | from sklearn.impute import IterativeImputer 19 | """ 20 | assert_run_python_script(textwrap.dedent(good_import)) 21 | 22 | good_import_with_ensemble_first = """ 23 | import sklearn.ensemble 24 | from sklearn.experimental import enable_iterative_imputer 25 | from sklearn.impute import IterativeImputer 26 | """ 27 | assert_run_python_script(textwrap.dedent(good_import_with_ensemble_first)) 28 | 29 | bad_imports = """ 30 | import pytest 31 | 32 | with pytest.raises(ImportError): 33 | from sklearn.impute import IterativeImputer 34 | 35 | import sklearn.experimental 36 | with pytest.raises(ImportError): 37 | from sklearn.impute import IterativeImputer 38 | """ 39 | assert_run_python_script(textwrap.dedent(bad_imports)) 40 | -------------------------------------------------------------------------------- /sklearn/externals/README: -------------------------------------------------------------------------------- 1 | This directory contains bundled external dependencies that are updated 2 | every once in a while. 3 | 4 | Note for distribution packagers: if you want to remove the duplicated 5 | code and depend on a packaged version, we suggest that you simply do a 6 | symbolic link in this directory. 7 | 8 | -------------------------------------------------------------------------------- /sklearn/externals/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | External, bundled dependencies. 4 | 5 | """ 6 | -------------------------------------------------------------------------------- /sklearn/externals/conftest.py: -------------------------------------------------------------------------------- 1 | # Do not collect any tests in externals. 
This is more robust than using 2 | # --ignore because --ignore needs a path and it is not convenient to pass in 3 | # the externals path (very long install-dependent path in site-packages) when 4 | # using --pyargs 5 | def pytest_ignore_collect(path, config): 6 | return True 7 | 8 | -------------------------------------------------------------------------------- /sklearn/externals/joblib/__init__.py: -------------------------------------------------------------------------------- 1 | # Import necessary to preserve backward compatibility of pickles 2 | import sys 3 | import warnings 4 | 5 | from joblib import * 6 | 7 | 8 | msg = ("sklearn.externals.joblib is deprecated in 0.21 and will be removed " 9 | "in 0.23. Please import this functionality directly from joblib, " 10 | "which can be installed with: pip install joblib. If this warning is " 11 | "raised when loading pickled models, you may need to re-serialize " 12 | "those models with scikit-learn 0.21+.") 13 | 14 | if not hasattr(sys, "_is_pytest_session"): 15 | warnings.warn(msg, category=FutureWarning) 16 | -------------------------------------------------------------------------------- /sklearn/externals/joblib/numpy_pickle.py: -------------------------------------------------------------------------------- 1 | # Import necessary to preserve backward compatibility of pickles 2 | 3 | from joblib.numpy_pickle import * 4 | -------------------------------------------------------------------------------- /sklearn/externals/setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | def configuration(parent_package='', top_path=None): 5 | from numpy.distutils.misc_util import Configuration 6 | config = Configuration('externals', parent_package, top_path) 7 | config.add_subpackage('joblib') 8 | 9 | return config 10 | -------------------------------------------------------------------------------- /sklearn/feature_extraction/__init__.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.feature_extraction` module deals with feature extraction 3 | from raw data. It currently includes methods to extract features from text and 4 | images. 5 | """ 6 | 7 | from ._dict_vectorizer import DictVectorizer 8 | from ._hashing import FeatureHasher 9 | from .image import img_to_graph, grid_to_graph 10 | from . import text 11 | 12 | __all__ = ['DictVectorizer', 'image', 'img_to_graph', 'grid_to_graph', 'text', 13 | 'FeatureHasher'] 14 | -------------------------------------------------------------------------------- /sklearn/feature_extraction/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import platform 3 | 4 | 5 | def configuration(parent_package='', top_path=None): 6 | import numpy 7 | from numpy.distutils.misc_util import Configuration 8 | 9 | config = Configuration('feature_extraction', parent_package, top_path) 10 | libraries = [] 11 | if os.name == 'posix': 12 | libraries.append('m') 13 | 14 | if platform.python_implementation() != 'PyPy': 15 | config.add_extension('_hashing_fast', 16 | sources=['_hashing_fast.pyx'], 17 | include_dirs=[numpy.get_include()], 18 | libraries=libraries) 19 | config.add_subpackage("tests") 20 | 21 | return config 22 | -------------------------------------------------------------------------------- /sklearn/feature_extraction/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/feature_extraction/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/feature_selection/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.feature_selection` module implements feature selection 3 | 
algorithms. It currently includes univariate filter selection methods and the 4 | recursive feature elimination algorithm. 5 | """ 6 | 7 | from ._univariate_selection import chi2 8 | from ._univariate_selection import f_classif 9 | from ._univariate_selection import f_oneway 10 | from ._univariate_selection import f_regression 11 | from ._univariate_selection import SelectPercentile 12 | from ._univariate_selection import SelectKBest 13 | from ._univariate_selection import SelectFpr 14 | from ._univariate_selection import SelectFdr 15 | from ._univariate_selection import SelectFwe 16 | from ._univariate_selection import GenericUnivariateSelect 17 | 18 | from ._variance_threshold import VarianceThreshold 19 | 20 | from ._rfe import RFE 21 | from ._rfe import RFECV 22 | 23 | from ._from_model import SelectFromModel 24 | 25 | from ._mutual_info import mutual_info_regression, mutual_info_classif 26 | 27 | 28 | __all__ = ['GenericUnivariateSelect', 29 | 'RFE', 30 | 'RFECV', 31 | 'SelectFdr', 32 | 'SelectFpr', 33 | 'SelectFwe', 34 | 'SelectKBest', 35 | 'SelectFromModel', 36 | 'SelectPercentile', 37 | 'VarianceThreshold', 38 | 'chi2', 39 | 'f_classif', 40 | 'f_oneway', 41 | 'f_regression', 42 | 'mutual_info_classif', 43 | 'mutual_info_regression'] 44 | -------------------------------------------------------------------------------- /sklearn/feature_selection/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/feature_selection/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/gaussian_process/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Author: Jan Hendrik Metzen 4 | # Vincent Dubourg 5 | # (mostly translation, see implementation details) 6 | # License: BSD 3 clause 7 | 8 | """ 
9 | The :mod:`sklearn.gaussian_process` module implements Gaussian Process 10 | based regression and classification. 11 | """ 12 | 13 | from ._gpr import GaussianProcessRegressor 14 | from ._gpc import GaussianProcessClassifier 15 | from . import kernels 16 | 17 | 18 | __all__ = ['GaussianProcessRegressor', 'GaussianProcessClassifier', 19 | 'kernels'] 20 | -------------------------------------------------------------------------------- /sklearn/gaussian_process/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/gaussian_process/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/impute/__init__.py: -------------------------------------------------------------------------------- 1 | """Transformers for missing value imputation""" 2 | 3 | from ._base import MissingIndicator, SimpleImputer 4 | from ._knn import KNNImputer 5 | 6 | __all__ = [ 7 | 'MissingIndicator', 8 | 'SimpleImputer', 9 | 'KNNImputer' 10 | ] 11 | -------------------------------------------------------------------------------- /sklearn/impute/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/impute/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/inspection/__init__.py: -------------------------------------------------------------------------------- 1 | """The :mod:`sklearn.inspection` module includes tools for model inspection.""" 2 | from ._partial_dependence import partial_dependence 3 | from ._partial_dependence import plot_partial_dependence 4 | from ._partial_dependence import PartialDependenceDisplay 5 | from ._permutation_importance import permutation_importance 6 | 7 | 
__all__ = [ 8 | 'partial_dependence', 9 | 'plot_partial_dependence', 10 | 'permutation_importance', 11 | 'PartialDependenceDisplay' 12 | ] 13 | -------------------------------------------------------------------------------- /sklearn/inspection/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/inspection/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/linear_model/_sgd_fast.pxd: -------------------------------------------------------------------------------- 1 | # License: BSD 3 clause 2 | """Helper to load LossFunction from sgd_fast.pyx to sag_fast.pyx""" 3 | 4 | cdef class LossFunction: 5 | cdef double loss(self, double p, double y) nogil 6 | cdef double _dloss(self, double p, double y) nogil 7 | 8 | 9 | cdef class Regression(LossFunction): 10 | cdef double loss(self, double p, double y) nogil 11 | cdef double _dloss(self, double p, double y) nogil 12 | 13 | 14 | cdef class Classification(LossFunction): 15 | cdef double loss(self, double p, double y) nogil 16 | cdef double _dloss(self, double p, double y) nogil 17 | 18 | 19 | cdef class Log(Classification): 20 | cdef double loss(self, double p, double y) nogil 21 | cdef double _dloss(self, double p, double y) nogil 22 | 23 | 24 | cdef class SquaredLoss(Regression): 25 | cdef double loss(self, double p, double y) nogil 26 | cdef double _dloss(self, double p, double y) nogil 27 | -------------------------------------------------------------------------------- /sklearn/linear_model/_sgd_fast_helpers.h: -------------------------------------------------------------------------------- 1 | // We cannot directly reuse the npy_isfinite from npy_math.h as numpy 2 | // and scikit-learn are not necessarily built with the same compiler. 
3 | // When re-declaring the functions in the template for cython 4 | // specific for each parameter input type, it needs to be 2 different functions 5 | // as cython doesn't support function overloading. 6 | #ifdef _MSC_VER 7 | # include <float.h> 8 | # define skl_isfinite _finite 9 | # define skl_isfinite32 _finite 10 | # define skl_isfinite64 _finite 11 | #else 12 | # include <numpy/npy_math.h> 13 | # define skl_isfinite npy_isfinite 14 | # define skl_isfinite32 npy_isfinite 15 | # define skl_isfinite64 npy_isfinite 16 | #endif 17 | -------------------------------------------------------------------------------- /sklearn/linear_model/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy 3 | 4 | from sklearn._build_utils import gen_from_templates 5 | 6 | 7 | def configuration(parent_package='', top_path=None): 8 | from numpy.distutils.misc_util import Configuration 9 | 10 | config = Configuration('linear_model', parent_package, top_path) 11 | 12 | libraries = [] 13 | if os.name == 'posix': 14 | libraries.append('m') 15 | 16 | config.add_extension('_cd_fast', 17 | sources=['_cd_fast.pyx'], 18 | include_dirs=numpy.get_include(), 19 | libraries=libraries) 20 | 21 | config.add_extension('_sgd_fast', 22 | sources=['_sgd_fast.pyx'], 23 | include_dirs=numpy.get_include(), 24 | libraries=libraries) 25 | 26 | # generate sag_fast from template 27 | templates = ['sklearn/linear_model/_sag_fast.pyx.tp'] 28 | gen_from_templates(templates, top_path) 29 | 30 | config.add_extension('_sag_fast', 31 | sources=['_sag_fast.pyx'], 32 | include_dirs=numpy.get_include()) 33 | 34 | # add other directories 35 | config.add_subpackage('tests') 36 | 37 | return config 38 | 39 | 40 | if __name__ == '__main__': 41 | from numpy.distutils.core import setup 42 | setup(**configuration(top_path='').todict()) 43 | -------------------------------------------------------------------------------- /sklearn/linear_model/tests/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/linear_model/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/manifold/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.manifold` module implements data embedding techniques. 3 | """ 4 | 5 | from ._locally_linear import locally_linear_embedding, LocallyLinearEmbedding 6 | from ._isomap import Isomap 7 | from ._mds import MDS, smacof 8 | from ._spectral_embedding import SpectralEmbedding, spectral_embedding 9 | from ._t_sne import TSNE, trustworthiness 10 | 11 | __all__ = ['locally_linear_embedding', 'LocallyLinearEmbedding', 'Isomap', 12 | 'MDS', 'smacof', 'SpectralEmbedding', 'spectral_embedding', "TSNE", 13 | 'trustworthiness'] 14 | -------------------------------------------------------------------------------- /sklearn/manifold/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy 4 | 5 | 6 | def configuration(parent_package="", top_path=None): 7 | from numpy.distutils.misc_util import Configuration 8 | 9 | config = Configuration("manifold", parent_package, top_path) 10 | 11 | libraries = [] 12 | if os.name == 'posix': 13 | libraries.append('m') 14 | 15 | config.add_extension("_utils", 16 | sources=["_utils.pyx"], 17 | include_dirs=[numpy.get_include()], 18 | libraries=libraries, 19 | extra_compile_args=["-O3"]) 20 | 21 | config.add_extension("_barnes_hut_tsne", 22 | sources=["_barnes_hut_tsne.pyx"], 23 | include_dirs=[numpy.get_include()], 24 | libraries=libraries, 25 | extra_compile_args=['-O3']) 26 | 27 | config.add_subpackage('tests') 28 | 29 | return config 30 | 31 | 32 | if __name__ == "__main__": 33 | from numpy.distutils.core import setup 34 | 
def configuration(parent_package="", top_path=None):
    """Return the numpy.distutils configuration of ``metrics.cluster``.

    Declares the ``_expected_mutual_info_fast`` Cython extension and the
    ``tests`` subpackage.
    """
    config = Configuration("cluster", parent_package, top_path)

    # Link against libm on POSIX platforms, nothing extra elsewhere.
    libraries = ['m'] if os.name == 'posix' else []

    extension_name = "_expected_mutual_info_fast"
    config.add_extension(extension_name,
                         sources=[extension_name + ".pyx"],
                         include_dirs=[numpy.get_include()],
                         libraries=libraries)

    config.add_subpackage("tests")

    return config
def configuration(parent_package="", top_path=None):
    """Return the numpy.distutils configuration of the ``metrics`` package.

    Registers the ``_plot``, ``_plot.tests`` and ``cluster`` subpackages, the
    ``_pairwise_fast`` Cython extension, and finally the ``tests`` subpackage.
    """
    config = Configuration("metrics", parent_package, top_path)

    # Link against libm on POSIX platforms, nothing extra elsewhere.
    libraries = ['m'] if os.name == 'posix' else []

    # Registration order is kept: plotting helpers, then clustering metrics.
    for subpackage in ('_plot', '_plot.tests', 'cluster'):
        config.add_subpackage(subpackage)

    config.add_extension("_pairwise_fast",
                         sources=["_pairwise_fast.pyx"],
                         libraries=libraries)

    config.add_subpackage('tests')

    return config
@pytest.mark.parametrize(
    "estimator",
    [GaussianMixture(), BayesianGaussianMixture()]
)
def test_gaussian_mixture_n_iter(estimator):
    # After fitting with a cap of one iteration, ``n_iter_`` must report
    # exactly that many iterations.
    iteration_cap = 1
    random_state = np.random.RandomState(0)
    data = random_state.rand(10, 5)

    estimator.set_params(max_iter=iteration_cap)
    estimator.fit(data)

    assert estimator.n_iter_ == iteration_cap
class OneTimeSplitter:
    """KFold wrapped as a cv iterator whose splits can be consumed only once.

    The KFold split generator is created a single time in ``__init__`` and
    stored; ``split`` drains that stored iterator, so a second call yields
    nothing.
    """

    def __init__(self, n_splits=4, n_samples=99):
        self.n_splits = n_splits
        self.n_samples = n_samples
        # Build the underlying split generator once, over dummy data of the
        # requested length.
        placeholder = np.ones(n_samples)
        self.indices = iter(KFold(n_splits=n_splits).split(placeholder))

    def split(self, X=None, y=None, groups=None):
        """Yield the remaining splits; arguments are accepted but unused."""
        yield from self.indices

    def get_n_splits(self, X=None, y=None, groups=None):
        """Return the number of splits given at construction time."""
        return self.n_splits
'KernelDensity', 34 | 'LocalOutlierFactor', 35 | 'NeighborhoodComponentsAnalysis', 36 | 'VALID_METRICS', 37 | 'VALID_METRICS_SPARSE'] 38 | -------------------------------------------------------------------------------- /sklearn/neighbors/_typedefs.pxd: -------------------------------------------------------------------------------- 1 | #!python 2 | cimport numpy as np 3 | 4 | # Floating point/data type 5 | ctypedef np.float64_t DTYPE_t # WARNING: should match DTYPE in typedefs.pyx 6 | 7 | cdef enum: 8 | DTYPECODE = np.NPY_FLOAT64 9 | ITYPECODE = np.NPY_INTP 10 | 11 | # Index/integer type. 12 | # WARNING: ITYPE_t must be a signed integer type or you will have a bad time! 13 | ctypedef np.intp_t ITYPE_t # WARNING: should match ITYPE in typedefs.pyx 14 | 15 | # Fused type for certain operations 16 | ctypedef fused DITYPE_t: 17 | ITYPE_t 18 | DTYPE_t 19 | -------------------------------------------------------------------------------- /sklearn/neighbors/_typedefs.pyx: -------------------------------------------------------------------------------- 1 | #!python 2 | 3 | import numpy as np 4 | cimport numpy as np 5 | from libc.math cimport sqrt 6 | 7 | # use a hack to determine the associated numpy data types 8 | # NOTE: the following requires the buffer interface, only available in 9 | # numpy 1.5+. We'll choose the DTYPE by hand instead. 
10 | #cdef ITYPE_t idummy 11 | #cdef ITYPE_t[:] idummy_view = &idummy 12 | #ITYPE = np.asarray(idummy_view).dtype 13 | ITYPE = np.intp # WARNING: this should match ITYPE_t in typedefs.pxd 14 | 15 | #cdef DTYPE_t ddummy 16 | #cdef DTYPE_t[:] ddummy_view = &ddummy 17 | #DTYPE = np.asarray(ddummy_view).dtype 18 | DTYPE = np.float64 # WARNING: this should match DTYPE_t in typedefs.pxd 19 | 20 | # some handy constants 21 | cdef DTYPE_t INF = np.inf 22 | cdef DTYPE_t PI = np.pi 23 | cdef DTYPE_t ROOT_2PI = sqrt(2 * PI) 24 | -------------------------------------------------------------------------------- /sklearn/neighbors/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/neighbors/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/neural_network/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.neural_network` module includes models based on neural 3 | networks. 
def configuration(parent_package='', top_path=None):
    """Return the numpy.distutils configuration of the ``preprocessing`` package.

    Declares the ``_csr_polynomial_expansion`` Cython extension and the
    ``tests`` subpackage.
    """
    import numpy
    from numpy.distutils.misc_util import Configuration

    config = Configuration('preprocessing', parent_package, top_path)

    # Link against libm on POSIX platforms, nothing extra elsewhere.
    libraries = ['m'] if os.name == 'posix' else []

    extension_name = '_csr_polynomial_expansion'
    config.add_extension(extension_name,
                         sources=[extension_name + '.pyx'],
                         include_dirs=[numpy.get_include()],
                         libraries=libraries)

    config.add_subpackage('tests')

    return config
algorithms. These algorithms utilize small amounts of labeled data and large 4 | amounts of unlabeled data for classification tasks. This module includes Label 5 | Propagation. 6 | """ 7 | 8 | from ._label_propagation import LabelPropagation, LabelSpreading 9 | 10 | __all__ = ['LabelPropagation', 'LabelSpreading'] 11 | -------------------------------------------------------------------------------- /sklearn/semi_supervised/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/semi_supervised/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/svm/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.svm` module includes Support Vector Machine algorithms. 3 | """ 4 | 5 | # See http://scikit-learn.sourceforge.net/modules/svm.html for complete 6 | # documentation. 7 | 8 | # Author: Fabian Pedregosa with help from 9 | # the scikit-learn community. LibSVM and LibLinear are copyright 10 | # of their respective owners. 
// Abstract interface: every method is pure virtual, so concrete subclasses
// must implement all of them. The TRON class declared later in this header
// stores a pointer to an object of this type.
// NOTE(review): only declarations are visible here; the meanings given below
// are inferred from the names and signatures and should be confirmed against
// the implementing classes.
class function
{
public:
	// presumably the objective value at w -- confirm
	virtual double fun(double *w) = 0 ;
	// presumably writes the gradient at w into g -- confirm
	virtual void grad(double *w, double *g) = 0 ;
	// presumably writes a Hessian-vector product for s into Hs -- confirm
	virtual void Hv(double *s, double *Hs) = 0 ;

	// presumably the number of variables (length of w) -- confirm
	virtual int get_nr_variable(void) = 0 ;
	// Virtual destructor so subclasses can be destroyed through a base pointer.
	virtual ~function(void){}
};
*fun_obj; 33 | BlasFunctions *blas; 34 | void info(const char *fmt,...); 35 | void (*tron_print_string)(const char *buf); 36 | }; 37 | #endif 38 | -------------------------------------------------------------------------------- /sklearn/svm/src/libsvm/LIBSVM_CHANGES: -------------------------------------------------------------------------------- 1 | Changes to Libsvm 2 | 3 | This is here mainly as a checklist for incorporation of new versions of libsvm. 4 | 5 | * Add copyright to files svm.cpp and svm.h 6 | * Add random_seed support and call to srand in fit function 7 | 8 | The changes made with respect to upstream are detailed in the heading of svm.cpp 9 | -------------------------------------------------------------------------------- /sklearn/svm/src/libsvm/libsvm_template.cpp: -------------------------------------------------------------------------------- 1 | 2 | /* this is a hack to generate libsvm with both sparse and dense 3 | methods in the same binary*/ 4 | 5 | #define _DENSE_REP 6 | #include "svm.cpp" 7 | #undef _DENSE_REP 8 | #include "svm.cpp" 9 | -------------------------------------------------------------------------------- /sklearn/svm/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/svm/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibrhmusta/scikit-learn/4f97facc3a992c6e2459c3da86c9d69b0688d5ab/sklearn/tests/__init__.py -------------------------------------------------------------------------------- /sklearn/tests/test_build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | import textwrap 4 | 5 | from sklearn import __version__ 6 | 
def test_openmp_parallelism_enabled():
    # Fails when sklearn was built without OpenMP-based parallelism.
    # Setting the ``SKLEARN_SKIP_OPENMP_TEST`` environment variable to a
    # non-empty value skips the check.
    if os.environ.get("SKLEARN_SKIP_OPENMP_TEST"):
        pytest.skip("test explicitly skipped (SKLEARN_SKIP_OPENMP_TEST)")

    # Development builds point at the "dev" documentation, releases at
    # "stable".
    if __version__.endswith(".dev0"):
        base_url = "dev"
    else:
        base_url = "stable"

    message_template = textwrap.dedent(
        """
        This test fails because scikit-learn has been built without OpenMP.
        This is not recommended since some estimators will run in sequential
        mode instead of leveraging thread-based parallelism.

        You can find instructions to build scikit-learn with OpenMP at this
        address:

            https://scikit-learn.org/{}/developers/advanced_installation.html

        You can skip this test by setting the environment variable
        SKLEARN_SKIP_OPENMP_TEST to any value.
        """)
    err_msg = message_template.format(base_url)

    assert _openmp_parallelism_enabled(), err_msg
def _get_mask(X, value_to_mask):
    """Return a boolean array marking the entries of X equal to value_to_mask.

    When ``value_to_mask`` is NaN the comparison is dispatched on the dtype
    kind of ``X``; otherwise an element-wise equality test is used.
    """
    if not is_scalar_nan(value_to_mask):
        # X == value_to_mask with object dtypes does not always perform
        # element-wise for old versions of numpy
        return np.equal(X, value_to_mask)

    dtype_kind = X.dtype.kind
    if dtype_kind == "f":
        return np.isnan(X)
    if dtype_kind in ("i", "u"):
        # Integer arrays cannot hold NaN, so nothing is masked.
        return np.zeros(X.shape, dtype=bool)
    # np.isnan rejects object dtypes; use the dedicated helper instead.
    return _object_dtype_isnan(X)
""" 2 | 3 | cimport numpy as np 4 | 5 | 6 | cdef extern from "math.h": 7 | cdef extern double sqrt(double x) 8 | 9 | 10 | cdef class WeightVector(object): 11 | cdef double *w_data_ptr 12 | cdef double *aw_data_ptr 13 | cdef double wscale 14 | cdef double average_a 15 | cdef double average_b 16 | cdef int n_features 17 | cdef double sq_norm 18 | 19 | cdef void add(self, double *x_data_ptr, int *x_ind_ptr, 20 | int xnnz, double c) nogil 21 | cdef void add_average(self, double *x_data_ptr, int *x_ind_ptr, 22 | int xnnz, double c, double num_iter) nogil 23 | cdef double dot(self, double *x_data_ptr, int *x_ind_ptr, 24 | int xnnz) nogil 25 | cdef void scale(self, double c) nogil 26 | cdef void reset_wscale(self) nogil 27 | cdef double norm(self) nogil 28 | -------------------------------------------------------------------------------- /sklearn/utils/murmurhash.pxd: -------------------------------------------------------------------------------- 1 | """Export fast murmurhash C/C++ routines + cython wrappers""" 2 | 3 | cimport numpy as np 4 | 5 | # The C API is disabled for now, since it requires -I flags to get 6 | # compilation to work even when these functions are not used. 
7 | #cdef extern from "MurmurHash3.h": 8 | # void MurmurHash3_x86_32(void* key, int len, unsigned int seed, 9 | # void* out) 10 | # 11 | # void MurmurHash3_x86_128(void* key, int len, unsigned int seed, 12 | # void* out) 13 | # 14 | # void MurmurHash3_x64_128(void* key, int len, unsigned int seed, 15 | # void* out) 16 | 17 | 18 | cpdef np.uint32_t murmurhash3_int_u32(int key, unsigned int seed) 19 | cpdef np.int32_t murmurhash3_int_s32(int key, unsigned int seed) 20 | cpdef np.uint32_t murmurhash3_bytes_u32(bytes key, unsigned int seed) 21 | cpdef np.int32_t murmurhash3_bytes_s32(bytes key, unsigned int seed) 22 | -------------------------------------------------------------------------------- /sklearn/utils/src/MurmurHash3.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // MurmurHash3 was written by Austin Appleby, and is placed in the public 3 | // domain. The author hereby disclaims copyright to this source code. 
def _weighted_percentile(array, sample_weight, percentile=50):
    """
    Return the element of ``array`` at the weighted ``percentile``.

    The values are ordered, the weights are accumulated in that order, and
    the first element whose cumulative weight reaches ``percentile`` percent
    of the total weight is returned.
    """
    order = np.argsort(array)

    # Cumulative weight along the sorted values; its last entry is the total.
    cumulative_weight = stable_cumsum(sample_weight[order])
    threshold = (percentile / 100.) * cumulative_weight[-1]
    position = np.searchsorted(cumulative_weight, threshold)

    # searchsorted may return len(order); clamp to the last valid index.
    position = np.clip(position, 0, len(order)-1)
    return array[order[position]]
def test_newton_cg():
    # Test that newton_cg gives same result as scipy's fmin_ncg on the
    # quadratic objective f(x) = 0.5 * ||A x||^2.

    rng = np.random.RandomState(0)
    A = rng.normal(size=(10, 10))
    x0 = np.ones(10)

    def func(x):
        Ax = A.dot(x)
        return .5 * (Ax).dot(Ax)

    def grad(x):
        return A.T.dot(A.dot(x))

    def hess(x, p):
        # Hessian-vector product: the Hessian of func is A.T.dot(A) for
        # every x, so the product with p does not depend on x. The former
        # expression went through ``x.all()`` and returned zeros as soon as
        # any component of x was exactly zero.
        return A.T.dot(A.dot(p))

    def grad_hess(x):
        # Gradient at x plus a callable computing Hessian-vector products.
        return grad(x), lambda p: A.T.dot(A.dot(p))

    assert_array_almost_equal(
        _newton_cg(grad_hess, func, grad, x0, tol=1e-10)[0],
        fmin_ncg(f=func, x0=x0, fprime=grad, fhess_p=hess)
    )
def test_get_deps_info():
    # Every listed dependency must have an entry in the reported mapping.
    with ignore_warnings():
        deps_info = _get_deps_info()

    expected_names = ('pip', 'setuptools', 'sklearn', 'numpy', 'scipy',
                      'Cython', 'pandas', 'matplotlib', 'joblib')
    for dependency in expected_names:
        assert dependency in deps_info


def test_show_versions(capsys):
    # The printed report must mention at least python and numpy.
    with ignore_warnings():
        show_versions()
        stdout_text, _ = capsys.readouterr()

    for token in ('python', 'numpy'):
        assert token in stdout_text