├── sklearn
├── tests
│   ├── __init__.py
│   ├── test_check_build.py
│   └── test_init.py
├── cluster
│   ├── tests
│   │   ├── __init__.py
│   │   └── common.py
│   ├── __init__.py
│   └── _dbscan_inner.pyx
├── compose
│   ├── tests
│   │   └── __init__.py
│   └── __init__.py
├── metrics
│   ├── tests
│   │   └── __init__.py
│   ├── cluster
│   │   ├── tests
│   │   │   └── __init__.py
│   │   ├── setup.py
│   │   └── __init__.py
│   └── setup.py
├── mixture
│   ├── tests
│   │   ├── __init__.py
│   │   └── test_mixture.py
│   └── __init__.py
├── src
│   └── cblas
│   │   ├── atlas_type.h
│   │   ├── atlas_dsysinfo.h
│   │   ├── atlas_ssysinfo.h
│   │   ├── atlas_ptalias2.h
│   │   └── README.txt
├── svm
│   ├── tests
│   │   └── __init__.py
│   ├── src
│   │   ├── libsvm
│   │   │   ├── libsvm_template.cpp
│   │   │   └── LIBSVM_CHANGES
│   │   └── liblinear
│   │   │   ├── tron.h
│   │   │   └── COPYRIGHT
│   ├── __init__.py
│   └── liblinear.pxd
├── tree
│   ├── tests
│   │   └── __init__.py
│   ├── __init__.py
│   └── setup.py
├── utils
│   ├── tests
│   │   ├── __init__.py
│   │   ├── test_bench.py
│   │   ├── test_optimize.py
│   │   ├── test_show_versions.py
│   │   ├── test_fast_dict.py
│   │   ├── test_linear_assignment.py
│   │   └── test_deprecation.py
│   ├── lgamma.pxd
│   ├── src
│   │   ├── gamma.h
│   │   └── MurmurHash3.h
│   ├── lgamma.pyx
│   ├── _random.pxd
│   ├── bench.py
│   ├── fast_dict.pxd
│   ├── stats.py
│   ├── _logistic_sigmoid.pyx
│   ├── _joblib.py
│   ├── murmurhash.pxd
│   ├── weight_vector.pxd
│   └── seq_dataset.pxd
├── covariance
│   ├── tests
│   │   └── __init__.py
│   └── __init__.py
├── datasets
│   ├── tests
│   │   ├── __init__.py
│   │   ├── data
│   │   │   ├── svmlight_invalid_order.txt
│   │   │   ├── svmlight_invalid.txt
│   │   │   ├── svmlight_multilabel.txt
│   │   │   ├── openml
│   │   │   │   ├── 2
│   │   │   │   │   ├── api-v1-json-data-2.json.gz
│   │   │   │   │   ├── data-v1-download-1666876.arff.gz
│   │   │   │   │   ├── api-v1-json-data-features-2.json.gz
│   │   │   │   │   ├── api-v1-json-data-list-data_name-anneal-limit-2-data_version-1.json.gz
│   │   │   │   │   └── api-v1-json-data-list-data_name-anneal-limit-2-status-active-.json.gz
│   │   │   │   ├── 61
│   │   │   │   │   ├── api-v1-json-data-61.json.gz
│   │   │   │   │   ├── data-v1-download-61.arff.gz
│   │   │   │   │   ├── api-v1-json-data-features-61.json.gz
│   │   │   │   │   ├── api-v1-json-data-list-data_name-iris-limit-2-data_version-1.json.gz
│   │   │   │   │   └── api-v1-json-data-list-data_name-iris-limit-2-status-active-.json.gz
│   │   │   │   ├── 292
│   │   │   │   │   ├── api-v1-json-data-292.json.gz
│   │   │   │   │   ├── api-v1-json-data-40981.json.gz
│   │   │   │   │   ├── data-v1-download-49822.arff.gz
│   │   │   │   │   ├── api-v1-json-data-features-292.json.gz
│   │   │   │   │   ├── api-v1-json-data-features-40981.json.gz
│   │   │   │   │   ├── api-v1-json-data-list-data_name-australian-limit-2-data_version-1.json.gz
│   │   │   │   │   ├── api-v1-json-data-list-data_name-australian-limit-2-status-active-.json.gz
│   │   │   │   │   └── api-v1-json-data-list-data_name-australian-limit-2-data_version-1-status-deactivated.json.gz
│   │   │   │   ├── 561
│   │   │   │   │   ├── api-v1-json-data-561.json.gz
│   │   │   │   │   ├── data-v1-download-52739.arff.gz
│   │   │   │   │   ├── api-v1-json-data-features-561.json.gz
│   │   │   │   │   ├── api-v1-json-data-list-data_name-cpu-limit-2-data_version-1.json.gz
│   │   │   │   │   └── api-v1-json-data-list-data_name-cpu-limit-2-status-active-.json.gz
│   │   │   │   ├── 1119
│   │   │   │   │   ├── api-v1-json-data-1119.json.gz
│   │   │   │   │   ├── data-v1-download-54002.arff.gz
│   │   │   │   │   ├── api-v1-json-data-features-1119.json.gz
│   │   │   │   │   ├── api-v1-json-data-list-data_name-adult-census-limit-2-data_version-1.json.gz
│   │   │   │   │   └── api-v1-json-data-list-data_name-adult-census-limit-2-status-active-.json.gz
│   │   │   │   ├── 40589
│   │   │   │   │   ├── api-v1-json-data-40589.json.gz
│   │   │   │   │   ├── data-v1-download-4644182.arff.gz
│   │   │   │   │   ├── api-v1-json-data-features-40589.json.gz
│   │   │   │   │   ├── api-v1-json-data-list-data_name-emotions-limit-2-data_version-3.json.gz
│   │   │   │   │   └── api-v1-json-data-list-data_name-emotions-limit-2-status-active-.json.gz
│   │   │   │   ├── 40675
│   │   │   │   │   ├── api-v1-json-data-40675.json.gz
│   │   │   │   │   ├── data-v1-download-4965250.arff.gz
│   │   │   │   │   ├── api-v1-json-data-features-40675.json.gz
│   │   │   │   │   ├── api-v1-json-data-list-data_name-glass2-limit-2-data_version-1.json.gz
│   │   │   │   │   ├── api-v1-json-data-list-data_name-glass2-limit-2-status-active-.json.gz
│   │   │   │   │   └── api-v1-json-data-list-data_name-glass2-limit-2-data_version-1-status-deactivated.json.gz
│   │   │   │   ├── 40945
│   │   │   │   │   ├── api-v1-json-data-40945.json.gz
│   │   │   │   │   └── api-v1-json-data-features-40945.json.gz
│   │   │   │   └── 40966
│   │   │   │   │   ├── api-v1-json-data-40966.json.gz
│   │   │   │   │   ├── data-v1-download-17928620.arff.gz
│   │   │   │   │   ├── api-v1-json-data-features-40966.json.gz
│   │   │   │   │   ├── api-v1-json-data-list-data_name-miceprotein-limit-2-data_version-4.json.gz
│   │   │   │   │   └── api-v1-json-data-list-data_name-miceprotein-limit-2-status-active-.json.gz
│   │   │   └── svmlight_classification.txt
│   │   ├── test_common.py
│   │   ├── test_california_housing.py
│   │   └── test_covtype.py
│   ├── images
│   │   ├── china.jpg
│   │   ├── flower.jpg
│   │   └── README.txt
│   ├── data
│   │   ├── digits.csv.gz
│   │   ├── diabetes_data.csv.gz
│   │   ├── diabetes_target.csv.gz
│   │   ├── linnerud_exercise.csv
│   │   └── linnerud_physiological.csv
│   ├── descr
│   │   ├── linnerud.rst
│   │   ├── covtype.rst
│   │   ├── diabetes.rst
│   │   └── california_housing.rst
│   └── setup.py
├── ensemble
│   ├── tests
│   │   └── __init__.py
│   ├── setup.py
│   └── __init__.py
├── linear_model
│   ├── tests
│   │   └── __init__.py
│   ├── sgd_fast_helpers.h
│   └── sgd_fast.pxd
├── manifold
│   ├── tests
│   │   └── __init__.py
│   ├── __init__.py
│   └── setup.py
├── neighbors
│   ├── tests
│   │   └── __init__.py
│   ├── typedefs.pxd
│   ├── typedefs.pyx
│   ├── __init__.py
│   └── setup.py
├── decomposition
│   ├── tests
│   │   └── __init__.py
│   ├── setup.py
│   ├── cdnmf_fast.pyx
│   └── __init__.py
├── feature_selection
│   ├── tests
│   │   ├── __init__.py
│   │   └── test_variance_threshold.py
│   └── __init__.py
├── gaussian_process
│   ├── tests
│   │   └── __init__.py
│   └── __init__.py
├── model_selection
│   └── tests
│   │   ├── __init__.py
│   │   └── common.py
├── neural_network
│   ├── tests
│   │   └── __init__.py
│   └── __init__.py
├── preprocessing
│   ├── tests
│   │   └── __init__.py
│   └── setup.py
├── semi_supervised
│   ├── tests
│   │   └── __init__.py
│   └── __init__.py
├── cross_decomposition
│   ├── tests
│   │   └── __init__.py
│   └── __init__.py
├── externals
│   ├── joblib
│   │   ├── externals
│   │   │   ├── __init__.py
│   │   │   ├── cloudpickle
│   │   │   │   └── __init__.py
│   │   │   └── loky
│   │   │   │   ├── backend
│   │   │   │   ├── compat_posix.py
│   │   │   │   ├── __init__.py
│   │   │   │   ├── compat.py
│   │   │   │   └── fork_exec.py
│   │   │   │   └── __init__.py
│   │   ├── _compat.py
│   │   └── _multiprocessing_helpers.py
│   ├── __init__.py
│   ├── conftest.py
│   ├── README
│   ├── setup.py
│   └── copy_joblib.sh
├── feature_extraction
│   ├── tests
│   │   └── __init__.py
│   ├── __init__.py
│   └── setup.py
└── __check_build
│   ├── _check_build.pyx
│   ├── setup.py
│   └── __init__.py
├── doc
├── testimonials
│   ├── images
│   │   ├── Makefile
│   │   ├── mars.png
│   │   ├── yhat.png
│   │   ├── zopa.png
│   │   ├── aweber.png
│   │   ├── inria.png
│   │   ├── lovely.png
│   │   ├── betaworks.png
│   │   ├── birchbox.jpg
│   │   ├── booking.png
│   │   ├── datarobot.png
│   │   ├── evernote.png
│   │   ├── infonea.jpg
│   │   ├── okcupid.png
│   │   ├── peerindex.png
│   │   ├── phimeca.png
│   │   ├── rangespan.png
│   │   ├── spotify.png
│   │   ├── change-logo.png
│   │   ├── datapublica.png
│   │   ├── howaboutwe.png
│   │   ├── huggingface.png
│   │   ├── machinalis.png
│   │   ├── solido_logo.png
│   │   ├── dataiku_logo.png
│   │   ├── ottogroup_logo.png
│   │   ├── bestofmedia-logo.png
│   │   └── telecomparistech.jpg
│   └── README.txt
├── themes
│   └── scikit-learn
│   │   ├── static
│   │   ├── css
│   │   │   └── examples.css
│   │   ├── img
│   │   │   ├── columbia.png
│   │   │   ├── forkme.png
│   │   │   ├── google.png
│   │   │   ├── telecom.png
│   │   │   ├── FNRS-logo.png
│   │   │   ├── digicosme.png
│   │   │   ├── sloan_logo.jpg
│   │   │   ├── inria-small.jpg
│   │   │   ├── inria-small.png
│   │   │   ├── nyu_short_color.png
│   │   │   ├── sydney-primary.jpeg
│   │   │   ├── sydney-stacked.jpeg
│   │   │   ├── scikit-learn-logo.png
│   │   │   ├── glyphicons-halflings.png
│   │   │   ├── plot_manifold_sphere_1.png
│   │   │   ├── scikit-learn-logo-small.png
│   │   │   ├── scikit-learn-logo-notext.png
│   │   │   ├── glyphicons-halflings-white.png
│   │   │   └── plot_classifier_comparison_1.png
│   │   └── js
│   │   │   └── extra.js
│   │   └── theme.conf
├── sphinxext
│   └── MANIFEST.in
├── images
│   ├── iris.pdf
│   ├── dysco.png
│   ├── ml_map.png
│   ├── cds-logo.png
│   ├── no_image.png
│   ├── rbm_graph.png
│   ├── inria-logo.jpg
│   ├── last_digit.png
│   ├── sloan_banner.png
│   ├── lda_model_graph.png
│   ├── nyu_short_color.png
│   ├── plot_face_recognition_1.png
│   ├── plot_face_recognition_2.png
│   ├── scikit-learn-logo-notext.png
│   ├── plot_digits_classification.png
│   └── multilayerperceptron_network.png
├── logos
│   ├── favicon.ico
│   ├── identity.pdf
│   ├── scikit-learn-logo.bmp
│   ├── scikit-learn-logo.png
│   ├── scikit-learn-logo-small.png
│   ├── scikit-learn-logo-thumb.png
│   └── scikit-learn-logo-notext.png
├── modules
│   ├── glm_data
│   │   └── lasso_enet_coordinate_descent.png
│   ├── pipeline.rst
│   ├── isotonic.rst
│   └── cross_decomposition.rst
├── tutorial
│   ├── common_includes
│   │   └── info.txt
│   ├── text_analytics
│   │   ├── .gitignore
│   │   ├── data
│   │   │   ├── movie_reviews
│   │   │   │   └── fetch_data.py
│   │   │   └── twenty_newsgroups
│   │   │   │   └── fetch_data.py
│   │   └── solutions
│   │   │   └── generate_skeletons.py
│   ├── index.rst
│   └── statistical_inference
│   │   ├── finding_help.rst
│   │   └── index.rst
├── templates
│   ├── generate_deprecated.sh
│   ├── function.rst
│   ├── class_without_init.rst
│   ├── numpydoc_docstring.rst
│   ├── class.rst
│   ├── deprecated_function.rst
│   ├── deprecated_class_without_init.rst
│   ├── class_with_call.rst
│   ├── deprecated_class.rst
│   └── deprecated_class_with_call.rst
├── README.md
├── developers
│   └── index.rst
├── model_selection.rst
├── unsupervised_learning.rst
├── user_guide.rst
├── preface.rst
├── supervised_learning.rst
├── includes
│   ├── big_toc_css.rst
│   └── bigger_toc_css.rst
├── whats_new.rst
└── data_transforms.rst
├── .gitattributes
├── benchmarks
├── .gitignore
├── plot_tsne_mnist.py
├── bench_plot_ward.py
├── bench_plot_parallel_pairwise.py
└── bench_glm.py
├── examples
├── exercises
│   ├── README.txt
│   ├── plot_digits_classification_exercise.py
│   └── plot_cv_digits.py
├── cluster
│   └── README.txt
├── tree
│   ├── README.txt
│   └── plot_tree_regression.py
├── ensemble
│   ├── README.txt
│   ├── plot_forest_importances_faces.py
│   └── plot_adaboost_regression.py
├── svm
│   ├── README.txt
│   ├── plot_svm_nonlinear.py
│   ├── plot_separating_hyperplane.py
│   ├── plot_svm_regression.py
│   └── plot_custom_kernel.py
├── bicluster
│   └── README.txt
├── datasets
│   ├── README.txt
│   └── plot_digits_last_image.py
├── classification
│   └── README.txt
├── mixture
│   ├── README.txt
│   └── plot_gmm_pdf.py
├── neighbors
│   ├── README.txt
│   └── plot_regression.py
├── preprocessing
│   └── README.txt
├── covariance
│   └── README.txt
├── decomposition
│   ├── README.txt
│   ├── plot_beta_divergence.py
│   └── plot_pca_iris.py
├── manifold
│   ├── README.txt
│   └── plot_swissroll.py
├── multioutput
│   └── README.txt
├── linear_model
│   ├── README.txt
│   ├── plot_lasso_lars.py
│   ├── plot_sgd_loss_functions.py
│   ├── plot_sgd_separating_hyperplane.py
│   ├── plot_sgd_penalties.py
│   └── plot_sgd_weighted_samples.py
├── neural_networks
│   └── README.txt
├── model_selection
│   ├── README.txt
│   └── plot_cv_predict.py
├── text
│   └── README.txt
├── calibration
│   └── README.txt
├── feature_selection
│   ├── README.txt
│   ├── plot_rfe_digits.py
│   ├── plot_feature_selection_pipeline.py
│   ├── plot_rfe_with_cross_validation.py
│   └── plot_select_from_model_boston.py
├── .flake8
├── README.txt
├── cross_decomposition
│   └── README.txt
├── semi_supervised
│   └── README.txt
├── gaussian_process
│   └── README.txt
├── applications
│   └── README.txt
└── compose
│   └── README.txt
├── .landscape.yml
├── .coveragerc
├── MANIFEST.in
├── site.cfg
├── setup.cfg
├── .codecov.yml
├── .gitignore
├── PULL_REQUEST_TEMPLATE.md
├── Makefile
└── COPYING

/sklearn/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/doc/testimonials/images/Makefile:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/cluster/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/compose/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/metrics/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/mixture/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/src/cblas/atlas_type.h:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/svm/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/tree/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/utils/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/covariance/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/datasets/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/ensemble/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/linear_model/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/manifold/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/neighbors/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/src/cblas/atlas_dsysinfo.h:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/src/cblas/atlas_ssysinfo.h:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/decomposition/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/feature_selection/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/gaussian_process/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/metrics/cluster/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/model_selection/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/neural_network/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/preprocessing/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/semi_supervised/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/doc/themes/scikit-learn/static/css/examples.css:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/cross_decomposition/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/externals/joblib/externals/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/feature_extraction/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | /doc/whats_new.rst merge=union 2 | -------------------------------------------------------------------------------- /sklearn/utils/lgamma.pxd: -------------------------------------------------------------------------------- 1 | cdef double lgamma(double x) 2 | -------------------------------------------------------------------------------- /sklearn/__check_build/_check_build.pyx: -------------------------------------------------------------------------------- 1 | def check_build(): 2 | return -------------------------------------------------------------------------------- /benchmarks/.gitignore: -------------------------------------------------------------------------------- 1 | /bhtsne 2 | *.npy 3 | *.json 4 | /mnist_tsne_output/ 5 | -------------------------------------------------------------------------------- /doc/sphinxext/MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include tests *.py 2 | include *.txt 3 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/svmlight_invalid_order.txt: -------------------------------------------------------------------------------- 1 | -1 5:2.5 2:-5.2 15:1.5 2 | -------------------------------------------------------------------------------- /sklearn/externals/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | External, bundled dependencies. 
4 | 5 | """ 6 | -------------------------------------------------------------------------------- /doc/images/iris.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/images/iris.pdf -------------------------------------------------------------------------------- /doc/images/dysco.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/images/dysco.png -------------------------------------------------------------------------------- /doc/images/ml_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/images/ml_map.png -------------------------------------------------------------------------------- /doc/logos/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/logos/favicon.ico -------------------------------------------------------------------------------- /doc/logos/identity.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/logos/identity.pdf -------------------------------------------------------------------------------- /sklearn/cross_decomposition/__init__.py: -------------------------------------------------------------------------------- 1 | from .pls_ import * # noqa 2 | from .cca_ import * # noqa 3 | -------------------------------------------------------------------------------- /doc/images/cds-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/images/cds-logo.png -------------------------------------------------------------------------------- /doc/images/no_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/images/no_image.png -------------------------------------------------------------------------------- /doc/images/rbm_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/images/rbm_graph.png -------------------------------------------------------------------------------- /doc/images/inria-logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/images/inria-logo.jpg -------------------------------------------------------------------------------- /doc/images/last_digit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/images/last_digit.png -------------------------------------------------------------------------------- /doc/images/sloan_banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/images/sloan_banner.png -------------------------------------------------------------------------------- /examples/exercises/README.txt: -------------------------------------------------------------------------------- 1 | Tutorial exercises 2 | ------------------ 3 | 4 | 
Exercises for the tutorials 5 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/svmlight_invalid.txt: -------------------------------------------------------------------------------- 1 | python 2:2.5 10:-5.2 15:1.5 2 | 2.0 5:1.0 12:-3 3 | 3.0 20:27 4 | -------------------------------------------------------------------------------- /.landscape.yml: -------------------------------------------------------------------------------- 1 | pylint: 2 | disable: 3 | - unpacking-non-sequence 4 | ignore-paths: 5 | - sklearn/externals 6 | -------------------------------------------------------------------------------- /doc/images/lda_model_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/images/lda_model_graph.png -------------------------------------------------------------------------------- /doc/images/nyu_short_color.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/images/nyu_short_color.png -------------------------------------------------------------------------------- /doc/logos/scikit-learn-logo.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/logos/scikit-learn-logo.bmp -------------------------------------------------------------------------------- /doc/logos/scikit-learn-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/logos/scikit-learn-logo.png -------------------------------------------------------------------------------- /doc/testimonials/images/mars.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/mars.png -------------------------------------------------------------------------------- /doc/testimonials/images/yhat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/yhat.png -------------------------------------------------------------------------------- /doc/testimonials/images/zopa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/zopa.png -------------------------------------------------------------------------------- /doc/testimonials/images/aweber.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/aweber.png -------------------------------------------------------------------------------- /doc/testimonials/images/inria.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/inria.png -------------------------------------------------------------------------------- /doc/testimonials/images/lovely.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/lovely.png 
-------------------------------------------------------------------------------- /sklearn/datasets/images/china.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/images/china.jpg -------------------------------------------------------------------------------- /sklearn/datasets/images/flower.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/images/flower.jpg -------------------------------------------------------------------------------- /doc/logos/scikit-learn-logo-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/logos/scikit-learn-logo-small.png -------------------------------------------------------------------------------- /doc/logos/scikit-learn-logo-thumb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/logos/scikit-learn-logo-thumb.png -------------------------------------------------------------------------------- /doc/testimonials/images/betaworks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/betaworks.png -------------------------------------------------------------------------------- /doc/testimonials/images/birchbox.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/birchbox.jpg -------------------------------------------------------------------------------- /doc/testimonials/images/booking.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/booking.png -------------------------------------------------------------------------------- /doc/testimonials/images/datarobot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/datarobot.png -------------------------------------------------------------------------------- /doc/testimonials/images/evernote.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/evernote.png -------------------------------------------------------------------------------- /doc/testimonials/images/infonea.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/infonea.jpg -------------------------------------------------------------------------------- /doc/testimonials/images/okcupid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/okcupid.png -------------------------------------------------------------------------------- /doc/testimonials/images/peerindex.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/peerindex.png -------------------------------------------------------------------------------- /doc/testimonials/images/phimeca.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/phimeca.png -------------------------------------------------------------------------------- /doc/testimonials/images/rangespan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/rangespan.png -------------------------------------------------------------------------------- /doc/testimonials/images/spotify.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/spotify.png -------------------------------------------------------------------------------- /sklearn/datasets/data/digits.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/data/digits.csv.gz -------------------------------------------------------------------------------- /doc/images/plot_face_recognition_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/images/plot_face_recognition_1.png -------------------------------------------------------------------------------- /doc/images/plot_face_recognition_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/images/plot_face_recognition_2.png -------------------------------------------------------------------------------- /doc/images/scikit-learn-logo-notext.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/images/scikit-learn-logo-notext.png -------------------------------------------------------------------------------- /doc/logos/scikit-learn-logo-notext.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/logos/scikit-learn-logo-notext.png -------------------------------------------------------------------------------- /doc/testimonials/images/change-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/change-logo.png -------------------------------------------------------------------------------- /doc/testimonials/images/datapublica.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/datapublica.png -------------------------------------------------------------------------------- /doc/testimonials/images/howaboutwe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/howaboutwe.png -------------------------------------------------------------------------------- 
/doc/testimonials/images/huggingface.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/huggingface.png -------------------------------------------------------------------------------- /doc/testimonials/images/machinalis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/machinalis.png -------------------------------------------------------------------------------- /doc/testimonials/images/solido_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/solido_logo.png -------------------------------------------------------------------------------- /doc/images/plot_digits_classification.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/images/plot_digits_classification.png -------------------------------------------------------------------------------- /doc/testimonials/images/dataiku_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/dataiku_logo.png -------------------------------------------------------------------------------- /doc/testimonials/images/ottogroup_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/ottogroup_logo.png -------------------------------------------------------------------------------- /sklearn/datasets/data/diabetes_data.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/data/diabetes_data.csv.gz -------------------------------------------------------------------------------- /doc/images/multilayerperceptron_network.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/images/multilayerperceptron_network.png -------------------------------------------------------------------------------- /doc/testimonials/images/bestofmedia-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/bestofmedia-logo.png -------------------------------------------------------------------------------- /doc/testimonials/images/telecomparistech.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/telecomparistech.jpg -------------------------------------------------------------------------------- /sklearn/datasets/data/diabetes_target.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/data/diabetes_target.csv.gz -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/columbia.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/columbia.png -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/forkme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/forkme.png -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/google.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/google.png -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/telecom.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/telecom.png -------------------------------------------------------------------------------- /examples/cluster/README.txt: -------------------------------------------------------------------------------- 1 | .. _cluster_examples: 2 | 3 | Clustering 4 | ---------- 5 | 6 | Examples concerning the :mod:`sklearn.cluster` module. 7 | -------------------------------------------------------------------------------- /examples/tree/README.txt: -------------------------------------------------------------------------------- 1 | .. _tree_examples: 2 | 3 | Decision Trees 4 | -------------- 5 | 6 | Examples concerning the :mod:`sklearn.tree` module. 
7 | -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/FNRS-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/FNRS-logo.png -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/digicosme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/digicosme.png -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/sloan_logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/sloan_logo.jpg -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/inria-small.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/inria-small.jpg -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/inria-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/inria-small.png -------------------------------------------------------------------------------- /sklearn/utils/src/gamma.h: -------------------------------------------------------------------------------- 1 | #ifndef GAMMA_H 2 | #define GAMMA_H 3 | 4 | //double sklearn_gamma(double); 5 | double sklearn_lgamma(double); 6 | 7 | #endif 8 | 9 | -------------------------------------------------------------------------------- /doc/modules/glm_data/lasso_enet_coordinate_descent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/modules/glm_data/lasso_enet_coordinate_descent.png -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/nyu_short_color.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/nyu_short_color.png -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/sydney-primary.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/sydney-primary.jpeg -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/sydney-stacked.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/sydney-stacked.jpeg -------------------------------------------------------------------------------- /examples/ensemble/README.txt: -------------------------------------------------------------------------------- 1 | .. 
_ensemble_examples: 2 | 3 | Ensemble methods 4 | ---------------- 5 | 6 | Examples concerning the :mod:`sklearn.ensemble` module. 7 | -------------------------------------------------------------------------------- /examples/svm/README.txt: -------------------------------------------------------------------------------- 1 | .. _svm_examples: 2 | 3 | Support Vector Machines 4 | ----------------------- 5 | 6 | Examples concerning the :mod:`sklearn.svm` module. 7 | -------------------------------------------------------------------------------- /sklearn/externals/joblib/externals/cloudpickle/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from .cloudpickle import * 4 | 5 | __version__ = '0.5.6' 6 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | source = sklearn 4 | include = */sklearn/* 5 | omit = 6 | */sklearn/externals/* 7 | */benchmarks/* 8 | */setup.py 9 | -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/scikit-learn-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/scikit-learn-logo.png -------------------------------------------------------------------------------- /examples/bicluster/README.txt: -------------------------------------------------------------------------------- 1 | .. _bicluster_examples: 2 | 3 | Biclustering 4 | ------------ 5 | 6 | Examples concerning the :mod:`sklearn.cluster.bicluster` module. 7 | -------------------------------------------------------------------------------- /examples/datasets/README.txt: -------------------------------------------------------------------------------- 1 | .. _dataset_examples: 2 | 3 | Dataset examples 4 | ----------------------- 5 | 6 | Examples concerning the :mod:`sklearn.datasets` module. 7 | -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/glyphicons-halflings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/glyphicons-halflings.png -------------------------------------------------------------------------------- /examples/classification/README.txt: -------------------------------------------------------------------------------- 1 | .. _classification_examples: 2 | 3 | Classification 4 | ----------------------- 5 | 6 | General examples about classification algorithms. 7 | -------------------------------------------------------------------------------- /examples/mixture/README.txt: -------------------------------------------------------------------------------- 1 | .. _mixture_examples: 2 | 3 | Gaussian Mixture Models 4 | ----------------------- 5 | 6 | Examples concerning the :mod:`sklearn.mixture` module. 7 | -------------------------------------------------------------------------------- /examples/neighbors/README.txt: -------------------------------------------------------------------------------- 1 | .. _neighbors_examples: 2 | 3 | Nearest Neighbors 4 | ----------------------- 5 | 6 | Examples concerning the :mod:`sklearn.neighbors` module. 
7 | -------------------------------------------------------------------------------- /examples/preprocessing/README.txt: -------------------------------------------------------------------------------- 1 | .. _preprocessing_examples: 2 | 3 | Preprocessing 4 | ------------- 5 | 6 | Examples concerning the :mod:`sklearn.preprocessing` module. 7 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/svmlight_multilabel.txt: -------------------------------------------------------------------------------- 1 | # multilabel dataset in SVMlight format 2 | 1,0 2:2.5 10:-5.2 15:1.5 3 | 2 5:1.0 12:-3 4 | 2:3.5 11:26 5 | 1,2 20:27 6 | -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/plot_manifold_sphere_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/plot_manifold_sphere_1.png -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/scikit-learn-logo-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/scikit-learn-logo-small.png -------------------------------------------------------------------------------- /examples/covariance/README.txt: -------------------------------------------------------------------------------- 1 | .. _covariance_examples: 2 | 3 | Covariance estimation 4 | --------------------- 5 | 6 | Examples concerning the :mod:`sklearn.covariance` module. 7 | -------------------------------------------------------------------------------- /examples/decomposition/README.txt: -------------------------------------------------------------------------------- 1 | .. _decomposition_examples: 2 | 3 | Decomposition 4 | ------------- 5 | 6 | Examples concerning the :mod:`sklearn.decomposition` module. 7 | 8 | -------------------------------------------------------------------------------- /examples/manifold/README.txt: -------------------------------------------------------------------------------- 1 | .. _manifold_examples: 2 | 3 | Manifold learning 4 | ----------------------- 5 | 6 | Examples concerning the :mod:`sklearn.manifold` module. 7 | 8 | -------------------------------------------------------------------------------- /examples/multioutput/README.txt: -------------------------------------------------------------------------------- 1 | .. _multioutput_examples: 2 | 3 | Multioutput methods 4 | ------------------- 5 | 6 | Examples concerning the :mod:`sklearn.multioutput` module. 
7 | -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/scikit-learn-logo-notext.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/scikit-learn-logo-notext.png -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/2/api-v1-json-data-2.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/2/api-v1-json-data-2.json.gz -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/glyphicons-halflings-white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/glyphicons-halflings-white.png -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/plot_classifier_comparison_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/plot_classifier_comparison_1.png -------------------------------------------------------------------------------- /examples/linear_model/README.txt: -------------------------------------------------------------------------------- 1 | .. _linear_examples: 2 | 3 | Generalized Linear Models 4 | ------------------------- 5 | 6 | Examples concerning the :mod:`sklearn.linear_model` module. 7 | -------------------------------------------------------------------------------- /examples/neural_networks/README.txt: -------------------------------------------------------------------------------- 1 | .. _neural_network_examples: 2 | 3 | Neural Networks 4 | ----------------------- 5 | 6 | Examples concerning the :mod:`sklearn.neural_network` module. 
7 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/292/api-v1-json-data-292.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/292/api-v1-json-data-292.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/561/api-v1-json-data-561.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/561/api-v1-json-data-561.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/61/api-v1-json-data-61.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/61/api-v1-json-data-61.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/61/data-v1-download-61.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/61/data-v1-download-61.arff.gz -------------------------------------------------------------------------------- /examples/model_selection/README.txt: -------------------------------------------------------------------------------- 1 | .. _model_selection_examples: 2 | 3 | Model Selection 4 | ----------------------- 5 | 6 | Examples related to the :mod:`sklearn.model_selection` module. 7 | -------------------------------------------------------------------------------- /examples/text/README.txt: -------------------------------------------------------------------------------- 1 | .. _text_examples: 2 | 3 | Working with text documents 4 | ---------------------------- 5 | 6 | Examples concerning the :mod:`sklearn.feature_extraction.text` module. 
7 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/1119/api-v1-json-data-1119.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/1119/api-v1-json-data-1119.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/2/data-v1-download-1666876.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/2/data-v1-download-1666876.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/292/api-v1-json-data-40981.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/292/api-v1-json-data-40981.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/292/data-v1-download-49822.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/292/data-v1-download-49822.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/561/data-v1-download-52739.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/561/data-v1-download-52739.arff.gz -------------------------------------------------------------------------------- /examples/calibration/README.txt: -------------------------------------------------------------------------------- 1 | .. _calibration_examples: 2 | 3 | Calibration 4 | ----------------------- 5 | 6 | Examples illustrating the calibration of predicted probabilities of classifiers. 7 | -------------------------------------------------------------------------------- /examples/feature_selection/README.txt: -------------------------------------------------------------------------------- 1 | .. _feature_selection_examples: 2 | 3 | Feature Selection 4 | ----------------------- 5 | 6 | Examples concerning the :mod:`sklearn.feature_selection` module. 
7 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/1119/data-v1-download-54002.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/1119/data-v1-download-54002.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/2/api-v1-json-data-features-2.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/2/api-v1-json-data-features-2.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40589/api-v1-json-data-40589.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/40589/api-v1-json-data-40589.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40675/api-v1-json-data-40675.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/40675/api-v1-json-data-40675.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40945/api-v1-json-data-40945.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/40945/api-v1-json-data-40945.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40966/api-v1-json-data-40966.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/40966/api-v1-json-data-40966.json.gz -------------------------------------------------------------------------------- /examples/.flake8: -------------------------------------------------------------------------------- 1 | # Examples specific flake8 configuration 2 | 3 | [flake8] 4 | # Same ignore as project-wide plus E402 (imports not at top of file) 5 | ignore=E121,E123,E126,E24,E226,E704,W503,W504,E402 6 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40589/data-v1-download-4644182.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/40589/data-v1-download-4644182.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40675/data-v1-download-4965250.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/40675/data-v1-download-4965250.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40966/data-v1-download-17928620.arff.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/40966/data-v1-download-17928620.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/61/api-v1-json-data-features-61.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/61/api-v1-json-data-features-61.json.gz -------------------------------------------------------------------------------- /doc/tutorial/common_includes/info.txt: -------------------------------------------------------------------------------- 1 | Common RST file snippets meant to be reused, by inclusion, in the real 2 | tutorial, in order to lower the maintenance burden 3 | of redundant sections. 4 | -------------------------------------------------------------------------------- /examples/README.txt: -------------------------------------------------------------------------------- 1 | .. _general_examples: 2 | 3 | Examples 4 | ======== 5 | 6 | Miscellaneous examples 7 | ---------------------- 8 | 9 | Miscellaneous and introductory examples for scikit-learn. 10 | -------------------------------------------------------------------------------- /examples/cross_decomposition/README.txt: -------------------------------------------------------------------------------- 1 | .. _cross_decomposition_examples: 2 | 3 | Cross decomposition 4 | ------------------- 5 | 6 | Examples concerning the :mod:`sklearn.cross_decomposition` module. 7 | 8 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/292/api-v1-json-data-features-292.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/292/api-v1-json-data-features-292.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/561/api-v1-json-data-features-561.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/561/api-v1-json-data-features-561.json.gz -------------------------------------------------------------------------------- /examples/semi_supervised/README.txt: -------------------------------------------------------------------------------- 1 | .. _semi_supervised_examples: 2 | 3 | Semi-Supervised Classification 4 | ------------------------------ 5 | 6 | Examples concerning the :mod:`sklearn.semi_supervised` module. 
7 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/1119/api-v1-json-data-features-1119.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/1119/api-v1-json-data-features-1119.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/292/api-v1-json-data-features-40981.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/292/api-v1-json-data-features-40981.json.gz -------------------------------------------------------------------------------- /doc/templates/generate_deprecated.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | for f in [^d]*; do (head -n2 < $f; echo ' 3 | .. meta:: 4 | :robots: noindex 5 | 6 | .. warning:: 7 | **DEPRECATED** 8 | '; tail -n+3 $f) > deprecated_$f; done 9 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40589/api-v1-json-data-features-40589.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/40589/api-v1-json-data-features-40589.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40675/api-v1-json-data-features-40675.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/40675/api-v1-json-data-features-40675.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40945/api-v1-json-data-features-40945.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/40945/api-v1-json-data-features-40945.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40966/api-v1-json-data-features-40966.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/40966/api-v1-json-data-features-40966.json.gz -------------------------------------------------------------------------------- /examples/gaussian_process/README.txt: -------------------------------------------------------------------------------- 1 | .. _gaussian_process_examples: 2 | 3 | Gaussian Process for Machine Learning 4 | ------------------------------------- 5 | 6 | Examples concerning the :mod:`sklearn.gaussian_process` module. 
7 | 8 | -------------------------------------------------------------------------------- /sklearn/svm/src/libsvm/libsvm_template.cpp: -------------------------------------------------------------------------------- 1 | 2 | /* this is a hack to generate libsvm with both sparse and dense 3 | methods in the same binary */ 4 | 5 | #define _DENSE_REP 6 | #include "svm.cpp" 7 | #undef _DENSE_REP 8 | #include "svm.cpp" 9 | -------------------------------------------------------------------------------- /examples/applications/README.txt: -------------------------------------------------------------------------------- 1 | .. _realworld_examples: 2 | 3 | Examples based on real world datasets 4 | ------------------------------------- 5 | 6 | Applications to real-world problems with some medium-sized datasets or an 7 | interactive user interface. 8 | -------------------------------------------------------------------------------- /examples/compose/README.txt: -------------------------------------------------------------------------------- 1 | .. _compose_examples: 2 | 3 | Pipelines and composite estimators 4 | ---------------------------------- 5 | 6 | Examples of how to compose transformers and pipelines from other estimators. See the :ref:`User Guide <combining_estimators>`. 7 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/2/api-v1-json-data-list-data_name-anneal-limit-2-data_version-1.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/2/api-v1-json-data-list-data_name-anneal-limit-2-data_version-1.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/2/api-v1-json-data-list-data_name-anneal-limit-2-status-active-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/2/api-v1-json-data-list-data_name-anneal-limit-2-status-active-.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/561/api-v1-json-data-list-data_name-cpu-limit-2-data_version-1.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/561/api-v1-json-data-list-data_name-cpu-limit-2-data_version-1.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/561/api-v1-json-data-list-data_name-cpu-limit-2-status-active-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/561/api-v1-json-data-list-data_name-cpu-limit-2-status-active-.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/61/api-v1-json-data-list-data_name-iris-limit-2-data_version-1.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/61/api-v1-json-data-list-data_name-iris-limit-2-data_version-1.json.gz --------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/61/api-v1-json-data-list-data_name-iris-limit-2-status-active-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/61/api-v1-json-data-list-data_name-iris-limit-2-status-active-.json.gz -------------------------------------------------------------------------------- /sklearn/utils/lgamma.pyx: -------------------------------------------------------------------------------- 1 | cdef extern from "src/gamma.h": 2 | cdef double sklearn_lgamma(double x) 3 | 4 | 5 | cdef double lgamma(double x): 6 | if x <= 0: 7 | raise ValueError("x must be strictly positive, got %f" % x) 8 | return sklearn_lgamma(x) 9 | -------------------------------------------------------------------------------- /doc/themes/scikit-learn/theme.conf: -------------------------------------------------------------------------------- 1 | [theme] 2 | inherit = basic 3 | stylesheet = nature.css 4 | pygments_style = tango 5 | 6 | [options] 7 | oldversion = False 8 | collapsiblesidebar = True 9 | google_analytics = True 10 | surveybanner = False 11 | sprintbanner = True 12 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/292/api-v1-json-data-list-data_name-australian-limit-2-data_version-1.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/292/api-v1-json-data-list-data_name-australian-limit-2-data_version-1.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/292/api-v1-json-data-list-data_name-australian-limit-2-status-active-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/292/api-v1-json-data-list-data_name-australian-limit-2-status-active-.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40589/api-v1-json-data-list-data_name-emotions-limit-2-data_version-3.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/40589/api-v1-json-data-list-data_name-emotions-limit-2-data_version-3.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40589/api-v1-json-data-list-data_name-emotions-limit-2-status-active-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/40589/api-v1-json-data-list-data_name-emotions-limit-2-status-active-.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40675/api-v1-json-data-list-data_name-glass2-limit-2-data_version-1.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/40675/api-v1-json-data-list-data_name-glass2-limit-2-data_version-1.json.gz 
-------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40675/api-v1-json-data-list-data_name-glass2-limit-2-status-active-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/40675/api-v1-json-data-list-data_name-glass2-limit-2-status-active-.json.gz -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.rst 2 | recursive-include doc * 3 | recursive-include examples * 4 | recursive-include sklearn *.c *.h *.pyx *.pxd *.pxi 5 | recursive-include sklearn/datasets *.csv *.csv.gz *.rst *.jpg *.txt *.arff.gz *.json.gz 6 | include COPYING 7 | include README.rst 8 | 9 | -------------------------------------------------------------------------------- /doc/README.md: -------------------------------------------------------------------------------- 1 | # Documentation for scikit-learn 2 | 3 | This directory contains the full manual and web site as displayed at 4 | http://scikit-learn.org. See 5 | http://scikit-learn.org/dev/developers/contributing.html#documentation for 6 | detailed information about the documentation. 7 | -------------------------------------------------------------------------------- /doc/modules/pipeline.rst: -------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | .. raw:: html 4 | 5 | 6 | 9 | 10 | This content is now at :ref:`combining_estimators`. 11 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/1119/api-v1-json-data-list-data_name-adult-census-limit-2-data_version-1.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/1119/api-v1-json-data-list-data_name-adult-census-limit-2-data_version-1.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/1119/api-v1-json-data-list-data_name-adult-census-limit-2-status-active-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/1119/api-v1-json-data-list-data_name-adult-census-limit-2-status-active-.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40966/api-v1-json-data-list-data_name-miceprotein-limit-2-data_version-4.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/40966/api-v1-json-data-list-data_name-miceprotein-limit-2-data_version-4.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40966/api-v1-json-data-list-data_name-miceprotein-limit-2-status-active-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/40966/api-v1-json-data-list-data_name-miceprotein-limit-2-status-active-.json.gz 
-------------------------------------------------------------------------------- /doc/testimonials/README.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | To find the list of people we contacted, see: 4 | https://docs.google.com/spreadsheet/ccc?key=0AhGnAxuBDhjmdDYwNzlZVE5SMkFsMjNBbGlaWkpNZ1E&usp=sharing 5 | 6 | To obtain access to this file, send an email to: 7 | nelle dot varoquaux at gmail dot com 8 | 9 | -------------------------------------------------------------------------------- /doc/templates/function.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}==================== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autofunction:: {{ objname }} 7 | 8 | .. include:: {{module}}.{{objname}}.examples 9 | 10 | .. raw:: html 11 | 12 |
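The function.rst template just shown is filled in by the Sphinx/autosummary machinery of this repo's doc build: `module`, `objname`, and `underline` are substituted, and the literal `====` appended after `{{ underline }}` pads the section rule so it stays at least as long as the title once the `:mod:`...`` markup is added. A minimal sketch of that substitution using jinja2, with made-up values (`sklearn.metrics`/`accuracy_score` and the underline sizing are illustrative assumptions, not taken from a real build):

    # Sketch of the substitution performed on doc/templates/function.rst.
    # The module/objname values below are illustrative only.
    from jinja2 import Template

    tmpl = Template(
        ":mod:`{{module}}`.{{objname}}\n"
        "{{ underline }}====================\n"
    )

    module, objname = "sklearn.metrics", "accuracy_score"
    # Assume the underline is sized to the dotted name; the literal
    # '====' suffix in the template keeps the rule long enough.
    underline = "=" * (len(module) + 1 + len(objname))

    print(tmpl.render(module=module, objname=objname, underline=underline))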
13 | -------------------------------------------------------------------------------- /doc/templates/class_without_init.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}============== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | .. include:: {{module}}.{{objname}}.examples 9 | 10 | .. raw:: html 11 | 12 |
13 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40675/api-v1-json-data-list-data_name-glass2-limit-2-data_version-1-status-deactivated.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/40675/api-v1-json-data-list-data_name-glass2-limit-2-data_version-1-status-deactivated.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/292/api-v1-json-data-list-data_name-australian-limit-2-data_version-1-status-deactivated.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/292/api-v1-json-data-list-data_name-australian-limit-2-data_version-1-status-deactivated.json.gz -------------------------------------------------------------------------------- /site.cfg: -------------------------------------------------------------------------------- 1 | 2 | # Uncomment to link against the MKL library on Windows 3 | # [mkl] 4 | # include_dirs=C:\Program Files\Intel\MKL\10.2.5.035\include 5 | # library_dirs=C:\Program Files\Intel\MKL\10.2.5.035\ia32\lib 6 | # mkl_libs=mkl_core, mkl_intel_c, mkl_intel_s, libguide, libguide40, mkl_blacs_dll, mkl_intel_sequential 7 | -------------------------------------------------------------------------------- /doc/templates/numpydoc_docstring.rst: -------------------------------------------------------------------------------- 1 | {{index}} 2 | {{summary}} 3 | {{extended_summary}} 4 | {{parameters}} 5 | {{returns}} 6 | {{yields}} 7 | {{other_parameters}} 8 | {{attributes}} 9 | {{raises}} 10 | {{warns}} 11 | {{warnings}} 12 | {{see_also}} 13 | {{notes}} 14 | {{references}} 15 | {{examples}} 16 | {{methods}} 17 | -------------------------------------------------------------------------------- /sklearn/mixture/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.mixture` module implements mixture modeling algorithms. 3 | """ 4 | 5 | from .gaussian_mixture import GaussianMixture 6 | from .bayesian_mixture import BayesianGaussianMixture 7 | 8 | 9 | __all__ = ['GaussianMixture', 10 | 'BayesianGaussianMixture'] 11 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/svmlight_classification.txt: -------------------------------------------------------------------------------- 1 | # comment 2 | # note: the next line contains a tab 3 | 1.0 3:2.5 11:-5.2 16:1.5 # and an inline comment 4 | 2.0 6:1.0 13:-3 5 | # another comment 6 | 3.0 21:27 7 | 4.0 2:1.234567890123456e10 # double precision value 8 | 1.0 # empty line, all zeros 9 | 2.0 3:0 # explicit zeros 10 | -------------------------------------------------------------------------------- /sklearn/svm/src/libsvm/LIBSVM_CHANGES: -------------------------------------------------------------------------------- 1 | Changes to Libsvm 2 | 3 | This is here mainly as a checklist for incorporating new versions of libsvm. 
4 | 5 | * Add copyright to files svm.cpp and svm.h 6 | * Add random_seed support and call to srand in fit function 7 | 8 | The changes made with respect to upstream are detailed in the header of svm.cpp 9 | -------------------------------------------------------------------------------- /sklearn/datasets/data/linnerud_exercise.csv: -------------------------------------------------------------------------------- 1 | Chins Situps Jumps 2 | 5 162 60 3 | 2 110 60 4 | 12 101 101 5 | 12 105 37 6 | 13 155 58 7 | 4 101 42 8 | 8 101 38 9 | 6 125 40 10 | 15 200 40 11 | 17 251 250 12 | 17 120 38 13 | 13 210 115 14 | 14 215 105 15 | 1 50 50 16 | 6 70 31 17 | 12 210 120 18 | 4 60 25 19 | 11 230 80 20 | 15 225 73 21 | 2 110 43 22 | -------------------------------------------------------------------------------- /sklearn/externals/conftest.py: -------------------------------------------------------------------------------- 1 | # Do not collect any tests in externals. This is more robust than using 2 | # --ignore because --ignore needs a path and it is not convenient to pass in 3 | # the externals path (very long install-dependent path in site-packages) when 4 | # using --pyargs 5 | def pytest_ignore_collect(path, config): 6 | return True 7 | 8 | -------------------------------------------------------------------------------- /doc/developers/index.rst: -------------------------------------------------------------------------------- 1 | .. _developers_guide: 2 | 3 | ================= 4 | Developer's Guide 5 | ================= 6 | 7 | .. include:: ../includes/big_toc_css.rst 8 | .. include:: ../tune_toc.rst 9 | 10 | .. toctree:: 11 | 12 | contributing 13 | tips 14 | utilities 15 | performance 16 | advanced_installation 17 | maintainer 18 | -------------------------------------------------------------------------------- /sklearn/linear_model/sgd_fast_helpers.h: -------------------------------------------------------------------------------- 1 | // We cannot directly reuse the npy_isfinite from npy_math.h as numpy 2 | // and scikit-learn are not necessarily built with the same compiler. 3 | #ifdef _MSC_VER 4 | # include <float.h> 5 | # define skl_isfinite _finite 6 | #else 7 | # include <npy_math.h> 8 | # define skl_isfinite npy_isfinite 9 | #endif 10 | -------------------------------------------------------------------------------- /doc/model_selection.rst: -------------------------------------------------------------------------------- 1 | .. include:: includes/big_toc_css.rst 2 | 3 | .. _model_selection: 4 | 5 | Model selection and evaluation 6 | ------------------------------ 7 | 8 | .. toctree:: 9 | 10 | modules/cross_validation 11 | modules/grid_search 12 | modules/model_evaluation 13 | modules/model_persistence 14 | modules/learning_curve 15 | -------------------------------------------------------------------------------- /sklearn/datasets/data/linnerud_physiological.csv: -------------------------------------------------------------------------------- 1 | Weight Waist Pulse 2 | 191 36 50 3 | 189 37 52 4 | 193 38 58 5 | 162 35 62 6 | 189 35 46 7 | 182 36 56 8 | 211 38 56 9 | 167 34 60 10 | 176 31 74 11 | 154 33 56 12 | 169 34 50 13 | 166 33 52 14 | 154 34 64 15 | 247 46 50 16 | 193 36 46 17 | 202 37 62 18 | 176 37 54 19 | 157 32 52 20 | 156 33 54 21 | 138 33 68 22 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/test_common.py: -------------------------------------------------------------------------------- 1 | """Test loaders for common functionality. 
2 | """ 3 | 4 | 5 | def check_return_X_y(bunch, fetch_func_partial): 6 | X_y_tuple = fetch_func_partial(return_X_y=True) 7 | assert(isinstance(X_y_tuple, tuple)) 8 | assert(X_y_tuple[0].shape == bunch.data.shape) 9 | assert(X_y_tuple[1].shape == bunch.target.shape) 10 | -------------------------------------------------------------------------------- /sklearn/utils/tests/test_bench.py: -------------------------------------------------------------------------------- 1 | 2 | import datetime 3 | 4 | from sklearn.utils.bench import total_seconds 5 | from sklearn.utils.testing import assert_equal 6 | 7 | 8 | def test_total_seconds(): 9 | delta = (datetime.datetime(2012, 1, 1, 5, 5, 1) 10 | - datetime.datetime(2012, 1, 1, 5, 5, 4)) 11 | assert_equal(86397, total_seconds(delta)) 12 | -------------------------------------------------------------------------------- /doc/templates/class.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}============== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | {% block methods %} 9 | .. automethod:: __init__ 10 | {% endblock %} 11 | 12 | .. include:: {{module}}.{{objname}}.examples 13 | 14 | .. raw:: html 15 | 16 |
17 | -------------------------------------------------------------------------------- /sklearn/tests/test_check_build.py: -------------------------------------------------------------------------------- 1 | """ 2 | Smoke Test the check_build module 3 | """ 4 | 5 | # Author: G Varoquaux 6 | # License: BSD 3 clause 7 | 8 | from sklearn.__check_build import raise_build_error 9 | 10 | from sklearn.utils.testing import assert_raises 11 | 12 | 13 | def test_raise_build_error(): 14 | assert_raises(ImportError, raise_build_error, ImportError()) 15 | -------------------------------------------------------------------------------- /sklearn/utils/_random.pxd: -------------------------------------------------------------------------------- 1 | # Authors: Arnaud Joly 2 | # 3 | # License: BSD 3 clause 4 | 5 | 6 | import numpy as np 7 | cimport numpy as np 8 | 9 | 10 | cpdef sample_without_replacement(np.int_t n_population, 11 | np.int_t n_samples, 12 | method=*, 13 | random_state=*) 14 | 15 | -------------------------------------------------------------------------------- /doc/templates/deprecated_function.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}==================== 3 | 4 | .. meta:: 5 | :robots: noindex 6 | 7 | .. warning:: 8 | **DEPRECATED** 9 | 10 | 11 | .. currentmodule:: {{ module }} 12 | 13 | .. autofunction:: {{ objname }} 14 | 15 | .. include:: {{module}}.{{objname}}.examples 16 | 17 | .. raw:: html 18 | 19 |
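The deprecated_function.rst template above is what doc/templates/generate_deprecated.sh (shown earlier) produces from function.rst: the first two lines (title and underline) are kept, the noindex/DEPRECATED banner is spliced in, and the remainder is appended as deprecated_<name>. A rough Python rendition of that shell loop, as a sketch (the shell script remains the actual generator):

    # Sketch: Python equivalent of doc/templates/generate_deprecated.sh.
    import glob

    BANNER = """
    .. meta::
       :robots: noindex

    .. warning::
       **DEPRECATED**
    """

    for path in glob.glob("[!d]*"):           # same filter as the script's [^d]*
        with open(path) as f:
            lines = f.readlines()
        with open("deprecated_" + path, "w") as out:
            out.writelines(lines[:2])         # head -n2: title + underline
            out.write(BANNER + "\n")          # injected deprecation banner
            out.writelines(lines[2:])         # tail -n+3: the rest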
20 | -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/js/extra.js: -------------------------------------------------------------------------------- 1 | // Miscellaneous enhancements to doc display 2 | 3 | 4 | $(document).ready(function() { 5 | /*** Add permalink buttons next to glossary terms ***/ 6 | 7 | $('dl.glossary > dt[id]').append(function() { 8 | return ('<a class="headerlink" href="#' + this.id + '" ' + 9 | 'title="Permalink to this term">' + 10 | '¶</a>'); 11 | }) 12 | }); 13 | -------------------------------------------------------------------------------- /doc/templates/deprecated_class_without_init.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}============== 3 | 4 | .. meta:: 5 | :robots: noindex 6 | 7 | .. warning:: 8 | **DEPRECATED** 9 | 10 | 11 | .. currentmodule:: {{ module }} 12 | 13 | .. autoclass:: {{ objname }} 14 | 15 | .. include:: {{module}}.{{objname}}.examples 16 | 17 | .. raw:: html 18 | 19 |
20 | -------------------------------------------------------------------------------- /doc/templates/class_with_call.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}=============== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | {% block methods %} 9 | .. automethod:: __init__ 10 | .. automethod:: __call__ 11 | {% endblock %} 12 | 13 | .. include:: {{module}}.{{objname}}.examples 14 | 15 | .. raw:: html 16 | 17 |
18 | -------------------------------------------------------------------------------- /sklearn/neural_network/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.neural_network` module includes models based on neural 3 | networks. 4 | """ 5 | 6 | # License: BSD 3 clause 7 | 8 | from .rbm import BernoulliRBM 9 | 10 | from .multilayer_perceptron import MLPClassifier 11 | from .multilayer_perceptron import MLPRegressor 12 | 13 | __all__ = ["BernoulliRBM", 14 | "MLPClassifier", 15 | "MLPRegressor"] 16 | -------------------------------------------------------------------------------- /sklearn/semi_supervised/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.semi_supervised` module implements semi-supervised learning 3 | algorithms. These algorithms utilize small amounts of labeled data and large 4 | amounts of unlabeled data for classification tasks. This module includes Label 5 | Propagation. 6 | """ 7 | 8 | from .label_propagation import LabelPropagation, LabelSpreading 9 | 10 | __all__ = ['LabelPropagation', 'LabelSpreading'] 11 | -------------------------------------------------------------------------------- /sklearn/externals/joblib/externals/loky/backend/compat_posix.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | ############################################################################### 3 | # Compat file to load the correct wait function 4 | # 5 | # author: Thomas Moreau and Olivier grisel 6 | # 7 | import sys 8 | 9 | # Compat wait 10 | if sys.version_info < (3, 3): 11 | from ._posix_wait import wait 12 | else: 13 | from multiprocessing.connection import wait 14 | -------------------------------------------------------------------------------- /doc/unsupervised_learning.rst: -------------------------------------------------------------------------------- 1 | .. include:: includes/big_toc_css.rst 2 | 3 | .. _unsupervised-learning: 4 | 5 | Unsupervised learning 6 | ----------------------- 7 | 8 | .. toctree:: 9 | 10 | modules/mixture 11 | modules/manifold 12 | modules/clustering 13 | modules/biclustering 14 | modules/decomposition 15 | modules/covariance 16 | modules/outlier_detection 17 | modules/density 18 | modules/neural_networks_unsupervised 19 | -------------------------------------------------------------------------------- /sklearn/externals/README: -------------------------------------------------------------------------------- 1 | This directory contains bundled external dependencies that are updated 2 | every once in a while. 3 | 4 | Note to developers and advanced users: setting the SKLEARN_SITE_JOBLIB 5 | environment variable to a non-null value will force scikit-learn to use the 6 | site joblib. 7 | 8 | Note for distribution packagers: if you want to remove the duplicated 9 | code and depend on a packaged version, we suggest that you simply create a 10 | symbolic link in this directory. 11 | 12 | -------------------------------------------------------------------------------- /doc/templates/deprecated_class.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}============== 3 | 4 | .. meta:: 5 | :robots: noindex 6 | 7 | .. warning:: 8 | **DEPRECATED** 9 | 10 | 11 | .. currentmodule:: {{ module }} 12 | 13 | .. autoclass:: {{ objname }} 14 | 15 | {% block methods %} 16 | ..
automethod:: __init__ 17 | {% endblock %} 18 | 19 | .. include:: {{module}}.{{objname}}.examples 20 | 21 | .. raw:: html 22 | 23 |
24 | -------------------------------------------------------------------------------- /doc/user_guide.rst: -------------------------------------------------------------------------------- 1 | .. title:: User guide: contents 2 | 3 | .. _user_guide: 4 | 5 | ========== 6 | User Guide 7 | ========== 8 | 9 | .. include:: includes/big_toc_css.rst 10 | 11 | .. nice layout in the toc 12 | 13 | .. include:: tune_toc.rst 14 | 15 | .. toctree:: 16 | :numbered: 17 | 18 | supervised_learning.rst 19 | unsupervised_learning.rst 20 | model_selection.rst 21 | data_transforms.rst 22 | Dataset loading utilities <datasets/index.rst> 23 | modules/computing.rst 24 | -------------------------------------------------------------------------------- /sklearn/feature_extraction/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.feature_extraction` module deals with feature extraction 3 | from raw data. It currently includes methods to extract features from text and 4 | images. 5 | """ 6 | 7 | from .dict_vectorizer import DictVectorizer 8 | from .hashing import FeatureHasher 9 | from .image import img_to_graph, grid_to_graph 10 | from . import text 11 | 12 | __all__ = ['DictVectorizer', 'image', 'img_to_graph', 'grid_to_graph', 'text', 13 | 'FeatureHasher'] 14 | -------------------------------------------------------------------------------- /sklearn/compose/__init__.py: -------------------------------------------------------------------------------- 1 | """Meta-estimators for building composite models with transformers 2 | 3 | In addition to its current contents, this module will eventually be home to 4 | refurbished versions of Pipeline and FeatureUnion. 5 | 6 | """ 7 | 8 | from ._column_transformer import ColumnTransformer, make_column_transformer 9 | from ._target import TransformedTargetRegressor 10 | 11 | 12 | __all__ = [ 13 | 'ColumnTransformer', 14 | 'make_column_transformer', 15 | 'TransformedTargetRegressor', 16 | ] 17 | -------------------------------------------------------------------------------- /doc/templates/deprecated_class_with_call.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}=============== 3 | 4 | .. meta:: 5 | :robots: noindex 6 | 7 | .. warning:: 8 | **DEPRECATED** 9 | 10 | 11 | .. currentmodule:: {{ module }} 12 | 13 | .. autoclass:: {{ objname }} 14 | 15 | {% block methods %} 16 | .. automethod:: __init__ 17 | .. automethod:: __call__ 18 | {% endblock %} 19 | 20 | .. include:: {{module}}.{{objname}}.examples 21 | 22 | .. raw:: html 23 | 24 |
25 | -------------------------------------------------------------------------------- /sklearn/manifold/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.manifold` module implements data embedding techniques. 3 | """ 4 | 5 | from .locally_linear import locally_linear_embedding, LocallyLinearEmbedding 6 | from .isomap import Isomap 7 | from .mds import MDS, smacof 8 | from .spectral_embedding_ import SpectralEmbedding, spectral_embedding 9 | from .t_sne import TSNE 10 | 11 | __all__ = ['locally_linear_embedding', 'LocallyLinearEmbedding', 'Isomap', 12 | 'MDS', 'smacof', 'SpectralEmbedding', 'spectral_embedding', "TSNE"] 13 | -------------------------------------------------------------------------------- /sklearn/externals/joblib/_compat.py: -------------------------------------------------------------------------------- 1 | """ 2 | Compatibility layer for Python 3/Python 2 single codebase 3 | """ 4 | import sys 5 | 6 | PY3_OR_LATER = sys.version_info[0] >= 3 7 | PY27 = sys.version_info[:2] == (2, 7) 8 | 9 | try: 10 | _basestring = basestring 11 | _bytes_or_unicode = (str, unicode) 12 | except NameError: 13 | _basestring = str 14 | _bytes_or_unicode = (bytes, str) 15 | 16 | 17 | def with_metaclass(meta, *bases): 18 | """Create a base class with a metaclass.""" 19 | return meta("NewBase", bases, {}) 20 | -------------------------------------------------------------------------------- /sklearn/externals/setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | def configuration(parent_package='', top_path=None): 5 | from numpy.distutils.misc_util import Configuration 6 | config = Configuration('externals', parent_package, top_path) 7 | config.add_subpackage('joblib') 8 | config.add_subpackage('joblib/externals') 9 | config.add_subpackage('joblib/externals/loky') 10 | config.add_subpackage('joblib/externals/loky/backend') 11 | config.add_subpackage('joblib/externals/cloudpickle') 12 | 13 | return config 14 | -------------------------------------------------------------------------------- /sklearn/tree/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.tree` module includes decision tree-based models for 3 | classification and regression. 4 | """ 5 | 6 | from .tree import DecisionTreeClassifier 7 | from .tree import DecisionTreeRegressor 8 | from .tree import ExtraTreeClassifier 9 | from .tree import ExtraTreeRegressor 10 | from .export import export_graphviz, plot_tree 11 | 12 | __all__ = ["DecisionTreeClassifier", "DecisionTreeRegressor", 13 | "ExtraTreeClassifier", "ExtraTreeRegressor", "export_graphviz", 14 | "plot_tree"] 15 | -------------------------------------------------------------------------------- /sklearn/neighbors/typedefs.pxd: -------------------------------------------------------------------------------- 1 | #!python 2 | cimport numpy as np 3 | 4 | # Floating point/data type 5 | ctypedef np.float64_t DTYPE_t # WARNING: should match DTYPE in typedefs.pyx 6 | 7 | cdef enum: 8 | DTYPECODE = np.NPY_FLOAT64 9 | ITYPECODE = np.NPY_INTP 10 | 11 | # Index/integer type. 12 | # WARNING: ITYPE_t must be a signed integer type or you will have a bad time! 
13 | ctypedef np.intp_t ITYPE_t # WARNING: should match ITYPE in typedefs.pyx 14 | 15 | # Fused type for certain operations 16 | ctypedef fused DITYPE_t: 17 | ITYPE_t 18 | DTYPE_t 19 | -------------------------------------------------------------------------------- /sklearn/externals/joblib/externals/loky/backend/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | from .context import get_context 5 | 6 | LOKY_PICKLER = os.environ.get("LOKY_PICKLER") 7 | 8 | if sys.version_info > (3, 4): 9 | 10 | def _make_name(): 11 | name = '/loky-%i-%s' % (os.getpid(), next(synchronize.SemLock._rand)) 12 | return name 13 | 14 | # monkey patch the name creation for multiprocessing 15 | from multiprocessing import synchronize 16 | synchronize.SemLock._make_name = staticmethod(_make_name) 17 | 18 | __all__ = ["get_context"] 19 | -------------------------------------------------------------------------------- /sklearn/src/cblas/atlas_ptalias2.h: -------------------------------------------------------------------------------- 1 | #ifndef ATLAS_PTALIAS2_H 2 | #define ATLAS_PTALIAS2_H 3 | /* 4 | * Real BLAS 5 | */ 6 | #define ATL_sger ATL_stger 7 | #define ATL_sgemv ATL_stgemv 8 | 9 | #define ATL_dger ATL_dtger 10 | #define ATL_dgemv ATL_dtgemv 11 | 12 | /* 13 | * Complex BLAS 14 | */ 15 | #define ATL_cgemv ATL_ctgemv 16 | #define ATL_cgerc ATL_ctgerc 17 | #define ATL_cgeru ATL_ctgeru 18 | 19 | #define ATL_zgemv ATL_ztgemv 20 | #define ATL_zgerc ATL_ztgerc 21 | #define ATL_zgeru ATL_ztgeru 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /sklearn/ensemble/setup.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from numpy.distutils.misc_util import Configuration 3 | 4 | 5 | def configuration(parent_package="", top_path=None): 6 | config = Configuration("ensemble", parent_package, top_path) 7 | config.add_extension("_gradient_boosting", 8 | sources=["_gradient_boosting.pyx"], 9 | include_dirs=[numpy.get_include()]) 10 | 11 | config.add_subpackage("tests") 12 | 13 | return config 14 | 15 | if __name__ == "__main__": 16 | from numpy.distutils.core import setup 17 | setup(**configuration().todict()) 18 | -------------------------------------------------------------------------------- /sklearn/tests/test_init.py: -------------------------------------------------------------------------------- 1 | # Basic unittests to test functioning of module's top-level 2 | 3 | from sklearn.utils.testing import assert_equal 4 | 5 | __author__ = 'Yaroslav Halchenko' 6 | __license__ = 'BSD' 7 | 8 | 9 | try: 10 | from sklearn import * # noqa 11 | _top_import_error = None 12 | except Exception as e: 13 | _top_import_error = e 14 | 15 | 16 | def test_import_skl(): 17 | # Test whether the above import has failed for some reason 18 | # "import *" is discouraged outside of the module level, hence we 19 | # rely on setting up the variable above 20 | assert_equal(_top_import_error, None) 21 | -------------------------------------------------------------------------------- /sklearn/utils/bench.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper functions for benchmarking 3 | """ 4 | 5 | 6 | def total_seconds(delta): 7 | """ 8 | Helper function to emulate total_seconds, 9 | introduced in Python 2.7 10 | 11 | https://docs.python.org/library/datetime.html\ 12 | #datetime.timedelta.total_seconds 13 | 14 | 
Parameters 15 | ---------- 16 | delta : timedelta object 17 | 18 | Returns 19 | ------- 20 | float 21 | The number of seconds contained in delta (``delta.days`` is ignored) 22 | 23 | """ 24 | 25 | mu_sec = 1e-6 # number of seconds in one microsecond 26 | 27 | return delta.seconds + delta.microseconds * mu_sec 28 | -------------------------------------------------------------------------------- /sklearn/utils/fast_dict.pxd: -------------------------------------------------------------------------------- 1 | """ 2 | Uses C++ map containers for fast dict-like behavior with keys being 3 | integers, and values float. 4 | """ 5 | # Author: Gael Varoquaux 6 | # License: BSD 7 | 8 | from libcpp.map cimport map as cpp_map 9 | 10 | # Import the C-level symbols of numpy 11 | cimport numpy as np 12 | 13 | ctypedef np.float64_t DTYPE_t 14 | 15 | ctypedef np.intp_t ITYPE_t 16 | 17 | ############################################################################### 18 | # An object to be used in Python 19 | 20 | cdef class IntFloatDict: 21 | cdef cpp_map[ITYPE_t, DTYPE_t] my_map 22 | cdef _to_arrays(self, ITYPE_t [:] keys, DTYPE_t [:] values) 23 | -------------------------------------------------------------------------------- /sklearn/__check_build/setup.py: -------------------------------------------------------------------------------- 1 | # Author: Virgile Fritsch 2 | # License: BSD 3 clause 3 | 4 | import numpy 5 | 6 | 7 | def configuration(parent_package='', top_path=None): 8 | from numpy.distutils.misc_util import Configuration 9 | config = Configuration('__check_build', parent_package, top_path) 10 | config.add_extension('_check_build', 11 | sources=['_check_build.pyx'], 12 | include_dirs=[numpy.get_include()]) 13 | 14 | return config 15 | 16 | if __name__ == '__main__': 17 | from numpy.distutils.core import setup 18 | setup(**configuration(top_path='').todict()) 19 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [aliases] 2 | test = pytest 3 | 4 | [tool:pytest] 5 | # disable-pytest-warnings should be removed once we rewrite tests 6 | # using yield with parametrize 7 | addopts = 8 | --ignore build_tools 9 | --ignore benchmarks 10 | --ignore doc 11 | --ignore examples 12 | --doctest-modules 13 | --disable-pytest-warnings 14 | -rs 15 | 16 | [wheelhouse_uploader] 17 | artifact_indexes= 18 | # Wheels built by travis (only for specific tags): 19 | # https://github.com/MacPython/scikit-learn-wheels 20 | http://wheels.scipy.org 21 | 22 | [flake8] 23 | # Default flake8 3.5 ignored flags 24 | ignore=E121,E123,E126,E226,E24,E704,W503,W504 25 | -------------------------------------------------------------------------------- /sklearn/preprocessing/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def configuration(parent_package='', top_path=None): 5 | import numpy 6 | from numpy.distutils.misc_util import Configuration 7 | 8 | config = Configuration('preprocessing', parent_package, top_path) 9 | libraries = [] 10 | if os.name == 'posix': 11 | libraries.append('m') 12 | 13 | config.add_extension('_csr_polynomial_expansion', 14 | sources=['_csr_polynomial_expansion.pyx'], 15 | include_dirs=[numpy.get_include()], 16 | libraries=libraries) 17 | 18 | config.add_subpackage('tests') 19 | 20 | return config 21 | -------------------------------------------------------------------------------- /sklearn/src/cblas/README.txt: 
-------------------------------------------------------------------------------- 1 | This is a stripped-down version of CBLAS (C-interface to the Basic Linear 2 | Algebra Subroutines), containing only those parts used by scikit-learn's 3 | C/C++/Cython extensions. It is used when no CBLAS implementation is available 4 | at build time. 5 | 6 | Sources here are taken from the reference implementation in ATLAS. To add new 7 | algorithms, the only thing that should be done is to copy the reference 8 | implementation from ${ATLAS}/src/blas/reference/level* into this directory. 9 | 10 | Header files are taken from ${ATLAS}/include, the only change being the 11 | inclusion of "atlas_refalias*.h" into its respective "atlas_level*.h" file. 12 | -------------------------------------------------------------------------------- /doc/tutorial/text_analytics/.gitignore: -------------------------------------------------------------------------------- 1 | # cruft 2 | .*.swp 3 | *.pyc 4 | .DS_Store 5 | *.pdf 6 | 7 | # folder to be used for working on the exercises 8 | workspace 9 | 10 | # output of the sphinx build of the documentation 11 | tutorial/_build 12 | 13 | # datasets to be fetched from the web and cached locally 14 | data/twenty_newsgroups/20news-bydate.tar.gz 15 | data/twenty_newsgroups/20news-bydate-train 16 | data/twenty_newsgroups/20news-bydate-test 17 | 18 | data/movie_reviews/txt_sentoken 19 | data/movie_reviews/poldata.README.2.0 20 | 21 | data/languages/paragraphs 22 | data/languages/short_paragraphs 23 | data/languages/html 24 | 25 | data/labeled_faces_wild/lfw_preprocessed/ 26 | -------------------------------------------------------------------------------- /sklearn/datasets/descr/linnerud.rst: -------------------------------------------------------------------------------- 1 | .. _linnerrud_dataset: 2 | 3 | Linnerrud dataset 4 | ----------------- 5 | 6 | **Data Set Characteristics:** 7 | 8 | :Number of Instances: 20 9 | :Number of Attributes: 3 10 | :Missing Attribute Values: None 11 | 12 | The Linnerud dataset contains two small datasets: 13 | 14 | - *physiological* - CSV containing 20 observations on 3 physiological variables: 15 | Weight, Waist and Pulse. 16 | 17 | - *exercise* - CSV containing 20 observations on 3 exercise variables: 18 | Chins, Situps and Jumps. 19 | 20 | .. topic:: References 21 | 22 | * Tenenhaus, M. (1998). La regression PLS: theorie et pratique. Paris: Editions Technic. 23 | -------------------------------------------------------------------------------- /doc/preface.rst: -------------------------------------------------------------------------------- 1 | .. This helps define the TOC ordering for "about us" sections. Particularly 2 | useful for PDF output as this section is not linked from elsewhere. 3 | 4 | .. _preface_menu: 5 | 6 | .. include:: includes/big_toc_css.rst 7 | .. include:: tune_toc.rst 8 | 9 | .. top level heading needed for LaTeX TOC in sphinx<=1.3.1 10 | 11 | ************ 12 | scikit-learn 13 | ************ 14 | 15 | ======================= 16 | Welcome to scikit-learn 17 | ======================= 18 | 19 | | 20 | 21 | .. toctree:: 22 | :maxdepth: 2 23 | 24 | install 25 | faq 26 | support 27 | related_projects 28 | about 29 | testimonials/testimonials 30 | whats_new 31 | 32 | | 33 | -------------------------------------------------------------------------------- /doc/supervised_learning.rst: -------------------------------------------------------------------------------- 1 | .. include:: includes/big_toc_css.rst 2 | 3 | .. 
_supervised-learning: 4 | 5 | Supervised learning 6 | ----------------------- 7 | 8 | .. toctree:: 9 | 10 | modules/linear_model 11 | modules/lda_qda.rst 12 | modules/kernel_ridge.rst 13 | modules/svm 14 | modules/sgd 15 | modules/neighbors 16 | modules/gaussian_process 17 | modules/cross_decomposition.rst 18 | modules/naive_bayes 19 | modules/tree 20 | modules/ensemble 21 | modules/multiclass 22 | modules/feature_selection.rst 23 | modules/label_propagation.rst 24 | modules/isotonic.rst 25 | modules/calibration.rst 26 | modules/neural_networks_supervised 27 | -------------------------------------------------------------------------------- /sklearn/mixture/tests/test_mixture.py: -------------------------------------------------------------------------------- 1 | # Author: Guillaume Lemaitre 2 | # License: BSD 3 clause 3 | 4 | import pytest 5 | import numpy as np 6 | 7 | from sklearn.mixture import GaussianMixture 8 | from sklearn.mixture import BayesianGaussianMixture 9 | 10 | 11 | @pytest.mark.parametrize( 12 | "estimator", 13 | [GaussianMixture(), 14 | BayesianGaussianMixture()] 15 | ) 16 | def test_gaussian_mixture_n_iter(estimator): 17 | # check that n_iter is the number of iterations performed. 18 | rng = np.random.RandomState(0) 19 | X = rng.rand(10, 5) 20 | max_iter = 1 21 | estimator.set_params(max_iter=max_iter) 22 | estimator.fit(X) 23 | assert estimator.n_iter_ == max_iter 24 | -------------------------------------------------------------------------------- /sklearn/utils/stats.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from sklearn.utils.extmath import stable_cumsum 4 | 5 | 6 | def _weighted_percentile(array, sample_weight, percentile=50): 7 | """ 8 | Compute the weighted ``percentile`` of ``array`` with ``sample_weight``. 9 | """ 10 | sorted_idx = np.argsort(array) 11 | 12 | # Find the first index where the weighted CDF reaches the requested percentile 13 | weight_cdf = stable_cumsum(sample_weight[sorted_idx]) 14 | percentile_idx = np.searchsorted( 15 | weight_cdf, (percentile / 100.) 
* weight_cdf[-1]) 16 | # in rare cases, percentile_idx equals len(sorted_idx) 17 | percentile_idx = np.clip(percentile_idx, 0, len(sorted_idx)-1) 18 | return array[sorted_idx[percentile_idx]] 19 | -------------------------------------------------------------------------------- /sklearn/feature_extraction/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import platform 3 | 4 | 5 | def configuration(parent_package='', top_path=None): 6 | import numpy 7 | from numpy.distutils.misc_util import Configuration 8 | 9 | config = Configuration('feature_extraction', parent_package, top_path) 10 | libraries = [] 11 | if os.name == 'posix': 12 | libraries.append('m') 13 | 14 | if platform.python_implementation() != 'PyPy': 15 | config.add_extension('_hashing', 16 | sources=['_hashing.pyx'], 17 | include_dirs=[numpy.get_include()], 18 | libraries=libraries) 19 | config.add_subpackage("tests") 20 | 21 | return config 22 | -------------------------------------------------------------------------------- /.codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | # Commits pushed to master should not make the overall 8 | # project coverage decrease by more than 1%: 9 | target: auto 10 | threshold: 1% 11 | patch: 12 | default: 13 | # Be tolerant on slight code coverage diff on PRs to limit 14 | # noisy red coverage status on github PRs. 15 | # Note: the coverage stats are still uploaded 16 | # to codecov so that PR reviewers can see uncovered lines 17 | # in the github diff if they install the codecov browser 18 | # extension: 19 | # https://github.com/codecov/browser-extension 20 | target: auto 21 | threshold: 1% 22 | 23 | -------------------------------------------------------------------------------- /sklearn/model_selection/tests/common.py: -------------------------------------------------------------------------------- 1 | """ 2 | Common utilities for testing model selection. 3 | """ 4 | 5 | import numpy as np 6 | 7 | from sklearn.model_selection import KFold 8 | 9 | 10 | class OneTimeSplitter: 11 | """A wrapper that makes KFold a single-entry cv iterator""" 12 | def __init__(self, n_splits=4, n_samples=99): 13 | self.n_splits = n_splits 14 | self.n_samples = n_samples 15 | self.indices = iter(KFold(n_splits=n_splits).split(np.ones(n_samples))) 16 | 17 | def split(self, X=None, y=None, groups=None): 18 | """Split can be called only once""" 19 | for index in self.indices: 20 | yield index 21 | 22 | def get_n_splits(self, X=None, y=None, groups=None): 23 | return self.n_splits 24 | -------------------------------------------------------------------------------- /sklearn/gaussian_process/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Author: Jan Hendrik Metzen 4 | # Vincent Dubourg 5 | # (mostly translation, see implementation details) 6 | # License: BSD 3 clause 7 | 8 | """ 9 | The :mod:`sklearn.gaussian_process` module implements Gaussian Process 10 | based regression and classification. 11 | """ 12 | 13 | from .gpr import GaussianProcessRegressor 14 | from .gpc import GaussianProcessClassifier 15 | from . import kernels 16 | 17 | from . import correlation_models 18 | from . 
import regression_models 19 | 20 | __all__ = ['correlation_models', 'regression_models', 21 | 'GaussianProcessRegressor', 'GaussianProcessClassifier', 22 | 'kernels'] 23 | -------------------------------------------------------------------------------- /sklearn/metrics/cluster/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy 4 | from numpy.distutils.misc_util import Configuration 5 | 6 | 7 | def configuration(parent_package="", top_path=None): 8 | config = Configuration("cluster", parent_package, top_path) 9 | libraries = [] 10 | if os.name == 'posix': 11 | libraries.append('m') 12 | config.add_extension("expected_mutual_info_fast", 13 | sources=["expected_mutual_info_fast.pyx"], 14 | include_dirs=[numpy.get_include()], 15 | libraries=libraries) 16 | 17 | config.add_subpackage("tests") 18 | 19 | return config 20 | 21 | 22 | if __name__ == "__main__": 23 | from numpy.distutils.core import setup 24 | setup(**configuration().todict()) 25 | -------------------------------------------------------------------------------- /sklearn/externals/joblib/externals/loky/backend/compat.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | ############################################################################### 3 | # Compat file to import the correct modules for each platform and python 4 | # version. 5 | # 6 | # author: Thomas Moreau and Olivier grisel 7 | # 8 | import sys 9 | 10 | if sys.version_info[:2] >= (3, 3): 11 | import queue 12 | else: 13 | import Queue as queue 14 | 15 | from pickle import PicklingError 16 | 17 | if sys.version_info >= (3, 4): 18 | from multiprocessing.process import BaseProcess 19 | else: 20 | from multiprocessing.process import Process as BaseProcess 21 | 22 | # Platform specific compat 23 | if sys.platform == "win32": 24 | from .compat_win32 import * 25 | else: 26 | from .compat_posix import * 27 | -------------------------------------------------------------------------------- /sklearn/datasets/images/README.txt: -------------------------------------------------------------------------------- 1 | Image: china.jpg 2 | Released under a creative commons license. [1] 3 | Attribution: Some rights reserved by danielbuechele [2] 4 | Retrieved 21st August, 2011 from [3] by Robert Layton 5 | 6 | [1] https://creativecommons.org/licenses/by/2.0/ 7 | [2] https://www.flickr.com/photos/danielbuechele/ 8 | [3] https://www.flickr.com/photos/danielbuechele/6061409035/sizes/z/in/photostream/ 9 | 10 | 11 | Image: flower.jpg 12 | Released under a creative commons license. 
[1] 13 | Attribution: Some rights reserved by danielbuechele [2] 14 | Retrieved 21st August, 2011 from [3] by Robert Layton 15 | 16 | [1] https://creativecommons.org/licenses/by/2.0/ 17 | [2] https://www.flickr.com/photos/vultilion/ 18 | [3] https://www.flickr.com/photos/vultilion/6056698931/sizes/z/in/photostream/ 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /sklearn/svm/src/liblinear/tron.h: -------------------------------------------------------------------------------- 1 | #ifndef _TRON_H 2 | #define _TRON_H 3 | 4 | class function 5 | { 6 | public: 7 | virtual double fun(double *w) = 0 ; 8 | virtual void grad(double *w, double *g) = 0 ; 9 | virtual void Hv(double *s, double *Hs) = 0 ; 10 | 11 | virtual int get_nr_variable(void) = 0 ; 12 | virtual ~function(void){} 13 | }; 14 | 15 | class TRON 16 | { 17 | public: 18 | TRON(const function *fun_obj, double eps = 0.1, int max_iter = 1000); 19 | ~TRON(); 20 | 21 | int tron(double *w); 22 | void set_print_string(void (*i_print) (const char *buf)); 23 | 24 | private: 25 | int trcg(double delta, double *g, double *s, double *r); 26 | double norm_inf(int n, double *x); 27 | 28 | double eps; 29 | int max_iter; 30 | function *fun_obj; 31 | void info(const char *fmt,...); 32 | void (*tron_print_string)(const char *buf); 33 | }; 34 | #endif 35 | -------------------------------------------------------------------------------- /sklearn/utils/_logistic_sigmoid.pyx: -------------------------------------------------------------------------------- 1 | #cython: boundscheck=False 2 | #cython: cdivision=True 3 | #cython: wraparound=False 4 | 5 | from libc.math cimport log, exp 6 | 7 | import numpy as np 8 | cimport numpy as np 9 | 10 | ctypedef np.float64_t DTYPE_t 11 | 12 | 13 | cdef DTYPE_t _inner_log_logistic_sigmoid(DTYPE_t x): 14 | """Log of the logistic sigmoid function log(1 / (1 + e ** -x))""" 15 | if x > 0: 16 | return -log(1 + exp(-x)) 17 | else: 18 | return x - log(1 + exp(x)) 19 | 20 | 21 | def _log_logistic_sigmoid(int n_samples, int n_features, 22 | np.ndarray[DTYPE_t, ndim=2] X, 23 | np.ndarray[DTYPE_t, ndim=2] out): 24 | for i in range(n_samples): 25 | for j in range(n_features): 26 | out[i, j] = _inner_log_logistic_sigmoid(X[i, j]) 27 | return out 28 | -------------------------------------------------------------------------------- /sklearn/neighbors/typedefs.pyx: -------------------------------------------------------------------------------- 1 | #!python 2 | 3 | import numpy as np 4 | cimport numpy as np 5 | from libc.math cimport sqrt 6 | 7 | # use a hack to determine the associated numpy data types 8 | # NOTE: the following requires the buffer interface, only available in 9 | # numpy 1.5+. We'll choose the DTYPE by hand instead. 
10 | #cdef ITYPE_t idummy
11 | #cdef ITYPE_t[:] idummy_view = &idummy
12 | #ITYPE = np.asarray(idummy_view).dtype
13 | ITYPE = np.intp  # WARNING: this should match ITYPE_t in typedefs.pxd
14 | 
15 | #cdef DTYPE_t ddummy
16 | #cdef DTYPE_t[:] ddummy_view = &ddummy
17 | #DTYPE = np.asarray(ddummy_view).dtype
18 | DTYPE = np.float64  # WARNING: this should match DTYPE_t in typedefs.pxd
19 | 
20 | # some handy constants
21 | cdef DTYPE_t INF = np.inf
22 | cdef DTYPE_t PI = np.pi
23 | cdef DTYPE_t ROOT_2PI = sqrt(2 * PI)
24 | 
--------------------------------------------------------------------------------
/sklearn/externals/copy_joblib.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Script to do a local install of joblib
3 | set +x
4 | export LC_ALL=C
5 | INSTALL_FOLDER=tmp/joblib_install
6 | rm -rf joblib $INSTALL_FOLDER
7 | if [ -z "$1" ]
8 | then
9 |     JOBLIB=joblib
10 | else
11 |     JOBLIB=$1
12 | fi
13 | 
14 | pip install --no-cache $JOBLIB --target $INSTALL_FOLDER
15 | cp -r $INSTALL_FOLDER/joblib joblib
16 | rm -rf $INSTALL_FOLDER
17 | 
18 | # Needed to rewrite the doctests
19 | # Note: BSD sed -i needs an argument under OSX
20 | # so first renaming to .bak and then deleting backup files
21 | find joblib -name "*.py" | xargs sed -i.bak "s/from joblib/from sklearn.externals.joblib/"
22 | find joblib -name "*.bak" | xargs rm
23 | 
24 | # Remove the tests folders to speed-up test time for scikit-learn.
25 | # joblib is already tested on its own CI infrastructure upstream.
26 | rm -r joblib/test
27 | 
--------------------------------------------------------------------------------
/doc/tutorial/index.rst:
--------------------------------------------------------------------------------
1 | .. _tutorial_menu:
2 | 
3 | 
4 | 
5 | .. include:: ../includes/big_toc_css.rst
6 | .. include:: ../tune_toc.rst
7 | 
8 | ======================
9 | scikit-learn Tutorials
10 | ======================
11 | 
12 | |
13 | 
14 | .. toctree::
15 |    :maxdepth: 2
16 | 
17 |    basic/tutorial.rst
18 |    statistical_inference/index.rst
19 |    text_analytics/working_with_text_data.rst
20 |    machine_learning_map/index
21 |    ../presentations
22 | 
23 | |
24 | 
25 | .. note:: **Doctest Mode**
26 | 
27 |    The code examples in the above tutorials are written in a
28 |    *python-console* format. If you wish to easily execute these examples
29 |    in **IPython**, use::
30 | 
31 |       %doctest_mode
32 | 
33 |    in the IPython console. You can then simply copy and paste the examples
34 |    directly into IPython without having to worry about removing the **>>>**
35 |    manually.
36 | -------------------------------------------------------------------------------- /sklearn/datasets/setup.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy 3 | import os 4 | import platform 5 | 6 | 7 | def configuration(parent_package='', top_path=None): 8 | from numpy.distutils.misc_util import Configuration 9 | config = Configuration('datasets', parent_package, top_path) 10 | config.add_data_dir('data') 11 | config.add_data_dir('descr') 12 | config.add_data_dir('images') 13 | config.add_data_dir(os.path.join('tests', 'data')) 14 | if platform.python_implementation() != 'PyPy': 15 | config.add_extension('_svmlight_format', 16 | sources=['_svmlight_format.pyx'], 17 | include_dirs=[numpy.get_include()]) 18 | config.add_subpackage('tests') 19 | return config 20 | 21 | 22 | if __name__ == '__main__': 23 | from numpy.distutils.core import setup 24 | setup(**configuration(top_path='').todict()) 25 | -------------------------------------------------------------------------------- /sklearn/utils/_joblib.py: -------------------------------------------------------------------------------- 1 | # We need the absolute_import to avoid the local joblib to override the 2 | # site one 3 | from __future__ import absolute_import 4 | import os as _os 5 | import warnings as _warnings 6 | 7 | # An environment variable to use the site joblib 8 | if _os.environ.get('SKLEARN_SITE_JOBLIB', False): 9 | with _warnings.catch_warnings(): 10 | _warnings.simplefilter("ignore") 11 | # joblib imports may raise DeprecationWarning on certain Python 12 | # versions 13 | from joblib import __all__ 14 | from joblib import * # noqa 15 | from joblib import __version__ 16 | from joblib import logger 17 | else: 18 | from ..externals.joblib import __all__ # noqa 19 | from ..externals.joblib import * # noqa 20 | from ..externals.joblib import __version__ # noqa 21 | from ..externals.joblib import logger # noqa 22 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/test_california_housing.py: -------------------------------------------------------------------------------- 1 | """Test the california_housing loader. 2 | 3 | Skipped if california_housing is not already downloaded to data_home. 4 | """ 5 | 6 | from sklearn.datasets import fetch_california_housing 7 | from sklearn.utils.testing import SkipTest 8 | from sklearn.datasets.tests.test_common import check_return_X_y 9 | from functools import partial 10 | 11 | 12 | def fetch(*args, **kwargs): 13 | return fetch_california_housing(*args, download_if_missing=False, **kwargs) 14 | 15 | 16 | def test_fetch(): 17 | try: 18 | data = fetch() 19 | except IOError: 20 | raise SkipTest("California housing dataset can not be loaded.") 21 | assert((20640, 8) == data.data.shape) 22 | assert((20640, ) == data.target.shape) 23 | 24 | # test return_X_y option 25 | fetch_func = partial(fetch) 26 | check_return_X_y(data, fetch_func) 27 | -------------------------------------------------------------------------------- /doc/modules/isotonic.rst: -------------------------------------------------------------------------------- 1 | .. _isotonic: 2 | 3 | =================== 4 | Isotonic regression 5 | =================== 6 | 7 | .. currentmodule:: sklearn.isotonic 8 | 9 | The class :class:`IsotonicRegression` fits a non-decreasing function to data. 
10 | It solves the following problem:
11 | 
12 |   minimize :math:`\sum_i w_i (y_i - \hat{y}_i)^2`
13 | 
14 |   subject to :math:`\hat{y}_{min} = \hat{y}_1 \le \hat{y}_2 \le \ldots \le \hat{y}_n = \hat{y}_{max}`
15 | 
16 | where each :math:`w_i` is strictly positive and each :math:`y_i` is an
17 | arbitrary real number. It yields the non-decreasing vector that is closest
18 | to the observations in terms of mean squared error. In practice, these
19 | fitted values define a piecewise linear function.
20 | 
21 | .. figure:: ../auto_examples/images/sphx_glr_plot_isotonic_regression_001.png
22 |    :target: ../auto_examples/plot_isotonic_regression.html
23 |    :align: center
24 | 
--------------------------------------------------------------------------------
/examples/decomposition/plot_beta_divergence.py:
--------------------------------------------------------------------------------
1 | """
2 | ==============================
3 | Beta-divergence loss functions
4 | ==============================
5 | 
6 | A plot that compares the various Beta-divergence loss functions supported by
7 | the Multiplicative-Update ('mu') solver in :class:`sklearn.decomposition.NMF`.
8 | """
9 | import numpy as np
10 | import matplotlib.pyplot as plt
11 | from sklearn.decomposition.nmf import _beta_divergence
12 | 
13 | print(__doc__)
14 | 
15 | x = np.linspace(0.001, 4, 1000)
16 | y = np.zeros(x.shape)
17 | 
18 | colors = 'mbgyr'
19 | for j, beta in enumerate((0., 0.5, 1., 1.5, 2.)):
20 |     for i, xi in enumerate(x):
21 |         y[i] = _beta_divergence(1, xi, 1, beta)
22 |     name = "beta = %1.1f" % beta
23 |     plt.plot(x, y, label=name, color=colors[j])
24 | 
25 | plt.xlabel("x")
26 | plt.title("beta-divergence(1, x)")
27 | plt.legend(loc=0)
28 | plt.axis([0, 4, 0, 3])
29 | plt.show()
30 | 
--------------------------------------------------------------------------------
/doc/includes/big_toc_css.rst:
--------------------------------------------------------------------------------
1 | ..
2 |    File to ..include in a document with a big table of content, to give
3 |    it 'style'
4 | 
5 | .. raw:: html
6 | 
7 | 
42 | 
43 | 
44 | 
45 | 
--------------------------------------------------------------------------------
/sklearn/utils/tests/test_optimize.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | from sklearn.utils.optimize import newton_cg
4 | from scipy.optimize import fmin_ncg
5 | 
6 | from sklearn.utils.testing import assert_array_almost_equal
7 | 
8 | 
9 | def test_newton_cg():
10 |     # Test that newton_cg gives same result as scipy's fmin_ncg
11 | 
12 |     rng = np.random.RandomState(0)
13 |     A = rng.normal(size=(10, 10))
14 |     x0 = np.ones(10)
15 | 
16 |     def func(x):
17 |         Ax = A.dot(x)
18 |         return .5 * (Ax).dot(Ax)
19 | 
20 |     def grad(x):
21 |         return A.T.dot(A.dot(x))
22 | 
23 |     def hess(x, p):
24 |         return A.T.dot(A.dot(p))  # Hessian-vector product: (A.T A) p
25 | 
26 |     def grad_hess(x):
27 |         return grad(x), lambda x: A.T.dot(A.dot(x))
28 | 
29 |     assert_array_almost_equal(
30 |         newton_cg(grad_hess, func, grad, x0, tol=1e-10)[0],
31 |         fmin_ncg(f=func, x0=x0, fprime=grad, fhess_p=hess)
32 |     )
33 | 
--------------------------------------------------------------------------------
/sklearn/svm/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | The :mod:`sklearn.svm` module includes Support Vector Machine algorithms.
3 | """
4 | 
5 | # See http://scikit-learn.sourceforge.net/modules/svm.html for complete
6 | # documentation.
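# A minimal usage sketch (illustrative only; any classifier from this
# module follows the same fit/predict pattern):
#
#     >>> from sklearn.svm import SVC
#     >>> clf = SVC(kernel='linear').fit([[0, 0], [1, 1]], [0, 1])
#     >>> clf.predict([[2., 2.]])
#     array([1])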
7 | 8 | # Author: Fabian Pedregosa with help from 9 | # the scikit-learn community. LibSVM and LibLinear are copyright 10 | # of their respective owners. 11 | # License: BSD 3 clause (C) INRIA 2010 12 | 13 | from .classes import SVC, NuSVC, SVR, NuSVR, OneClassSVM, LinearSVC, \ 14 | LinearSVR 15 | from .bounds import l1_min_c 16 | from . import libsvm, liblinear, libsvm_sparse 17 | 18 | __all__ = ['LinearSVC', 19 | 'LinearSVR', 20 | 'NuSVC', 21 | 'NuSVR', 22 | 'OneClassSVM', 23 | 'SVC', 24 | 'SVR', 25 | 'l1_min_c', 26 | 'liblinear', 27 | 'libsvm', 28 | 'libsvm_sparse'] 29 | -------------------------------------------------------------------------------- /sklearn/linear_model/sgd_fast.pxd: -------------------------------------------------------------------------------- 1 | """Helper to load LossFunction from sgd_fast.pyx to sag_fast.pyx""" 2 | # License: BSD 3 clause 3 | 4 | cdef class LossFunction: 5 | cdef double loss(self, double p, double y) nogil 6 | cdef double _dloss(self, double p, double y) nogil 7 | 8 | 9 | cdef class Regression(LossFunction): 10 | cdef double loss(self, double p, double y) nogil 11 | cdef double _dloss(self, double p, double y) nogil 12 | 13 | 14 | cdef class Classification(LossFunction): 15 | cdef double loss(self, double p, double y) nogil 16 | cdef double _dloss(self, double p, double y) nogil 17 | 18 | 19 | cdef class Log(Classification): 20 | cdef double loss(self, double p, double y) nogil 21 | cdef double _dloss(self, double p, double y) nogil 22 | 23 | 24 | cdef class SquaredLoss(Regression): 25 | cdef double loss(self, double p, double y) nogil 26 | cdef double _dloss(self, double p, double y) nogil 27 | -------------------------------------------------------------------------------- /sklearn/utils/murmurhash.pxd: -------------------------------------------------------------------------------- 1 | """Export fast murmurhash C/C++ routines + cython wrappers""" 2 | 3 | cimport numpy as np 4 | 5 | # The C API is disabled for now, since it requires -I flags to get 6 | # compilation to work even when these functions are not used. 
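# (For reference, the cpdef wrappers declared at the end of this file can
# instead be cimported from other Cython modules -- an illustrative sketch:
#     from sklearn.utils.murmurhash cimport murmurhash3_int_u32
# )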
7 | #cdef extern from "MurmurHash3.h": 8 | # void MurmurHash3_x86_32(void* key, int len, unsigned int seed, 9 | # void* out) 10 | # 11 | # void MurmurHash3_x86_128(void* key, int len, unsigned int seed, 12 | # void* out) 13 | # 14 | # void MurmurHash3_x64_128(void* key, int len, unsigned int seed, 15 | # void* out) 16 | 17 | 18 | cpdef np.uint32_t murmurhash3_int_u32(int key, unsigned int seed) 19 | cpdef np.int32_t murmurhash3_int_s32(int key, unsigned int seed) 20 | cpdef np.uint32_t murmurhash3_bytes_u32(bytes key, unsigned int seed) 21 | cpdef np.int32_t murmurhash3_bytes_s32(bytes key, unsigned int seed) 22 | -------------------------------------------------------------------------------- /sklearn/utils/tests/test_show_versions.py: -------------------------------------------------------------------------------- 1 | 2 | from sklearn.utils._show_versions import _get_sys_info 3 | from sklearn.utils._show_versions import _get_deps_info 4 | from sklearn.utils._show_versions import show_versions 5 | 6 | 7 | def test_get_sys_info(): 8 | sys_info = _get_sys_info() 9 | 10 | assert 'python' in sys_info 11 | assert 'executable' in sys_info 12 | assert 'machine' in sys_info 13 | 14 | 15 | def test_get_deps_info(): 16 | deps_info = _get_deps_info() 17 | 18 | assert 'pip' in deps_info 19 | assert 'setuptools' in deps_info 20 | assert 'sklearn' in deps_info 21 | assert 'numpy' in deps_info 22 | assert 'scipy' in deps_info 23 | assert 'Cython' in deps_info 24 | assert 'pandas' in deps_info 25 | 26 | 27 | def test_show_versions_with_blas(capsys): 28 | show_versions() 29 | out, err = capsys.readouterr() 30 | assert 'python' in out 31 | assert 'numpy' in out 32 | assert 'BLAS' in out 33 | -------------------------------------------------------------------------------- /examples/model_selection/plot_cv_predict.py: -------------------------------------------------------------------------------- 1 | """ 2 | ==================================== 3 | Plotting Cross-Validated Predictions 4 | ==================================== 5 | 6 | This example shows how to use `cross_val_predict` to visualize prediction 7 | errors. 
8 | 9 | """ 10 | from sklearn import datasets 11 | from sklearn.model_selection import cross_val_predict 12 | from sklearn import linear_model 13 | import matplotlib.pyplot as plt 14 | 15 | lr = linear_model.LinearRegression() 16 | boston = datasets.load_boston() 17 | y = boston.target 18 | 19 | # cross_val_predict returns an array of the same size as `y` where each entry 20 | # is a prediction obtained by cross validation: 21 | predicted = cross_val_predict(lr, boston.data, y, cv=10) 22 | 23 | fig, ax = plt.subplots() 24 | ax.scatter(y, predicted, edgecolors=(0, 0, 0)) 25 | ax.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=4) 26 | ax.set_xlabel('Measured') 27 | ax.set_ylabel('Predicted') 28 | plt.show() 29 | -------------------------------------------------------------------------------- /benchmarks/plot_tsne_mnist.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import os.path as op 4 | 5 | import argparse 6 | 7 | 8 | LOG_DIR = "mnist_tsne_output" 9 | 10 | 11 | if __name__ == "__main__": 12 | parser = argparse.ArgumentParser('Plot benchmark results for t-SNE') 13 | parser.add_argument( 14 | '--labels', type=str, 15 | default=op.join(LOG_DIR, 'mnist_original_labels_10000.npy'), 16 | help='1D integer numpy array for labels') 17 | parser.add_argument( 18 | '--embedding', type=str, 19 | default=op.join(LOG_DIR, 'mnist_sklearn_TSNE_10000.npy'), 20 | help='2D float numpy array for embedded data') 21 | args = parser.parse_args() 22 | 23 | X = np.load(args.embedding) 24 | y = np.load(args.labels) 25 | 26 | for i in np.unique(y): 27 | mask = y == i 28 | plt.scatter(X[mask, 0], X[mask, 1], alpha=0.2, label=int(i)) 29 | plt.legend(loc='best') 30 | plt.show() 31 | -------------------------------------------------------------------------------- /doc/tutorial/text_analytics/data/movie_reviews/fetch_data.py: -------------------------------------------------------------------------------- 1 | """Script to download the movie review dataset""" 2 | 3 | import os 4 | import tarfile 5 | from contextlib import closing 6 | try: 7 | from urllib import urlopen 8 | except ImportError: 9 | from urllib.request import urlopen 10 | 11 | 12 | URL = ("http://www.cs.cornell.edu/people/pabo/" 13 | "movie-review-data/review_polarity.tar.gz") 14 | 15 | ARCHIVE_NAME = URL.rsplit('/', 1)[1] 16 | DATA_FOLDER = "txt_sentoken" 17 | 18 | 19 | if not os.path.exists(DATA_FOLDER): 20 | 21 | if not os.path.exists(ARCHIVE_NAME): 22 | print("Downloading dataset from %s (3 MB)" % URL) 23 | opener = urlopen(URL) 24 | with open(ARCHIVE_NAME, 'wb') as archive: 25 | archive.write(opener.read()) 26 | 27 | print("Decompressing %s" % ARCHIVE_NAME) 28 | with closing(tarfile.open(ARCHIVE_NAME, "r:gz")) as archive: 29 | archive.extractall(path='.') 30 | os.remove(ARCHIVE_NAME) 31 | -------------------------------------------------------------------------------- /sklearn/cluster/tests/common.py: -------------------------------------------------------------------------------- 1 | """ 2 | Common utilities for testing clustering. 
3 | 
4 | """
5 | 
6 | import numpy as np
7 | 
8 | 
9 | ###############################################################################
10 | # Generate sample data
11 | 
12 | def generate_clustered_data(seed=0, n_clusters=3, n_features=2,
13 |                             n_samples_per_cluster=20, std=.4):
14 |     prng = np.random.RandomState(seed)
15 | 
16 |     # the data is voluntarily shifted away from zero to check clustering
17 |     # algorithm robustness with regard to non-centered data
18 |     means = np.array([[1, 1, 1, 0],
19 |                       [-1, -1, 0, 1],
20 |                       [1, -1, 1, 1],
21 |                       [-1, 1, 1, 0],
22 |                       ]) + 10
23 | 
24 |     X = np.empty((0, n_features))
25 |     for i in range(n_clusters):
26 |         X = np.r_[X, means[i][:n_features]
27 |                   + std * prng.randn(n_samples_per_cluster, n_features)]
28 |     return X
29 | 
--------------------------------------------------------------------------------
/sklearn/decomposition/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy
3 | from numpy.distutils.misc_util import Configuration
4 | 
5 | 
6 | def configuration(parent_package="", top_path=None):
7 |     config = Configuration("decomposition", parent_package, top_path)
8 | 
9 |     libraries = []
10 |     if os.name == 'posix':
11 |         libraries.append('m')
12 | 
13 |     config.add_extension("_online_lda",
14 |                          sources=["_online_lda.pyx"],
15 |                          include_dirs=[numpy.get_include()],
16 |                          libraries=libraries)
17 | 
18 |     config.add_extension('cdnmf_fast',
19 |                          sources=['cdnmf_fast.pyx'],
20 |                          include_dirs=[numpy.get_include()],
21 |                          libraries=libraries)
22 | 
23 |     config.add_subpackage("tests")
24 | 
25 |     return config
26 | 
27 | if __name__ == "__main__":
28 |     from numpy.distutils.core import setup
29 |     setup(**configuration().todict())
30 | 
--------------------------------------------------------------------------------
/sklearn/utils/weight_vector.pxd:
--------------------------------------------------------------------------------
1 | """Efficient (dense) parameter vector implementation for linear models. """
2 | 
3 | cimport numpy as np
4 | 
5 | 
6 | cdef extern from "math.h":
7 |     cdef extern double sqrt(double x)
8 | 
9 | 
10 | cdef class WeightVector(object):
11 |     cdef np.ndarray w
12 |     cdef np.ndarray aw
13 |     cdef double *w_data_ptr
14 |     cdef double *aw_data_ptr
15 |     cdef double wscale
16 |     cdef double average_a
17 |     cdef double average_b
18 |     cdef int n_features
19 |     cdef double sq_norm
20 | 
21 |     cdef void add(self, double *x_data_ptr, int *x_ind_ptr,
22 |                   int xnnz, double c) nogil
23 |     cdef void add_average(self, double *x_data_ptr, int *x_ind_ptr,
24 |                           int xnnz, double c, double num_iter) nogil
25 |     cdef double dot(self, double *x_data_ptr, int *x_ind_ptr,
26 |                     int xnnz) nogil
27 |     cdef void scale(self, double c) nogil
28 |     cdef void reset_wscale(self) nogil
29 |     cdef double norm(self) nogil
30 | 
--------------------------------------------------------------------------------
/sklearn/externals/joblib/externals/loky/__init__.py:
--------------------------------------------------------------------------------
1 | r"""The :mod:`loky` module manages a pool of workers that can be re-used across time.
2 | It provides a robust and dynamic implementation of the
3 | :class:`ProcessPoolExecutor` and a function :func:`get_reusable_executor` which
4 | hides the pool management under the hood.
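A minimal usage sketch (illustrative only; assumes the mapped callable is
picklable):

    >>> executor = get_reusable_executor(max_workers=2)
    >>> list(executor.map(abs, [-1, 2, -3]))
    [1, 2, 3]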
5 | """ 6 | from ._base import Executor, Future 7 | from ._base import wait, as_completed 8 | from ._base import TimeoutError, CancelledError 9 | from ._base import ALL_COMPLETED, FIRST_COMPLETED, FIRST_EXCEPTION 10 | 11 | from .backend.context import cpu_count 12 | from .reusable_executor import get_reusable_executor 13 | from .process_executor import BrokenProcessPool, ProcessPoolExecutor 14 | 15 | 16 | __all__ = ["get_reusable_executor", "cpu_count", "wait", "as_completed", 17 | "Future", "Executor", "ProcessPoolExecutor", 18 | "BrokenProcessPool", "CancelledError", "TimeoutError", 19 | "FIRST_COMPLETED", "FIRST_EXCEPTION", "ALL_COMPLETED", ] 20 | 21 | 22 | __version__ = '2.3.1' 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.so 3 | *.pyd 4 | *~ 5 | .#* 6 | *.lprof 7 | *.swp 8 | *.swo 9 | .DS_Store 10 | build 11 | sklearn/datasets/__config__.py 12 | sklearn/**/*.html 13 | 14 | dist/ 15 | MANIFEST 16 | doc/_build/ 17 | doc/auto_examples/ 18 | doc/modules/generated/ 19 | doc/datasets/generated/ 20 | *.pdf 21 | pip-log.txt 22 | scikit_learn.egg-info/ 23 | .coverage 24 | coverage 25 | *.py,cover 26 | .tags* 27 | tags 28 | covtype.data.gz 29 | 20news-18828/ 30 | 20news-18828.tar.gz 31 | coverages.zip 32 | samples.zip 33 | doc/coverages.zip 34 | doc/samples.zip 35 | coverages 36 | samples 37 | doc/coverages 38 | doc/samples 39 | *.prof 40 | .tox/ 41 | .coverage 42 | 43 | lfw_preprocessed/ 44 | nips2010_pdf/ 45 | 46 | *.nt.bz2 47 | *.tar.gz 48 | *.tgz 49 | 50 | examples/cluster/joblib 51 | reuters/ 52 | benchmarks/bench_covertype_data/ 53 | 54 | *.prefs 55 | .pydevproject 56 | .idea 57 | .vscode 58 | 59 | *.c 60 | *.cpp 61 | 62 | !*/src/*.c 63 | !*/src/*.cpp 64 | *.sln 65 | *.pyproj 66 | 67 | # Used by py.test 68 | .cache 69 | .pytest_cache/ 70 | _configtest.o.d 71 | -------------------------------------------------------------------------------- /doc/whats_new.rst: -------------------------------------------------------------------------------- 1 | .. currentmodule:: sklearn 2 | .. include:: includes/big_toc_css.rst 3 | .. include:: whats_new/_contributors.rst 4 | 5 | Release History 6 | =============== 7 | 8 | Release notes for current and recent releases are detailed on this page, with 9 | :ref:`previous releases ` linked below. 10 | 11 | **Tip:** `Subscribe to scikit-learn releases `__ 12 | on libraries.io to be notified when new versions are released. 13 | 14 | .. include:: whats_new/v0.21.rst 15 | .. include:: whats_new/v0.20.rst 16 | 17 | .. _previous_releases_whats_new: 18 | 19 | Previous Releases 20 | ================= 21 | .. 
toctree:: 22 | :maxdepth: 1 23 | 24 | Version 0.19 25 | Version 0.18 26 | Version 0.17 27 | Version 0.16 28 | Version 0.15 29 | Version 0.14 30 | Version 0.13 31 | Older Versions 32 | -------------------------------------------------------------------------------- /doc/tutorial/text_analytics/data/twenty_newsgroups/fetch_data.py: -------------------------------------------------------------------------------- 1 | """Script to download the 20 newsgroups text classification set""" 2 | 3 | import os 4 | import tarfile 5 | from contextlib import closing 6 | 7 | try: 8 | from urllib import urlopen 9 | except ImportError: 10 | from urllib.request import urlopen 11 | 12 | URL = ("http://people.csail.mit.edu/jrennie/" 13 | "20Newsgroups/20news-bydate.tar.gz") 14 | 15 | ARCHIVE_NAME = URL.rsplit('/', 1)[1] 16 | TRAIN_FOLDER = "20news-bydate-train" 17 | TEST_FOLDER = "20news-bydate-test" 18 | 19 | 20 | if not os.path.exists(TRAIN_FOLDER) or not os.path.exists(TEST_FOLDER): 21 | 22 | if not os.path.exists(ARCHIVE_NAME): 23 | print("Downloading dataset from %s (14 MB)" % URL) 24 | opener = urlopen(URL) 25 | with open(ARCHIVE_NAME, 'wb') as archive: 26 | archive.write(opener.read()) 27 | 28 | print("Decompressing %s" % ARCHIVE_NAME) 29 | with closing(tarfile.open(ARCHIVE_NAME, "r:gz")) as archive: 30 | archive.extractall(path='.') 31 | os.remove(ARCHIVE_NAME) 32 | -------------------------------------------------------------------------------- /sklearn/utils/tests/test_fast_dict.py: -------------------------------------------------------------------------------- 1 | """ Test fast_dict. 2 | """ 3 | import numpy as np 4 | 5 | from sklearn.utils.fast_dict import IntFloatDict, argmin 6 | from sklearn.utils.testing import assert_equal 7 | from sklearn.externals.six.moves import xrange 8 | 9 | 10 | def test_int_float_dict(): 11 | rng = np.random.RandomState(0) 12 | keys = np.unique(rng.randint(100, size=10).astype(np.intp)) 13 | values = rng.rand(len(keys)) 14 | 15 | d = IntFloatDict(keys, values) 16 | for key, value in zip(keys, values): 17 | assert_equal(d[key], value) 18 | assert_equal(len(d), len(keys)) 19 | 20 | d.append(120, 3.) 21 | assert_equal(d[120], 3.0) 22 | assert_equal(len(d), len(keys) + 1) 23 | for i in xrange(2000): 24 | d.append(i + 1000, 4.0) 25 | assert_equal(d[1100], 4.0) 26 | 27 | 28 | def test_int_float_dict_argmin(): 29 | # Test the argmin implementation on the IntFloatDict 30 | keys = np.arange(100, dtype=np.intp) 31 | values = np.arange(100, dtype=np.float64) 32 | d = IntFloatDict(keys, values) 33 | assert_equal(argmin(d), (0, 0)) 34 | -------------------------------------------------------------------------------- /examples/feature_selection/plot_rfe_digits.py: -------------------------------------------------------------------------------- 1 | """ 2 | ============================= 3 | Recursive feature elimination 4 | ============================= 5 | 6 | A recursive feature elimination example showing the relevance of pixels in 7 | a digit classification task. 8 | 9 | .. 
note:: 10 | 11 | See also :ref:`sphx_glr_auto_examples_feature_selection_plot_rfe_with_cross_validation.py` 12 | 13 | """ 14 | print(__doc__) 15 | 16 | from sklearn.svm import SVC 17 | from sklearn.datasets import load_digits 18 | from sklearn.feature_selection import RFE 19 | import matplotlib.pyplot as plt 20 | 21 | # Load the digits dataset 22 | digits = load_digits() 23 | X = digits.images.reshape((len(digits.images), -1)) 24 | y = digits.target 25 | 26 | # Create the RFE object and rank each pixel 27 | svc = SVC(kernel="linear", C=1) 28 | rfe = RFE(estimator=svc, n_features_to_select=1, step=1) 29 | rfe.fit(X, y) 30 | ranking = rfe.ranking_.reshape(digits.images[0].shape) 31 | 32 | # Plot pixel ranking 33 | plt.matshow(ranking, cmap=plt.cm.Blues) 34 | plt.colorbar() 35 | plt.title("Ranking of pixels with RFE") 36 | plt.show() 37 | -------------------------------------------------------------------------------- /examples/datasets/plot_digits_last_image.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | ========================================================= 6 | The Digit Dataset 7 | ========================================================= 8 | 9 | This dataset is made up of 1797 8x8 images. Each image, 10 | like the one shown below, is of a hand-written digit. 11 | In order to utilize an 8x8 figure like this, we'd have to 12 | first transform it into a feature vector with length 64. 13 | 14 | See `here 15 | `_ 16 | for more information about this dataset. 17 | """ 18 | print(__doc__) 19 | 20 | 21 | # Code source: Gaël Varoquaux 22 | # Modified for documentation by Jaques Grobler 23 | # License: BSD 3 clause 24 | 25 | from sklearn import datasets 26 | 27 | import matplotlib.pyplot as plt 28 | 29 | #Load the digits dataset 30 | digits = datasets.load_digits() 31 | 32 | #Display the first digit 33 | plt.figure(1, figsize=(3, 3)) 34 | plt.imshow(digits.images[-1], cmap=plt.cm.gray_r, interpolation='nearest') 35 | plt.show() 36 | -------------------------------------------------------------------------------- /sklearn/feature_selection/tests/test_variance_threshold.py: -------------------------------------------------------------------------------- 1 | from sklearn.utils.testing import (assert_array_equal, assert_equal, 2 | assert_raises) 3 | 4 | from scipy.sparse import bsr_matrix, csc_matrix, csr_matrix 5 | 6 | from sklearn.feature_selection import VarianceThreshold 7 | 8 | data = [[0, 1, 2, 3, 4], 9 | [0, 2, 2, 3, 5], 10 | [1, 1, 2, 4, 0]] 11 | 12 | 13 | def test_zero_variance(): 14 | # Test VarianceThreshold with default setting, zero variance. 15 | 16 | for X in [data, csr_matrix(data), csc_matrix(data), bsr_matrix(data)]: 17 | sel = VarianceThreshold().fit(X) 18 | assert_array_equal([0, 1, 3, 4], sel.get_support(indices=True)) 19 | 20 | assert_raises(ValueError, VarianceThreshold().fit, [[0, 1, 2, 3]]) 21 | assert_raises(ValueError, VarianceThreshold().fit, [[0, 1], [0, 1]]) 22 | 23 | 24 | def test_variance_threshold(): 25 | # Test VarianceThreshold with custom variance. 26 | for X in [data, csr_matrix(data)]: 27 | X = VarianceThreshold(threshold=.4).fit_transform(X) 28 | assert_equal((len(data), 1), X.shape) 29 | -------------------------------------------------------------------------------- /sklearn/datasets/descr/covtype.rst: -------------------------------------------------------------------------------- 1 | .. 
_covtype_dataset: 2 | 3 | Forest covertypes 4 | ----------------- 5 | 6 | The samples in this dataset correspond to 30×30m patches of forest in the US, 7 | collected for the task of predicting each patch's cover type, 8 | i.e. the dominant species of tree. 9 | There are seven covertypes, making this a multiclass classification problem. 10 | Each sample has 54 features, described on the 11 | `dataset's homepage `__. 12 | Some of the features are boolean indicators, 13 | while others are discrete or continuous measurements. 14 | 15 | **Data Set Characteristics:** 16 | 17 | ================= ============ 18 | Classes 7 19 | Samples total 581012 20 | Dimensionality 54 21 | Features int 22 | ================= ============ 23 | 24 | :func:`sklearn.datasets.fetch_covtype` will load the covertype dataset; 25 | it returns a dictionary-like object 26 | with the feature matrix in the ``data`` member 27 | and the target values in ``target``. 28 | The dataset will be downloaded from the web if necessary. 29 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/test_covtype.py: -------------------------------------------------------------------------------- 1 | """Test the covtype loader. 2 | 3 | Skipped if covtype is not already downloaded to data_home. 4 | """ 5 | 6 | from sklearn.datasets import fetch_covtype 7 | from sklearn.utils.testing import assert_equal, SkipTest 8 | from sklearn.datasets.tests.test_common import check_return_X_y 9 | from functools import partial 10 | 11 | 12 | def fetch(*args, **kwargs): 13 | return fetch_covtype(*args, download_if_missing=False, **kwargs) 14 | 15 | 16 | def test_fetch(): 17 | try: 18 | data1 = fetch(shuffle=True, random_state=42) 19 | except IOError: 20 | raise SkipTest("Covertype dataset can not be loaded.") 21 | 22 | data2 = fetch(shuffle=True, random_state=37) 23 | 24 | X1, X2 = data1['data'], data2['data'] 25 | assert_equal((581012, 54), X1.shape) 26 | assert_equal(X1.shape, X2.shape) 27 | 28 | assert_equal(X1.sum(), X2.sum()) 29 | 30 | y1, y2 = data1['target'], data2['target'] 31 | assert_equal((X1.shape[0],), y1.shape) 32 | assert_equal((X1.shape[0],), y2.shape) 33 | 34 | # test return_X_y option 35 | fetch_func = partial(fetch) 36 | check_return_X_y(data1, fetch_func) 37 | -------------------------------------------------------------------------------- /doc/tutorial/text_analytics/solutions/generate_skeletons.py: -------------------------------------------------------------------------------- 1 | """Generate skeletons from the example code""" 2 | import os 3 | 4 | exercise_dir = os.path.dirname(__file__) 5 | if exercise_dir == '': 6 | exercise_dir = '.' 
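# (os.path.dirname(__file__) is '' when the script is invoked from its own
# directory, hence the fallback to '.')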
7 | 8 | skeleton_dir = os.path.abspath(os.path.join(exercise_dir, '..', 'skeletons')) 9 | if not os.path.exists(skeleton_dir): 10 | os.makedirs(skeleton_dir) 11 | 12 | solutions = os.listdir(exercise_dir) 13 | 14 | for f in solutions: 15 | if not f.endswith('.py'): 16 | continue 17 | 18 | if f == os.path.basename(__file__): 19 | continue 20 | 21 | print("Generating skeleton for %s" % f) 22 | 23 | input_file = open(os.path.join(exercise_dir, f)) 24 | output_file = open(os.path.join(skeleton_dir, f), 'w') 25 | 26 | in_exercise_region = False 27 | 28 | for line in input_file: 29 | linestrip = line.strip() 30 | if len(linestrip) == 0: 31 | in_exercise_region = False 32 | elif linestrip.startswith('# TASK:'): 33 | in_exercise_region = True 34 | 35 | if not in_exercise_region or linestrip.startswith('#'): 36 | output_file.write(line) 37 | 38 | output_file.close() 39 | -------------------------------------------------------------------------------- /sklearn/neighbors/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.neighbors` module implements the k-nearest neighbors 3 | algorithm. 4 | """ 5 | 6 | from .ball_tree import BallTree 7 | from .kd_tree import KDTree 8 | from .dist_metrics import DistanceMetric 9 | from .graph import kneighbors_graph, radius_neighbors_graph 10 | from .unsupervised import NearestNeighbors 11 | from .classification import KNeighborsClassifier, RadiusNeighborsClassifier 12 | from .regression import KNeighborsRegressor, RadiusNeighborsRegressor 13 | from .nearest_centroid import NearestCentroid 14 | from .kde import KernelDensity 15 | from .lof import LocalOutlierFactor 16 | from .base import VALID_METRICS, VALID_METRICS_SPARSE 17 | 18 | __all__ = ['BallTree', 19 | 'DistanceMetric', 20 | 'KDTree', 21 | 'KNeighborsClassifier', 22 | 'KNeighborsRegressor', 23 | 'NearestCentroid', 24 | 'NearestNeighbors', 25 | 'RadiusNeighborsClassifier', 26 | 'RadiusNeighborsRegressor', 27 | 'kneighbors_graph', 28 | 'radius_neighbors_graph', 29 | 'KernelDensity', 30 | 'LocalOutlierFactor', 31 | 'VALID_METRICS', 32 | 'VALID_METRICS_SPARSE'] 33 | -------------------------------------------------------------------------------- /examples/exercises/plot_digits_classification_exercise.py: -------------------------------------------------------------------------------- 1 | """ 2 | ================================ 3 | Digits Classification Exercise 4 | ================================ 5 | 6 | A tutorial exercise regarding the use of classification techniques on 7 | the Digits dataset. 8 | 9 | This exercise is used in the :ref:`clf_tut` part of the 10 | :ref:`supervised_learning_tut` section of the 11 | :ref:`stat_learn_tut_index`. 
12 | """
13 | print(__doc__)
14 | 
15 | from sklearn import datasets, neighbors, linear_model
16 | 
17 | digits = datasets.load_digits()
18 | X_digits = digits.data / digits.data.max()
19 | y_digits = digits.target
20 | 
21 | n_samples = len(X_digits)
22 | 
23 | X_train = X_digits[:int(.9 * n_samples)]
24 | y_train = y_digits[:int(.9 * n_samples)]
25 | X_test = X_digits[int(.9 * n_samples):]
26 | y_test = y_digits[int(.9 * n_samples):]
27 | 
28 | knn = neighbors.KNeighborsClassifier()
29 | logistic = linear_model.LogisticRegression(solver='lbfgs', max_iter=1000,
30 |                                            multi_class='multinomial')
31 | 
32 | print('KNN score: %f' % knn.fit(X_train, y_train).score(X_test, y_test))
33 | print('LogisticRegression score: %f'
34 |       % logistic.fit(X_train, y_train).score(X_test, y_test))
35 | 
--------------------------------------------------------------------------------
/examples/feature_selection/plot_feature_selection_pipeline.py:
--------------------------------------------------------------------------------
1 | """
2 | ==================
3 | Pipeline Anova SVM
4 | ==================
5 | 
6 | Simple usage of Pipeline that successively runs a univariate
7 | feature selection with ANOVA and then a C-SVM trained on the selected features.
8 | """
9 | from sklearn import svm
10 | from sklearn.datasets import samples_generator
11 | from sklearn.feature_selection import SelectKBest, f_classif
12 | from sklearn.pipeline import make_pipeline
13 | from sklearn.model_selection import train_test_split
14 | from sklearn.metrics import classification_report
15 | 
16 | print(__doc__)
17 | 
18 | # import some data to play with
19 | X, y = samples_generator.make_classification(
20 |     n_features=20, n_informative=3, n_redundant=0, n_classes=4,
21 |     n_clusters_per_class=2)
22 | 
23 | X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
24 | 
25 | # ANOVA SVM-C
26 | # 1) anova filter (f_classif is the ANOVA F-test for classification),
27 | #    take 3 best ranked features
28 | anova_filter = SelectKBest(f_classif, k=3)
29 | # 2) svm
30 | clf = svm.SVC(kernel='linear')
31 | 
32 | anova_svm = make_pipeline(anova_filter, clf)
33 | anova_svm.fit(X_train, y_train)
34 | y_pred = anova_svm.predict(X_test)
35 | print(classification_report(y_test, y_pred))
36 | 
--------------------------------------------------------------------------------
/sklearn/metrics/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | import os.path
3 | 
4 | import numpy
5 | from numpy.distutils.misc_util import Configuration
6 | 
7 | from sklearn._build_utils import get_blas_info
8 | 
9 | 
10 | def configuration(parent_package="", top_path=None):
11 |     config = Configuration("metrics", parent_package, top_path)
12 | 
13 |     cblas_libs, blas_info = get_blas_info()
14 |     if os.name == 'posix':
15 |         cblas_libs.append('m')
16 | 
17 |     config.add_subpackage('cluster')
18 |     config.add_extension("pairwise_fast",
19 |                          sources=["pairwise_fast.pyx"],
20 |                          include_dirs=[os.path.join('..', 'src', 'cblas'),
21 |                                        numpy.get_include(),
22 |                                        blas_info.pop('include_dirs', [])],
23 |                          libraries=cblas_libs,
24 |                          extra_compile_args=blas_info.pop('extra_compile_args',
25 |                                                           []),
26 |                          **blas_info)
27 |     config.add_subpackage('tests')
28 | 
29 |     return config
30 | 
31 | if __name__ == "__main__":
32 |     from numpy.distutils.core import setup
33 |     setup(**configuration().todict())
34 | 
--------------------------------------------------------------------------------
/examples/linear_model/plot_lasso_lars.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """
3 | =====================
4 | Lasso path using LARS
5 | =====================
6 | 
7 | Computes the Lasso path along the regularization parameter using the LARS
8 | algorithm on the diabetes dataset. Each color represents a different
9 | feature of the coefficient vector, and this is displayed as a function
10 | of the regularization parameter.
11 | 
12 | """
13 | print(__doc__)
14 | 
15 | # Author: Fabian Pedregosa
16 | #         Alexandre Gramfort
17 | # License: BSD 3 clause
18 | 
19 | import numpy as np
20 | import matplotlib.pyplot as plt
21 | 
22 | from sklearn import linear_model
23 | from sklearn import datasets
24 | 
25 | diabetes = datasets.load_diabetes()
26 | X = diabetes.data
27 | y = diabetes.target
28 | 
29 | print("Computing regularization path using the LARS ...")
30 | _, _, coefs = linear_model.lars_path(X, y, method='lasso', verbose=True)
31 | 
32 | xx = np.sum(np.abs(coefs.T), axis=1)
33 | xx /= xx[-1]
34 | 
35 | plt.plot(xx, coefs.T)
36 | ymin, ymax = plt.ylim()
37 | plt.vlines(xx, ymin, ymax, linestyle='dashed')
38 | plt.xlabel('|coef| / max|coef|')
39 | plt.ylabel('Coefficients')
40 | plt.title('LASSO Path')
41 | plt.axis('tight')
42 | plt.show()
43 | 
--------------------------------------------------------------------------------
/doc/includes/bigger_toc_css.rst:
--------------------------------------------------------------------------------
1 | ..
2 |    File to ..include in a document with a very big table of content, to
3 |    give it 'style'
4 | 
5 | .. raw:: html
6 | 
7 | 
58 | 
59 | 
60 | 
61 | 
--------------------------------------------------------------------------------
/examples/svm/plot_svm_nonlinear.py:
--------------------------------------------------------------------------------
1 | """
2 | ==============
3 | Non-linear SVM
4 | ==============
5 | 
6 | Perform binary classification using non-linear SVC
7 | with RBF kernel. The target to predict is an XOR of the
8 | inputs.
9 | 
10 | The color map illustrates the decision function learned by the SVC.
11 | """
12 | print(__doc__)
13 | 
14 | import numpy as np
15 | import matplotlib.pyplot as plt
16 | from sklearn import svm
17 | 
18 | xx, yy = np.meshgrid(np.linspace(-3, 3, 500),
19 |                      np.linspace(-3, 3, 500))
20 | np.random.seed(0)
21 | X = np.random.randn(300, 2)
22 | Y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0)
23 | 
24 | # fit the model
25 | clf = svm.NuSVC()
26 | clf.fit(X, Y)
27 | 
28 | # plot the decision function for each datapoint on the grid
29 | Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
30 | Z = Z.reshape(xx.shape)
31 | 
32 | plt.imshow(Z, interpolation='nearest',
33 |            extent=(xx.min(), xx.max(), yy.min(), yy.max()), aspect='auto',
34 |            origin='lower', cmap=plt.cm.PuOr_r)
35 | contours = plt.contour(xx, yy, Z, levels=[0], linewidths=2,
36 |                        linestyles='--')
37 | plt.scatter(X[:, 0], X[:, 1], s=30, c=Y, cmap=plt.cm.Paired,
38 |             edgecolors='k')
39 | plt.xticks(())
40 | plt.yticks(())
41 | plt.axis([-3, 3, -3, 3])
42 | plt.show()
43 | 
--------------------------------------------------------------------------------
/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | 
5 | 
6 | #### Reference Issues/PRs
7 | 
13 | 
14 | 
15 | #### What does this implement/fix? Explain your changes.
16 | 
17 | 
18 | #### Any other comments?
19 | 
20 | 
21 | 
33 | 
--------------------------------------------------------------------------------
/sklearn/utils/src/MurmurHash3.h:
--------------------------------------------------------------------------------
1 | //-----------------------------------------------------------------------------
2 | // MurmurHash3 was written by Austin Appleby, and is placed in the public
3 | // domain. The author hereby disclaims copyright to this source code.
4 | 
5 | #ifndef _MURMURHASH3_H_
6 | #define _MURMURHASH3_H_
7 | 
8 | //-----------------------------------------------------------------------------
9 | // Platform-specific functions and macros
10 | 
11 | // Microsoft Visual Studio
12 | 
13 | #if defined(_MSC_VER)
14 | 
15 | typedef unsigned char uint8_t;
16 | typedef unsigned long uint32_t;
17 | typedef unsigned __int64 uint64_t;
18 | 
19 | // Other compilers
20 | 
21 | #else // defined(_MSC_VER)
22 | 
23 | #include <stdint.h>
24 | 
25 | #endif // !defined(_MSC_VER)
26 | 
27 | //-----------------------------------------------------------------------------
28 | #ifdef __cplusplus
29 | extern "C" {
30 | #endif
31 | 
32 | 
33 | void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out );
34 | 
35 | void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out );
36 | 
37 | void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );
38 | 
39 | #ifdef __cplusplus
40 | }
41 | #endif
42 | 
43 | //-----------------------------------------------------------------------------
44 | 
45 | #endif // _MURMURHASH3_H_
46 | 
--------------------------------------------------------------------------------
/sklearn/decomposition/cdnmf_fast.pyx:
--------------------------------------------------------------------------------
1 | # cython: cdivision=True
2 | # cython: boundscheck=False
3 | # cython: wraparound=False
4 | 
5 | # Author: Mathieu Blondel, Tom Dupre la Tour
6 | # License: BSD 3 clause
7 | 
8 | cimport cython
9 | from libc.math cimport fabs
10 | 
11 | 
12 | def _update_cdnmf_fast(double[:, ::1] W, double[:, :] HHt, double[:, :] XHt,
13 |                        Py_ssize_t[::1] permutation):
14 |     cdef double violation = 0
15 |     cdef Py_ssize_t n_components = W.shape[1]
16 |     cdef Py_ssize_t n_samples = W.shape[0]  # n_features for H update
17 |     cdef double grad, pg, hess
18 |     cdef Py_ssize_t i, r, s, t
19 | 
20 |     with nogil:
21 |         for s in range(n_components):
22 |             t = permutation[s]
23 | 
24 |             for i in range(n_samples):
25 |                 # gradient = GW[i, t] where GW = np.dot(W, HHt) - XHt
26 |                 grad = -XHt[i, t]
27 | 
28 |                 for r in range(n_components):
29 |                     grad += HHt[t, r] * W[i, r]
30 | 
31 |                 # projected gradient
32 |                 pg = min(0., grad) if W[i, t] == 0 else grad
33 |                 violation += fabs(pg)
34 | 
35 |                 # Hessian
36 |                 hess = HHt[t, t]
37 | 
38 |                 if hess != 0:
39 |                     W[i, t] = max(W[i, t] - grad / hess, 0.)
40 | 
41 |     return violation
42 | 
--------------------------------------------------------------------------------
/sklearn/datasets/descr/diabetes.rst:
--------------------------------------------------------------------------------
1 | .. _diabetes_dataset:
2 | 
3 | Diabetes dataset
4 | ----------------
5 | 
6 | Ten baseline variables, age, sex, body mass index, average blood
7 | pressure, and six blood serum measurements were obtained for each of n =
8 | 442 diabetes patients, as well as the response of interest, a
9 | quantitative measure of disease progression one year after baseline.
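The dataset can be loaded with :func:`sklearn.datasets.load_diabetes`; a
minimal sketch (illustrative only)::

    >>> from sklearn.datasets import load_diabetes
    >>> data = load_diabetes()
    >>> data.data.shape, data.target.shape
    ((442, 10), (442,))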
10 | 
11 | **Data Set Characteristics:**
12 | 
13 |   :Number of Instances: 442
14 | 
15 |   :Number of Attributes: First 10 columns are numeric predictive values
16 | 
17 |   :Target: Column 11 is a quantitative measure of disease progression one year after baseline
18 | 
19 |   :Attribute Information:
20 |       - Age
21 |       - Sex
22 |       - Body mass index
23 |       - Average blood pressure
24 |       - S1
25 |       - S2
26 |       - S3
27 |       - S4
28 |       - S5
29 |       - S6
30 | 
31 | Note: Each of these 10 feature variables has been mean centered and scaled by the standard deviation times `n_samples` (i.e. the sum of squares of each column totals 1).
32 | 
33 | Source URL:
34 | https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html
35 | 
36 | For more information see:
37 | Bradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) "Least Angle Regression," Annals of Statistics (with discussion), 407-499.
38 | (https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf)
--------------------------------------------------------------------------------
/doc/tutorial/statistical_inference/finding_help.rst:
--------------------------------------------------------------------------------
1 | Finding help
2 | ============
3 | 
4 | 
5 | The project mailing list
6 | ------------------------
7 | 
8 | If you encounter a bug with ``scikit-learn`` or something that needs
9 | clarification in the docstring or the online documentation, please feel free to
10 | ask on the `Mailing List <https://mail.python.org/mailman/listinfo/scikit-learn>`_
11 | 
12 | 
13 | Q&A communities with Machine Learning practitioners
14 | ----------------------------------------------------
15 | 
16 |   :Quora.com:
17 | 
18 |     Quora has a topic for Machine Learning related questions that
19 |     also features some interesting discussions:
20 |     https://www.quora.com/topic/Machine-Learning
21 | 
22 |   :Stack Exchange:
23 | 
24 |     The Stack Exchange family of sites hosts `multiple subdomains for Machine Learning questions`_.
25 | 
26 | .. _`How do I learn machine learning?`: https://www.quora.com/How-do-I-learn-machine-learning-1
27 | 
28 | .. _`multiple subdomains for Machine Learning questions`: https://meta.stackexchange.com/q/130524
29 | 
30 | - `An excellent free online course for Machine Learning taught by Professor Andrew Ng of Stanford <https://www.coursera.org/learn/machine-learning>`_
31 | 
32 | - `Another excellent free online course that takes a more general approach to Artificial Intelligence <https://www.udacity.com/course/intro-to-artificial-intelligence--cs271>`_
--------------------------------------------------------------------------------
/sklearn/externals/joblib/externals/loky/backend/fork_exec.py:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | # Launch a subprocess using fork+exec and make sure only the needed file
3 | # descriptors are shared in the two processes.
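# (The child process closes every inherited file descriptor not listed in
# keep_fds before exec'ing a fresh interpreter; see close_fds below.)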
4 | # 5 | # author: Thomas Moreau and Olivier Grisel 6 | # 7 | import os 8 | import sys 9 | 10 | if sys.platform == "darwin" and sys.version_info < (3, 3): 11 | FileNotFoundError = OSError 12 | 13 | 14 | def close_fds(keep_fds): # pragma: no cover 15 | """Close all the file descriptors except those in keep_fds.""" 16 | 17 | # Make sure to keep stdout and stderr open for logging purpose 18 | keep_fds = set(keep_fds).union([1, 2]) 19 | 20 | # We try to retrieve all the open fds 21 | try: 22 | open_fds = set(int(fd) for fd in os.listdir('/proc/self/fd')) 23 | except FileNotFoundError: 24 | import resource 25 | max_nfds = resource.getrlimit(resource.RLIMIT_NOFILE)[0] 26 | open_fds = set(fd for fd in range(3, max_nfds)) 27 | open_fds.add(0) 28 | 29 | for i in open_fds - keep_fds: 30 | try: 31 | os.close(i) 32 | except OSError: 33 | pass 34 | 35 | 36 | def fork_exec(cmd, keep_fds): 37 | 38 | pid = os.fork() 39 | if pid == 0: # pragma: no cover 40 | close_fds(keep_fds) 41 | os.execv(sys.executable, cmd) 42 | else: 43 | return pid 44 | -------------------------------------------------------------------------------- /examples/exercises/plot_cv_digits.py: -------------------------------------------------------------------------------- 1 | """ 2 | ============================================= 3 | Cross-validation on Digits Dataset Exercise 4 | ============================================= 5 | 6 | A tutorial exercise using Cross-validation with an SVM on the Digits dataset. 7 | 8 | This exercise is used in the :ref:`cv_generators_tut` part of the 9 | :ref:`model_selection_tut` section of the :ref:`stat_learn_tut_index`. 10 | """ 11 | print(__doc__) 12 | 13 | 14 | import numpy as np 15 | from sklearn.model_selection import cross_val_score 16 | from sklearn import datasets, svm 17 | 18 | digits = datasets.load_digits() 19 | X = digits.data 20 | y = digits.target 21 | 22 | svc = svm.SVC(kernel='linear') 23 | C_s = np.logspace(-10, 0, 10) 24 | 25 | scores = list() 26 | scores_std = list() 27 | for C in C_s: 28 | svc.C = C 29 | this_scores = cross_val_score(svc, X, y, cv=5, n_jobs=1) 30 | scores.append(np.mean(this_scores)) 31 | scores_std.append(np.std(this_scores)) 32 | 33 | # Do the plotting 34 | import matplotlib.pyplot as plt 35 | plt.figure(1, figsize=(4, 3)) 36 | plt.clf() 37 | plt.semilogx(C_s, scores) 38 | plt.semilogx(C_s, np.array(scores) + np.array(scores_std), 'b--') 39 | plt.semilogx(C_s, np.array(scores) - np.array(scores_std), 'b--') 40 | locs, labels = plt.yticks() 41 | plt.yticks(locs, list(map(lambda x: "%g" % x, locs))) 42 | plt.ylabel('CV score') 43 | plt.xlabel('Parameter C') 44 | plt.ylim(0, 1.1) 45 | plt.show() 46 | -------------------------------------------------------------------------------- /examples/linear_model/plot_sgd_loss_functions.py: -------------------------------------------------------------------------------- 1 | """ 2 | ========================== 3 | SGD: convex loss functions 4 | ========================== 5 | 6 | A plot that compares the various convex loss functions supported by 7 | :class:`sklearn.linear_model.SGDClassifier` . 8 | """ 9 | print(__doc__) 10 | 11 | import numpy as np 12 | import matplotlib.pyplot as plt 13 | 14 | 15 | def modified_huber_loss(y_true, y_pred): 16 | z = y_pred * y_true 17 | loss = -4 * z 18 | loss[z >= -1] = (1 - z[z >= -1]) ** 2 19 | loss[z >= 1.] 
= 0 20 | return loss 21 | 22 | 23 | xmin, xmax = -4, 4 24 | xx = np.linspace(xmin, xmax, 100) 25 | lw = 2 26 | plt.plot([xmin, 0, 0, xmax], [1, 1, 0, 0], color='gold', lw=lw, 27 | label="Zero-one loss") 28 | plt.plot(xx, np.where(xx < 1, 1 - xx, 0), color='teal', lw=lw, 29 | label="Hinge loss") 30 | plt.plot(xx, -np.minimum(xx, 0), color='yellowgreen', lw=lw, 31 | label="Perceptron loss") 32 | plt.plot(xx, np.log2(1 + np.exp(-xx)), color='cornflowerblue', lw=lw, 33 | label="Log loss") 34 | plt.plot(xx, np.where(xx < 1, 1 - xx, 0) ** 2, color='orange', lw=lw, 35 | label="Squared hinge loss") 36 | plt.plot(xx, modified_huber_loss(xx, 1), color='darkorchid', lw=lw, 37 | linestyle='--', label="Modified Huber loss") 38 | plt.ylim((0, 8)) 39 | plt.legend(loc="upper right") 40 | plt.xlabel(r"Decision function $f(x)$") 41 | plt.ylabel("$L(y=1, f(x))$") 42 | plt.show() 43 | -------------------------------------------------------------------------------- /sklearn/manifold/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os.path import join 3 | 4 | import numpy 5 | from numpy.distutils.misc_util import Configuration 6 | from sklearn._build_utils import get_blas_info 7 | 8 | 9 | def configuration(parent_package="", top_path=None): 10 | config = Configuration("manifold", parent_package, top_path) 11 | libraries = [] 12 | if os.name == 'posix': 13 | libraries.append('m') 14 | config.add_extension("_utils", 15 | sources=["_utils.pyx"], 16 | include_dirs=[numpy.get_include()], 17 | libraries=libraries, 18 | extra_compile_args=["-O3"]) 19 | cblas_libs, blas_info = get_blas_info() 20 | eca = blas_info.pop('extra_compile_args', []) 21 | eca.append("-O4") 22 | config.add_extension("_barnes_hut_tsne", 23 | libraries=cblas_libs, 24 | sources=["_barnes_hut_tsne.pyx"], 25 | include_dirs=[join('..', 'src', 'cblas'), 26 | numpy.get_include(), 27 | blas_info.pop('include_dirs', [])], 28 | extra_compile_args=eca, **blas_info) 29 | 30 | config.add_subpackage('tests') 31 | 32 | return config 33 | 34 | 35 | if __name__ == "__main__": 36 | from numpy.distutils.core import setup 37 | setup(**configuration().todict()) 38 | -------------------------------------------------------------------------------- /doc/data_transforms.rst: -------------------------------------------------------------------------------- 1 | .. include:: includes/big_toc_css.rst 2 | 3 | .. _data-transforms: 4 | 5 | Dataset transformations 6 | ----------------------- 7 | 8 | scikit-learn provides a library of transformers, which may clean (see 9 | :ref:`preprocessing`), reduce (see :ref:`data_reduction`), expand (see 10 | :ref:`kernel_approximation`) or generate (see :ref:`feature_extraction`) 11 | feature representations. 12 | 13 | Like other estimators, these are represented by classes with a ``fit`` method, 14 | which learns model parameters (e.g. mean and standard deviation for 15 | normalization) from a training set, and a ``transform`` method which applies 16 | this transformation model to unseen data. ``fit_transform`` may be more 17 | convenient and efficient for modelling and transforming the training data 18 | simultaneously. 19 | 20 | Combining such transformers, either in parallel or series is covered in 21 | :ref:`combining_estimators`. :ref:`metrics` covers transforming feature 22 | spaces into affinity matrices, while :ref:`preprocessing_targets` considers 23 | transformations of the target space (e.g. categorical labels) for use in 24 | scikit-learn. 25 | 26 | .. 
26 | .. toctree:: 27 | 28 | modules/compose 29 | modules/feature_extraction 30 | modules/preprocessing 31 | modules/impute 32 | modules/unsupervised_reduction 33 | modules/random_projection 34 | modules/kernel_approximation 35 | modules/metrics 36 | modules/preprocessing_targets 37 | -------------------------------------------------------------------------------- /benchmarks/bench_plot_ward.py: -------------------------------------------------------------------------------- 1 | """ 2 | Benchmark scikit-learn's Ward implementation compared to SciPy's 3 | """ 4 | 5 | import time 6 | 7 | import numpy as np 8 | from scipy.cluster import hierarchy 9 | import matplotlib.pyplot as plt 10 | 11 | from sklearn.cluster import AgglomerativeClustering 12 | 13 | ward = AgglomerativeClustering(n_clusters=3, linkage='ward') 14 | 15 | n_samples = np.logspace(.5, 3, 9) 16 | n_features = np.logspace(1, 3.5, 7) 17 | N_samples, N_features = np.meshgrid(n_samples, 18 | n_features) 19 | scikits_time = np.zeros(N_samples.shape) 20 | scipy_time = np.zeros(N_samples.shape) 21 | 22 | for i, n in enumerate(n_samples): 23 | for j, p in enumerate(n_features): 24 | X = np.random.normal(size=(int(n), int(p))) 25 | t0 = time.time() 26 | ward.fit(X) 27 | scikits_time[j, i] = time.time() - t0 28 | t0 = time.time() 29 | hierarchy.ward(X) 30 | scipy_time[j, i] = time.time() - t0 31 | 32 | ratio = scikits_time / scipy_time 33 | 34 | plt.figure("scikit-learn Ward's method benchmark results") 35 | plt.imshow(np.log(ratio), aspect='auto', origin="lower") 36 | plt.colorbar() 37 | plt.contour(ratio, levels=[1, ], colors='k') 38 | plt.yticks(range(len(n_features)), n_features.astype(np.int)) 39 | plt.ylabel('N features') 40 | plt.xticks(range(len(n_samples)), n_samples.astype(np.int)) 41 | plt.xlabel('N samples') 42 | plt.title("Scikit's time, in units of scipy time (log)") 43 | plt.show() 44 | -------------------------------------------------------------------------------- /sklearn/covariance/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.covariance` module includes methods and algorithms to 3 | robustly estimate the covariance of features given a set of points. The 4 | precision matrix, defined as the inverse of the covariance, is also estimated. 5 | Covariance estimation is closely related to the theory of Gaussian Graphical 6 | Models. 
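For instance, an illustrative sketch with :class:`LedoitWolf`, one of the estimators exported below (the data here is synthetic):

    import numpy as np
    from sklearn.covariance import LedoitWolf

    X = np.random.RandomState(0).normal(size=(100, 5))
    lw = LedoitWolf().fit(X)
    lw.covariance_  # (5, 5) shrunk covariance estimate
    lw.precision_   # its inverse, the estimated precision matrix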
7 | """ 8 | 9 | from .empirical_covariance_ import empirical_covariance, EmpiricalCovariance, \ 10 | log_likelihood 11 | from .shrunk_covariance_ import shrunk_covariance, ShrunkCovariance, \ 12 | ledoit_wolf, ledoit_wolf_shrinkage, \ 13 | LedoitWolf, oas, OAS 14 | from .robust_covariance import fast_mcd, MinCovDet 15 | from .graph_lasso_ import graph_lasso, GraphLasso, GraphLassoCV,\ 16 | graphical_lasso, GraphicalLasso, GraphicalLassoCV 17 | from .elliptic_envelope import EllipticEnvelope 18 | 19 | 20 | __all__ = ['EllipticEnvelope', 21 | 'EmpiricalCovariance', 22 | 'GraphLasso', 23 | 'GraphLassoCV', 24 | 'GraphicalLasso', 25 | 'GraphicalLassoCV', 26 | 'LedoitWolf', 27 | 'MinCovDet', 28 | 'OAS', 29 | 'ShrunkCovariance', 30 | 'empirical_covariance', 31 | 'fast_mcd', 32 | 'graph_lasso', 33 | 'graphical_lasso', 34 | 'ledoit_wolf', 35 | 'ledoit_wolf_shrinkage', 36 | 'log_likelihood', 37 | 'oas', 38 | 'shrunk_covariance'] 39 | -------------------------------------------------------------------------------- /examples/linear_model/plot_sgd_separating_hyperplane.py: -------------------------------------------------------------------------------- 1 | """ 2 | ========================================= 3 | SGD: Maximum margin separating hyperplane 4 | ========================================= 5 | 6 | Plot the maximum margin separating hyperplane within a two-class 7 | separable dataset using a linear Support Vector Machines classifier 8 | trained using SGD. 9 | """ 10 | print(__doc__) 11 | 12 | import numpy as np 13 | import matplotlib.pyplot as plt 14 | from sklearn.linear_model import SGDClassifier 15 | from sklearn.datasets.samples_generator import make_blobs 16 | 17 | # we create 50 separable points 18 | X, Y = make_blobs(n_samples=50, centers=2, random_state=0, cluster_std=0.60) 19 | 20 | # fit the model 21 | clf = SGDClassifier(loss="hinge", alpha=0.01, max_iter=200, fit_intercept=True) 22 | clf.fit(X, Y) 23 | 24 | # plot the line, the points, and the nearest vectors to the plane 25 | xx = np.linspace(-1, 5, 10) 26 | yy = np.linspace(-1, 5, 10) 27 | 28 | X1, X2 = np.meshgrid(xx, yy) 29 | Z = np.empty(X1.shape) 30 | for (i, j), val in np.ndenumerate(X1): 31 | x1 = val 32 | x2 = X2[i, j] 33 | p = clf.decision_function([[x1, x2]]) 34 | Z[i, j] = p[0] 35 | levels = [-1.0, 0.0, 1.0] 36 | linestyles = ['dashed', 'solid', 'dashed'] 37 | colors = 'k' 38 | plt.contour(X1, X2, Z, levels, colors=colors, linestyles=linestyles) 39 | plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired, 40 | edgecolor='black', s=20) 41 | 42 | plt.axis('tight') 43 | plt.show() 44 | -------------------------------------------------------------------------------- /sklearn/cluster/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.cluster` module gathers popular unsupervised clustering 3 | algorithms. 
4 | """ 5 | 6 | from .spectral import spectral_clustering, SpectralClustering 7 | from .mean_shift_ import (mean_shift, MeanShift, 8 | estimate_bandwidth, get_bin_seeds) 9 | from .affinity_propagation_ import affinity_propagation, AffinityPropagation 10 | from .hierarchical import (ward_tree, AgglomerativeClustering, linkage_tree, 11 | FeatureAgglomeration) 12 | from .k_means_ import k_means, KMeans, MiniBatchKMeans 13 | from .dbscan_ import dbscan, DBSCAN 14 | from .optics_ import OPTICS, optics 15 | from .bicluster import SpectralBiclustering, SpectralCoclustering 16 | from .birch import Birch 17 | 18 | __all__ = ['AffinityPropagation', 19 | 'AgglomerativeClustering', 20 | 'Birch', 21 | 'DBSCAN', 22 | 'OPTICS', 23 | 'KMeans', 24 | 'FeatureAgglomeration', 25 | 'MeanShift', 26 | 'MiniBatchKMeans', 27 | 'SpectralClustering', 28 | 'affinity_propagation', 29 | 'dbscan', 30 | 'estimate_bandwidth', 31 | 'get_bin_seeds', 32 | 'k_means', 33 | 'linkage_tree', 34 | 'mean_shift', 35 | 'optics', 36 | 'spectral_clustering', 37 | 'ward_tree', 38 | 'SpectralBiclustering', 39 | 'SpectralCoclustering'] 40 | -------------------------------------------------------------------------------- /sklearn/svm/liblinear.pxd: -------------------------------------------------------------------------------- 1 | cimport numpy as np 2 | 3 | 4 | cdef extern from "src/liblinear/linear.h": 5 | cdef struct feature_node 6 | cdef struct problem 7 | cdef struct model 8 | cdef struct parameter 9 | ctypedef problem* problem_const_ptr "problem const *" 10 | ctypedef parameter* parameter_const_ptr "parameter const *" 11 | ctypedef char* char_const_ptr "char const *" 12 | char_const_ptr check_parameter(problem_const_ptr prob, parameter_const_ptr param) 13 | model *train(problem_const_ptr prob, parameter_const_ptr param) nogil 14 | int get_nr_feature (model *model) 15 | int get_nr_class (model *model) 16 | void get_n_iter (model *model, int *n_iter) 17 | void free_and_destroy_model (model **) 18 | void destroy_param (parameter *) 19 | 20 | cdef extern from "src/liblinear/liblinear_helper.c": 21 | void copy_w(void *, model *, int) 22 | parameter *set_parameter(int, double, double, int, char *, char *, int, int, double) 23 | problem *set_problem (char *, char *, np.npy_intp *, double, char *) 24 | problem *csr_set_problem (char *values, np.npy_intp *n_indices, 25 | char *indices, np.npy_intp *n_indptr, char *indptr, char *Y, 26 | np.npy_intp n_features, double bias, char *) 27 | 28 | model *set_model(parameter *, char *, np.npy_intp *, char *, double) 29 | 30 | double get_bias(model *) 31 | void free_problem (problem *) 32 | void free_parameter (parameter *) 33 | void set_verbosity(int) 34 | -------------------------------------------------------------------------------- /benchmarks/bench_plot_parallel_pairwise.py: -------------------------------------------------------------------------------- 1 | # Author: Mathieu Blondel 2 | # License: BSD 3 clause 3 | import time 4 | 5 | import matplotlib.pyplot as plt 6 | 7 | from sklearn.utils import check_random_state 8 | from sklearn.metrics.pairwise import pairwise_distances 9 | from sklearn.metrics.pairwise import pairwise_kernels 10 | 11 | def plot(func): 12 | random_state = check_random_state(0) 13 | one_core = [] 14 | multi_core = [] 15 | sample_sizes = range(1000, 6000, 1000) 16 | 17 | for n_samples in sample_sizes: 18 | X = random_state.rand(n_samples, 300) 19 | 20 | start = time.time() 21 | func(X, n_jobs=1) 22 | one_core.append(time.time() - start) 23 | 24 | start = time.time() 25 | 
func(X, n_jobs=-1) 26 | multi_core.append(time.time() - start) 27 | 28 | plt.figure('scikit-learn parallel %s benchmark results' % func.__name__) 29 | plt.plot(sample_sizes, one_core, label="one core") 30 | plt.plot(sample_sizes, multi_core, label="multi core") 31 | plt.xlabel('n_samples') 32 | plt.ylabel('Time (s)') 33 | plt.title('Parallel %s' % func.__name__) 34 | plt.legend() 35 | 36 | 37 | def euclidean_distances(X, n_jobs): 38 | return pairwise_distances(X, metric="euclidean", n_jobs=n_jobs) 39 | 40 | 41 | def rbf_kernels(X, n_jobs): 42 | return pairwise_kernels(X, metric="rbf", n_jobs=n_jobs, gamma=0.1) 43 | 44 | plot(euclidean_distances) 45 | plot(rbf_kernels) 46 | plt.show() 47 | -------------------------------------------------------------------------------- /sklearn/feature_selection/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.feature_selection` module implements feature selection 3 | algorithms. It currently includes univariate filter selection methods and the 4 | recursive feature elimination algorithm. 5 | """ 6 | 7 | from .univariate_selection import chi2 8 | from .univariate_selection import f_classif 9 | from .univariate_selection import f_oneway 10 | from .univariate_selection import f_regression 11 | from .univariate_selection import SelectPercentile 12 | from .univariate_selection import SelectKBest 13 | from .univariate_selection import SelectFpr 14 | from .univariate_selection import SelectFdr 15 | from .univariate_selection import SelectFwe 16 | from .univariate_selection import GenericUnivariateSelect 17 | 18 | from .variance_threshold import VarianceThreshold 19 | 20 | from .rfe import RFE 21 | from .rfe import RFECV 22 | 23 | from .from_model import SelectFromModel 24 | 25 | from .mutual_info_ import mutual_info_regression, mutual_info_classif 26 | 27 | 28 | __all__ = ['GenericUnivariateSelect', 29 | 'RFE', 30 | 'RFECV', 31 | 'SelectFdr', 32 | 'SelectFpr', 33 | 'SelectFwe', 34 | 'SelectKBest', 35 | 'SelectFromModel', 36 | 'SelectPercentile', 37 | 'VarianceThreshold', 38 | 'chi2', 39 | 'f_classif', 40 | 'f_oneway', 41 | 'f_regression', 42 | 'mutual_info_classif', 43 | 'mutual_info_regression'] 44 | -------------------------------------------------------------------------------- /examples/manifold/plot_swissroll.py: -------------------------------------------------------------------------------- 1 | """ 2 | =================================== 3 | Swiss Roll reduction with LLE 4 | =================================== 5 | 6 | An illustration of Swiss Roll reduction 7 | with locally linear embedding 8 | """ 9 | 10 | # Author: Fabian Pedregosa -- 11 | # License: BSD 3 clause (C) INRIA 2011 12 | 13 | print(__doc__) 14 | 15 | import matplotlib.pyplot as plt 16 | 17 | # This import is needed to modify the way figure behaves 18 | from mpl_toolkits.mplot3d import Axes3D 19 | Axes3D 20 | 21 | #---------------------------------------------------------------------- 22 | # Locally linear embedding of the swiss roll 23 | 24 | from sklearn import manifold, datasets 25 | X, color = datasets.samples_generator.make_swiss_roll(n_samples=1500) 26 | 27 | print("Computing LLE embedding") 28 | X_r, err = manifold.locally_linear_embedding(X, n_neighbors=12, 29 | n_components=2) 30 | print("Done. 
Reconstruction error: %g" % err) 31 | 32 | #---------------------------------------------------------------------- 33 | # Plot result 34 | 35 | fig = plt.figure() 36 | 37 | ax = fig.add_subplot(211, projection='3d') 38 | ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, cmap=plt.cm.Spectral) 39 | 40 | ax.set_title("Original data") 41 | ax = fig.add_subplot(212) 42 | ax.scatter(X_r[:, 0], X_r[:, 1], c=color, cmap=plt.cm.Spectral) 43 | plt.axis('tight') 44 | plt.xticks([]), plt.yticks([]) 45 | plt.title('Projected data') 46 | plt.show() 47 | -------------------------------------------------------------------------------- /examples/svm/plot_separating_hyperplane.py: -------------------------------------------------------------------------------- 1 | """ 2 | ========================================= 3 | SVM: Maximum margin separating hyperplane 4 | ========================================= 5 | 6 | Plot the maximum margin separating hyperplane within a two-class 7 | separable dataset using a Support Vector Machine classifier with 8 | linear kernel. 9 | """ 10 | print(__doc__) 11 | 12 | import numpy as np 13 | import matplotlib.pyplot as plt 14 | from sklearn import svm 15 | from sklearn.datasets import make_blobs 16 | 17 | 18 | # we create 40 separable points 19 | X, y = make_blobs(n_samples=40, centers=2, random_state=6) 20 | 21 | # fit the model, don't regularize for illustration purposes 22 | clf = svm.SVC(kernel='linear', C=1000) 23 | clf.fit(X, y) 24 | 25 | plt.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=plt.cm.Paired) 26 | 27 | # plot the decision function 28 | ax = plt.gca() 29 | xlim = ax.get_xlim() 30 | ylim = ax.get_ylim() 31 | 32 | # create grid to evaluate model 33 | xx = np.linspace(xlim[0], xlim[1], 30) 34 | yy = np.linspace(ylim[0], ylim[1], 30) 35 | YY, XX = np.meshgrid(yy, xx) 36 | xy = np.vstack([XX.ravel(), YY.ravel()]).T 37 | Z = clf.decision_function(xy).reshape(XX.shape) 38 | 39 | # plot decision boundary and margins 40 | ax.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], alpha=0.5, 41 | linestyles=['--', '-', '--']) 42 | # plot support vectors 43 | ax.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s=100, 44 | linewidth=1, facecolors='none', edgecolors='k') 45 | plt.show() 46 | -------------------------------------------------------------------------------- /sklearn/ensemble/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.ensemble` module includes ensemble-based methods for 3 | classification, regression and anomaly detection. 4 | """ 5 | 6 | from .base import BaseEnsemble 7 | from .forest import RandomForestClassifier 8 | from .forest import RandomForestRegressor 9 | from .forest import RandomTreesEmbedding 10 | from .forest import ExtraTreesClassifier 11 | from .forest import ExtraTreesRegressor 12 | from .bagging import BaggingClassifier 13 | from .bagging import BaggingRegressor 14 | from .iforest import IsolationForest 15 | from .weight_boosting import AdaBoostClassifier 16 | from .weight_boosting import AdaBoostRegressor 17 | from .gradient_boosting import GradientBoostingClassifier 18 | from .gradient_boosting import GradientBoostingRegressor 19 | from .voting_classifier import VotingClassifier 20 | 21 | from . import bagging 22 | from . import forest 23 | from . import weight_boosting 24 | from . import gradient_boosting 25 | from . 
import partial_dependence 26 | 27 | __all__ = ["BaseEnsemble", 28 | "RandomForestClassifier", "RandomForestRegressor", 29 | "RandomTreesEmbedding", "ExtraTreesClassifier", 30 | "ExtraTreesRegressor", "BaggingClassifier", 31 | "BaggingRegressor", "IsolationForest", "GradientBoostingClassifier", 32 | "GradientBoostingRegressor", "AdaBoostClassifier", 33 | "AdaBoostRegressor", "VotingClassifier", 34 | "bagging", "forest", "gradient_boosting", 35 | "partial_dependence", "weight_boosting"] 36 | -------------------------------------------------------------------------------- /sklearn/datasets/descr/california_housing.rst: -------------------------------------------------------------------------------- 1 | .. _california_housing_dataset: 2 | 3 | California Housing dataset 4 | -------------------------- 5 | 6 | **Data Set Characteristics:** 7 | 8 | :Number of Instances: 20640 9 | 10 | :Number of Attributes: 8 numeric, predictive attributes and the target 11 | 12 | :Attribute Information: 13 | - MedInc median income in block 14 | - HouseAge median house age in block 15 | - AveRooms average number of rooms 16 | - AveBedrms average number of bedrooms 17 | - Population block population 18 | - AveOccup average house occupancy 19 | - Latitude house block latitude 20 | - Longitude house block longitude 21 | 22 | :Missing Attribute Values: None 23 | 24 | This dataset was obtained from the StatLib repository. 25 | http://lib.stat.cmu.edu/datasets/ 26 | 27 | The target variable is the median house value for California districts. 28 | 29 | This dataset was derived from the 1990 U.S. census, using one row per census 30 | block group. A block group is the smallest geographical unit for which the U.S. 31 | Census Bureau publishes sample data (a block group typically has a population 32 | of 600 to 3,000 people). 33 | 34 | It can be downloaded/loaded using the 35 | :func:`sklearn.datasets.fetch_california_housing` function. 36 | 37 | .. topic:: References 38 | 39 | - Pace, R. Kelley and Ronald Barry, Sparse Spatial Autoregressions, 40 | Statistics and Probability Letters, 33 (1997) 291-297 41 |
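A minimal loading sketch (the function and the ``Bunch`` fields shown are those exported by :mod:`sklearn.datasets`)::

    from sklearn.datasets import fetch_california_housing

    housing = fetch_california_housing()
    print(housing.data.shape)     # (20640, 8)
    print(housing.feature_names)  # MedInc, HouseAge, AveRooms, ...
    print(housing.target[:3])     # median house values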
-------------------------------------------------------------------------------- /doc/tutorial/statistical_inference/index.rst: -------------------------------------------------------------------------------- 1 | .. _stat_learn_tut_index: 2 | 3 | ========================================================================== 4 | A tutorial on statistical-learning for scientific data processing 5 | ========================================================================== 6 | 7 | .. topic:: Statistical learning 8 | 9 | `Machine learning `_ is 10 | a technique of growing importance, as the 11 | size of the datasets that experimental sciences face is rapidly 12 | growing. Problems it tackles range from building a prediction function 13 | linking different observations, to classifying observations, or 14 | learning the structure in an unlabeled dataset. 15 | 16 | This tutorial will explore *statistical learning*, the use of 17 | machine learning techniques with the goal of `statistical inference 18 | `_: 19 | drawing conclusions on the data at hand. 20 | 21 | Scikit-learn is a Python module integrating classic machine 22 | learning algorithms in the tightly-knit world of scientific Python 23 | packages (`NumPy `_, `SciPy 24 | `_, `matplotlib 25 | `_). 26 | 27 | .. include:: ../../includes/big_toc_css.rst 28 | 29 | .. toctree:: 30 | :maxdepth: 2 31 | 32 | settings 33 | supervised_learning 34 | model_selection 35 | unsupervised_learning 36 | putting_together 37 | finding_help 38 | -------------------------------------------------------------------------------- /sklearn/decomposition/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.decomposition` module includes matrix decomposition 3 | algorithms, such as PCA, NMF or ICA. Most of the algorithms of 4 | this module can be regarded as dimensionality reduction techniques. 5 | """ 6 | 7 | from .nmf import NMF, non_negative_factorization 8 | from .pca import PCA 9 | from .incremental_pca import IncrementalPCA 10 | from .kernel_pca import KernelPCA 11 | from .sparse_pca import SparsePCA, MiniBatchSparsePCA 12 | from .truncated_svd import TruncatedSVD 13 | from .fastica_ import FastICA, fastica 14 | from .dict_learning import (dict_learning, dict_learning_online, sparse_encode, 15 | DictionaryLearning, MiniBatchDictionaryLearning, 16 | SparseCoder) 17 | from .factor_analysis import FactorAnalysis 18 | from ..utils.extmath import randomized_svd 19 | from .online_lda import LatentDirichletAllocation 20 | 21 | __all__ = ['DictionaryLearning', 22 | 'FastICA', 23 | 'IncrementalPCA', 24 | 'KernelPCA', 25 | 'MiniBatchDictionaryLearning', 26 | 'MiniBatchSparsePCA', 27 | 'NMF', 28 | 'PCA', 29 | 'SparseCoder', 30 | 'SparsePCA', 31 | 'dict_learning', 32 | 'dict_learning_online', 33 | 'fastica', 34 | 'non_negative_factorization', 35 | 'randomized_svd', 36 | 'sparse_encode', 37 | 'FactorAnalysis', 38 | 'TruncatedSVD', 39 | 'LatentDirichletAllocation'] 40 | -------------------------------------------------------------------------------- /examples/feature_selection/plot_rfe_with_cross_validation.py: -------------------------------------------------------------------------------- 1 | """ 2 | =================================================== 3 | Recursive feature elimination with cross-validation 4 | =================================================== 5 | 6 | A recursive feature elimination example with automatic tuning of the 7 | number of features selected with cross-validation. 8 | """ 9 | print(__doc__) 10 | 11 | import matplotlib.pyplot as plt 12 | from sklearn.svm import SVC 13 | from sklearn.model_selection import StratifiedKFold 14 | from sklearn.feature_selection import RFECV 15 | from sklearn.datasets import make_classification 16 | 17 | # Build a classification task using 3 informative features 18 | X, y = make_classification(n_samples=1000, n_features=25, n_informative=3, 19 | n_redundant=2, n_repeated=0, n_classes=8, 20 | n_clusters_per_class=1, random_state=0) 21 | 22 | # Create the RFE object and compute a cross-validated score. 23 | svc = SVC(kernel="linear") 24 | # The "accuracy" scoring is proportional to the number of correct 25 | # classifications 26 | rfecv = RFECV(estimator=svc, step=1, cv=StratifiedKFold(2), 27 | scoring='accuracy') 28 | rfecv.fit(X, y) 29 | 30 | print("Optimal number of features : %d" % rfecv.n_features_) 31 | 32 | # Plot number of features vs. 
cross-validation scores 33 | plt.figure() 34 | plt.xlabel("Number of features selected") 35 | plt.ylabel("Cross validation score (nb of correct classifications)") 36 | plt.plot(range(1, len(rfecv.grid_scores_) + 1), rfecv.grid_scores_) 37 | plt.show() 38 | -------------------------------------------------------------------------------- /sklearn/tree/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy 4 | from numpy.distutils.misc_util import Configuration 5 | 6 | 7 | def configuration(parent_package="", top_path=None): 8 | config = Configuration("tree", parent_package, top_path) 9 | libraries = [] 10 | if os.name == 'posix': 11 | libraries.append('m') 12 | config.add_extension("_tree", 13 | sources=["_tree.pyx"], 14 | include_dirs=[numpy.get_include()], 15 | libraries=libraries, 16 | extra_compile_args=["-O3"]) 17 | config.add_extension("_splitter", 18 | sources=["_splitter.pyx"], 19 | include_dirs=[numpy.get_include()], 20 | libraries=libraries, 21 | extra_compile_args=["-O3"]) 22 | config.add_extension("_criterion", 23 | sources=["_criterion.pyx"], 24 | include_dirs=[numpy.get_include()], 25 | libraries=libraries, 26 | extra_compile_args=["-O3"]) 27 | config.add_extension("_utils", 28 | sources=["_utils.pyx"], 29 | include_dirs=[numpy.get_include()], 30 | libraries=libraries, 31 | extra_compile_args=["-O3"]) 32 | 33 | config.add_subpackage("tests") 34 | 35 | return config 36 | 37 | if __name__ == "__main__": 38 | from numpy.distutils.core import setup 39 | setup(**configuration().todict()) 40 | -------------------------------------------------------------------------------- /examples/linear_model/plot_sgd_penalties.py: -------------------------------------------------------------------------------- 1 | """ 2 | ============== 3 | SGD: Penalties 4 | ============== 5 | 6 | Contours of where the penalty is equal to 1 7 | for the three penalties L1, L2 and elastic-net. 8 | 9 | All of the above are supported by 10 | :class:`sklearn.linear_model.stochastic_gradient`. 
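Concretely, for a weight vector ``w = (w1, w2)`` the plotted unit balls are L1(w) = |w1| + |w2|, L2(w) = w1 ** 2 + w2 ** 2 and elastic-net(w) = rho * L1(w) + (1 - rho) * L2(w); the code below uses rho = 0.5.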
11 | 12 | """ 13 | print(__doc__) 14 | 15 | import numpy as np 16 | import matplotlib.pyplot as plt 17 | 18 | l1_color = "navy" 19 | l2_color = "c" 20 | elastic_net_color = "darkorange" 21 | 22 | line = np.linspace(-1.5, 1.5, 1001) 23 | xx, yy = np.meshgrid(line, line) 24 | 25 | l2 = xx ** 2 + yy ** 2 26 | l1 = np.abs(xx) + np.abs(yy) 27 | rho = 0.5 28 | elastic_net = rho * l1 + (1 - rho) * l2 29 | 30 | plt.figure(figsize=(10, 10), dpi=100) 31 | ax = plt.gca() 32 | 33 | elastic_net_contour = plt.contour(xx, yy, elastic_net, levels=[1], 34 | colors=elastic_net_color) 35 | l2_contour = plt.contour(xx, yy, l2, levels=[1], colors=l2_color) 36 | l1_contour = plt.contour(xx, yy, l1, levels=[1], colors=l1_color) 37 | ax.set_aspect("equal") 38 | ax.spines['left'].set_position('center') 39 | ax.spines['right'].set_color('none') 40 | ax.spines['bottom'].set_position('center') 41 | ax.spines['top'].set_color('none') 42 | 43 | plt.clabel(elastic_net_contour, inline=1, fontsize=18, 44 | fmt={1.0: 'elastic-net'}, manual=[(-1, -1)]) 45 | plt.clabel(l2_contour, inline=1, fontsize=18, 46 | fmt={1.0: 'L2'}, manual=[(-1, -1)]) 47 | plt.clabel(l1_contour, inline=1, fontsize=18, 48 | fmt={1.0: 'L1'}, manual=[(-1, -1)]) 49 | 50 | plt.tight_layout() 51 | plt.show() 52 | -------------------------------------------------------------------------------- /sklearn/svm/src/liblinear/COPYRIGHT: -------------------------------------------------------------------------------- 1 | 2 | Copyright (c) 2007-2014 The LIBLINEAR Project. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions 7 | are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | 3. Neither name of copyright holders nor the names of its contributors 17 | may be used to endorse or promote products derived from this software 18 | without specific prior written permission. 19 | 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR 25 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
32 | -------------------------------------------------------------------------------- /sklearn/neighbors/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def configuration(parent_package='', top_path=None): 5 | import numpy 6 | from numpy.distutils.misc_util import Configuration 7 | 8 | config = Configuration('neighbors', parent_package, top_path) 9 | libraries = [] 10 | if os.name == 'posix': 11 | libraries.append('m') 12 | 13 | config.add_extension('ball_tree', 14 | sources=['ball_tree.pyx'], 15 | include_dirs=[numpy.get_include()], 16 | libraries=libraries) 17 | 18 | config.add_extension('kd_tree', 19 | sources=['kd_tree.pyx'], 20 | include_dirs=[numpy.get_include()], 21 | libraries=libraries) 22 | 23 | config.add_extension('dist_metrics', 24 | sources=['dist_metrics.pyx'], 25 | include_dirs=[numpy.get_include(), 26 | os.path.join(numpy.get_include(), 27 | 'numpy')], 28 | libraries=libraries) 29 | 30 | config.add_extension('typedefs', 31 | sources=['typedefs.pyx'], 32 | include_dirs=[numpy.get_include()], 33 | libraries=libraries) 34 | config.add_extension("quad_tree", 35 | sources=["quad_tree.pyx"], 36 | include_dirs=[numpy.get_include()], 37 | libraries=libraries) 38 | 39 | config.add_subpackage('tests') 40 | 41 | return config 42 | -------------------------------------------------------------------------------- /sklearn/utils/tests/test_linear_assignment.py: -------------------------------------------------------------------------------- 1 | # Author: Brian M. Clapper, G Varoquaux 2 | # License: BSD 3 | 4 | import numpy as np 5 | 6 | # XXX we should be testing the public API here 7 | from sklearn.utils.linear_assignment_ import _hungarian 8 | 9 | 10 | def test_hungarian(): 11 | matrices = [ 12 | # Square 13 | ([[400, 150, 400], 14 | [400, 450, 600], 15 | [300, 225, 300]], 16 | 850 # expected cost 17 | ), 18 | 19 | # Rectangular variant 20 | ([[400, 150, 400, 1], 21 | [400, 450, 600, 2], 22 | [300, 225, 300, 3]], 23 | 452 # expected cost 24 | ), 25 | 26 | # Square 27 | ([[10, 10, 8], 28 | [9, 8, 1], 29 | [9, 7, 4]], 30 | 18 31 | ), 32 | 33 | # Rectangular variant 34 | ([[10, 10, 8, 11], 35 | [9, 8, 1, 1], 36 | [9, 7, 4, 10]], 37 | 15 38 | ), 39 | 40 | # n == 2, m == 0 matrix 41 | ([[], []], 42 | 0 43 | ), 44 | ] 45 | 46 | for cost_matrix, expected_total in matrices: 47 | cost_matrix = np.array(cost_matrix) 48 | indexes = _hungarian(cost_matrix) 49 | total_cost = 0 50 | for r, c in indexes: 51 | x = cost_matrix[r, c] 52 | total_cost += x 53 | assert expected_total == total_cost 54 | 55 | indexes = _hungarian(cost_matrix.T) 56 | total_cost = 0 57 | for c, r in indexes: 58 | x = cost_matrix[r, c] 59 | total_cost += x 60 | assert expected_total == total_cost 61 | -------------------------------------------------------------------------------- /examples/neighbors/plot_regression.py: -------------------------------------------------------------------------------- 1 | """ 2 | ============================ 3 | Nearest Neighbors regression 4 | ============================ 5 | 6 | Demonstrate the resolution of a regression problem 7 | using a k-Nearest Neighbor and the interpolation of the 8 | target using both barycenter and constant weights. 
9 | 10 | """ 11 | print(__doc__) 12 | 13 | # Author: Alexandre Gramfort 14 | # Fabian Pedregosa 15 | # 16 | # License: BSD 3 clause (C) INRIA 17 | 18 | 19 | # ############################################################################# 20 | # Generate sample data 21 | import numpy as np 22 | import matplotlib.pyplot as plt 23 | from sklearn import neighbors 24 | 25 | np.random.seed(0) 26 | X = np.sort(5 * np.random.rand(40, 1), axis=0) 27 | T = np.linspace(0, 5, 500)[:, np.newaxis] 28 | y = np.sin(X).ravel() 29 | 30 | # Add noise to targets 31 | y[::5] += 1 * (0.5 - np.random.rand(8)) 32 | 33 | # ############################################################################# 34 | # Fit regression model 35 | n_neighbors = 5 36 | 37 | for i, weights in enumerate(['uniform', 'distance']): 38 | knn = neighbors.KNeighborsRegressor(n_neighbors, weights=weights) 39 | y_ = knn.fit(X, y).predict(T) 40 | 41 | plt.subplot(2, 1, i + 1) 42 | plt.scatter(X, y, c='k', label='data') 43 | plt.plot(T, y_, c='g', label='prediction') 44 | plt.axis('tight') 45 | plt.legend() 46 | plt.title("KNeighborsRegressor (k = %i, weights = '%s')" % (n_neighbors, 47 | weights)) 48 | 49 | plt.tight_layout() 50 | plt.show() 51 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # simple makefile to simplify repetitive build env management tasks under posix 2 | 3 | # caution: testing won't work on windows, see README 4 | 5 | PYTHON ?= python 6 | CYTHON ?= cython 7 | PYTEST ?= pytest 8 | CTAGS ?= ctags 9 | 10 | # skip doctests on 32bit python 11 | BITS := $(shell python -c 'import struct; print(8 * struct.calcsize("P"))') 12 | 13 | all: clean inplace test 14 | 15 | clean-ctags: 16 | rm -f tags 17 | 18 | clean: clean-ctags 19 | $(PYTHON) setup.py clean 20 | rm -rf dist 21 | 22 | in: inplace # just a shortcut 23 | inplace: 24 | $(PYTHON) setup.py build_ext -i 25 | 26 | test-code: in 27 | $(PYTEST) --showlocals -v sklearn --durations=20 28 | test-sphinxext: 29 | $(PYTEST) --showlocals -v doc/sphinxext/ 30 | test-doc: 31 | ifeq ($(BITS),64) 32 | $(PYTEST) $(shell find doc -name '*.rst' | sort) 33 | endif 34 | 35 | test-coverage: 36 | rm -rf coverage .coverage 37 | $(PYTEST) sklearn --showlocals -v --cov=sklearn --cov-report=html:coverage 38 | 39 | test: test-code test-sphinxext test-doc 40 | 41 | trailing-spaces: 42 | find sklearn -name "*.py" -exec perl -pi -e 's/[ \t]*$$//' {} \; 43 | 44 | cython: 45 | python setup.py build_src 46 | 47 | ctags: 48 | # make tags for symbol based navigation in emacs and vim 49 | # Install with: sudo apt-get install exuberant-ctags 50 | $(CTAGS) --python-kinds=-i -R sklearn 51 | 52 | doc: inplace 53 | $(MAKE) -C doc html 54 | 55 | doc-noplot: inplace 56 | $(MAKE) -C doc html-noplot 57 | 58 | code-analysis: 59 | flake8 sklearn | grep -v __init__ | grep -v external 60 | pylint -E -i y sklearn/ -d E1103,E0611,E1101 61 | 62 | flake8-diff: 63 | ./build_tools/travis/flake8_diff.sh 64 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | New BSD License 2 | 3 | Copyright (c) 2007–2018 The scikit-learn developers. 4 | All rights reserved. 5 | 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are met: 9 | 10 | a. 
Redistributions of source code must retain the above copyright notice, 11 | this list of conditions and the following disclaimer. 12 | b. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | c. Neither the name of the Scikit-learn Developers nor the names of 16 | its contributors may be used to endorse or promote products 17 | derived from this software without specific prior written 18 | permission. 19 | 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 | ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR 25 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 31 | DAMAGE. 32 | 33 | -------------------------------------------------------------------------------- /examples/svm/plot_svm_regression.py: -------------------------------------------------------------------------------- 1 | """ 2 | =================================================================== 3 | Support Vector Regression (SVR) using linear and non-linear kernels 4 | =================================================================== 5 | 6 | Toy example of 1D regression using linear, polynomial and RBF kernels. 
7 | 8 | """ 9 | print(__doc__) 10 | 11 | import numpy as np 12 | from sklearn.svm import SVR 13 | import matplotlib.pyplot as plt 14 | 15 | # ############################################################################# 16 | # Generate sample data 17 | X = np.sort(5 * np.random.rand(40, 1), axis=0) 18 | y = np.sin(X).ravel() 19 | 20 | # ############################################################################# 21 | # Add noise to targets 22 | y[::5] += 3 * (0.5 - np.random.rand(8)) 23 | 24 | # ############################################################################# 25 | # Fit regression model 26 | svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1) 27 | svr_lin = SVR(kernel='linear', C=1e3) 28 | svr_poly = SVR(kernel='poly', C=1e3, degree=2) 29 | y_rbf = svr_rbf.fit(X, y).predict(X) 30 | y_lin = svr_lin.fit(X, y).predict(X) 31 | y_poly = svr_poly.fit(X, y).predict(X) 32 | 33 | # ############################################################################# 34 | # Look at the results 35 | lw = 2 36 | plt.scatter(X, y, color='darkorange', label='data') 37 | plt.plot(X, y_rbf, color='navy', lw=lw, label='RBF model') 38 | plt.plot(X, y_lin, color='c', lw=lw, label='Linear model') 39 | plt.plot(X, y_poly, color='cornflowerblue', lw=lw, label='Polynomial model') 40 | plt.xlabel('data') 41 | plt.ylabel('target') 42 | plt.title('Support Vector Regression') 43 | plt.legend() 44 | plt.show() 45 | -------------------------------------------------------------------------------- /examples/linear_model/plot_sgd_weighted_samples.py: -------------------------------------------------------------------------------- 1 | """ 2 | ===================== 3 | SGD: Weighted samples 4 | ===================== 5 | 6 | Plot decision function of a weighted dataset, where the size of points 7 | is proportional to its weight. 
8 | """ 9 | print(__doc__) 10 | 11 | import numpy as np 12 | import matplotlib.pyplot as plt 13 | from sklearn import linear_model 14 | 15 | # we create 20 points 16 | np.random.seed(0) 17 | X = np.r_[np.random.randn(10, 2) + [1, 1], np.random.randn(10, 2)] 18 | y = [1] * 10 + [-1] * 10 19 | sample_weight = 100 * np.abs(np.random.randn(20)) 20 | # and assign a bigger weight to the last 10 samples 21 | sample_weight[:10] *= 10 22 | 23 | # plot the weighted data points 24 | xx, yy = np.meshgrid(np.linspace(-4, 5, 500), np.linspace(-4, 5, 500)) 25 | plt.figure() 26 | plt.scatter(X[:, 0], X[:, 1], c=y, s=sample_weight, alpha=0.9, 27 | cmap=plt.cm.bone, edgecolor='black') 28 | 29 | # fit the unweighted model 30 | clf = linear_model.SGDClassifier(alpha=0.01, max_iter=100) 31 | clf.fit(X, y) 32 | Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()]) 33 | Z = Z.reshape(xx.shape) 34 | no_weights = plt.contour(xx, yy, Z, levels=[0], linestyles=['solid']) 35 | 36 | # fit the weighted model 37 | clf = linear_model.SGDClassifier(alpha=0.01, max_iter=100) 38 | clf.fit(X, y, sample_weight=sample_weight) 39 | Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()]) 40 | Z = Z.reshape(xx.shape) 41 | samples_weights = plt.contour(xx, yy, Z, levels=[0], linestyles=['dashed']) 42 | 43 | plt.legend([no_weights.collections[0], samples_weights.collections[0]], 44 | ["no weights", "with weights"], loc="lower left") 45 | 46 | plt.xticks(()) 47 | plt.yticks(()) 48 | plt.show() 49 | -------------------------------------------------------------------------------- /examples/mixture/plot_gmm_pdf.py: -------------------------------------------------------------------------------- 1 | """ 2 | ========================================= 3 | Density Estimation for a Gaussian mixture 4 | ========================================= 5 | 6 | Plot the density estimation of a mixture of two Gaussians. Data is 7 | generated from two Gaussians with different centers and covariance 8 | matrices. 9 | """ 10 | 11 | import numpy as np 12 | import matplotlib.pyplot as plt 13 | from matplotlib.colors import LogNorm 14 | from sklearn import mixture 15 | 16 | n_samples = 300 17 | 18 | # generate random sample, two components 19 | np.random.seed(0) 20 | 21 | # generate spherical data centered on (20, 20) 22 | shifted_gaussian = np.random.randn(n_samples, 2) + np.array([20, 20]) 23 | 24 | # generate zero centered stretched Gaussian data 25 | C = np.array([[0., -0.7], [3.5, .7]]) 26 | stretched_gaussian = np.dot(np.random.randn(n_samples, 2), C) 27 | 28 | # concatenate the two datasets into the final training set 29 | X_train = np.vstack([shifted_gaussian, stretched_gaussian]) 30 | 31 | # fit a Gaussian Mixture Model with two components 32 | clf = mixture.GaussianMixture(n_components=2, covariance_type='full') 33 | clf.fit(X_train) 34 | 35 | # display predicted scores by the model as a contour plot 36 | x = np.linspace(-20., 30.) 37 | y = np.linspace(-20., 40.) 
38 | X, Y = np.meshgrid(x, y) 39 | XX = np.array([X.ravel(), Y.ravel()]).T 40 | Z = -clf.score_samples(XX) 41 | Z = Z.reshape(X.shape) 42 | 43 | CS = plt.contour(X, Y, Z, norm=LogNorm(vmin=1.0, vmax=1000.0), 44 | levels=np.logspace(0, 3, 10)) 45 | CB = plt.colorbar(CS, shrink=0.8, extend='both') 46 | plt.scatter(X_train[:, 0], X_train[:, 1], .8) 47 | 48 | plt.title('Negative log-likelihood predicted by a GMM') 49 | plt.axis('tight') 50 | plt.show() 51 | -------------------------------------------------------------------------------- /examples/ensemble/plot_forest_importances_faces.py: -------------------------------------------------------------------------------- 1 | """ 2 | ================================================= 3 | Pixel importances with a parallel forest of trees 4 | ================================================= 5 | 6 | This example shows the use of forests of trees to evaluate the importance 7 | of the pixels in an image classification task (faces). The hotter the pixel, 8 | the more important. 9 | 10 | The code below also illustrates how the construction and the computation 11 | of the predictions can be parallelized within multiple jobs. 12 | """ 13 | print(__doc__) 14 | 15 | from time import time 16 | import matplotlib.pyplot as plt 17 | 18 | from sklearn.datasets import fetch_olivetti_faces 19 | from sklearn.ensemble import ExtraTreesClassifier 20 | 21 | # Number of cores to use to perform parallel fitting of the forest model 22 | n_jobs = 1 23 | 24 | # Load the faces dataset 25 | data = fetch_olivetti_faces() 26 | X = data.images.reshape((len(data.images), -1)) 27 | y = data.target 28 | 29 | mask = y < 5 # Limit to 5 classes 30 | X = X[mask] 31 | y = y[mask] 32 | 33 | # Build a forest and compute the pixel importances 34 | print("Fitting ExtraTreesClassifier on faces data with %d cores..." % n_jobs) 35 | t0 = time() 36 | forest = ExtraTreesClassifier(n_estimators=1000, 37 | max_features=128, 38 | n_jobs=n_jobs, 39 | random_state=0) 40 | 41 | forest.fit(X, y) 42 | print("done in %0.3fs" % (time() - t0)) 43 | importances = forest.feature_importances_ 44 | importances = importances.reshape(data.images[0].shape) 45 | 46 | # Plot pixel importances 47 | plt.matshow(importances, cmap=plt.cm.hot) 48 | plt.title("Pixel importances with forests of trees") 49 | plt.show() 50 | -------------------------------------------------------------------------------- /benchmarks/bench_glm.py: -------------------------------------------------------------------------------- 1 | """ 2 | A comparison of different methods in GLM 3 | 4 | Data comes from a random square matrix. 5 | 6 | """ 7 | from datetime import datetime 8 | import numpy as np 9 | from sklearn import linear_model 10 | from sklearn.utils.bench import total_seconds 11 | 12 | 13 | if __name__ == '__main__': 14 | 15 | import matplotlib.pyplot as plt 16 | 17 | n_iter = 40 18 | 19 | time_ridge = np.empty(n_iter) 20 | time_ols = np.empty(n_iter) 21 | time_lasso = np.empty(n_iter) 22 | 23 | dimensions = 500 * np.arange(1, n_iter + 1) 24 | 25 | for i in range(n_iter): 26 | 27 | print('Iteration %s of %s' % (i, n_iter)) 28 | 29 | n_samples, n_features = 10 * i + 3, 10 * i + 3 30 | 31 | X = np.random.randn(n_samples, n_features) 32 | Y = np.random.randn(n_samples) 33 | 34 | start = datetime.now() 35 | ridge = linear_model.Ridge(alpha=1.) 
36 | ridge.fit(X, Y) 37 | time_ridge[i] = total_seconds(datetime.now() - start) 38 | 39 | start = datetime.now() 40 | ols = linear_model.LinearRegression() 41 | ols.fit(X, Y) 42 | time_ols[i] = total_seconds(datetime.now() - start) 43 | 44 | start = datetime.now() 45 | lasso = linear_model.LassoLars() 46 | lasso.fit(X, Y) 47 | time_lasso[i] = total_seconds(datetime.now() - start) 48 | 49 | plt.figure('scikit-learn GLM benchmark results') 50 | plt.xlabel('Dimensions') 51 | plt.ylabel('Time (s)') 52 | plt.plot(dimensions, time_ridge, color='r') 53 | plt.plot(dimensions, time_ols, color='g') 54 | plt.plot(dimensions, time_lasso, color='b') 55 | 56 | plt.legend(['Ridge', 'OLS', 'LassoLars'], loc='upper left') 57 | plt.axis('tight') 58 | plt.show() 59 | -------------------------------------------------------------------------------- /examples/ensemble/plot_adaboost_regression.py: -------------------------------------------------------------------------------- 1 | """ 2 | ====================================== 3 | Decision Tree Regression with AdaBoost 4 | ====================================== 5 | 6 | A decision tree is boosted using the AdaBoost.R2 [1]_ algorithm on a 1D 7 | sinusoidal dataset with a small amount of Gaussian noise. 8 | 299 boosts (300 decision trees) is compared with a single decision tree 9 | regressor. As the number of boosts is increased the regressor can fit more 10 | detail. 11 | 12 | .. [1] H. Drucker, "Improving Regressors using Boosting Techniques", 1997. 13 | 14 | """ 15 | print(__doc__) 16 | 17 | # Author: Noel Dawe 18 | # 19 | # License: BSD 3 clause 20 | 21 | # importing necessary libraries 22 | import numpy as np 23 | import matplotlib.pyplot as plt 24 | from sklearn.tree import DecisionTreeRegressor 25 | from sklearn.ensemble import AdaBoostRegressor 26 | 27 | # Create the dataset 28 | rng = np.random.RandomState(1) 29 | X = np.linspace(0, 6, 100)[:, np.newaxis] 30 | y = np.sin(X).ravel() + np.sin(6 * X).ravel() + rng.normal(0, 0.1, X.shape[0]) 31 | 32 | # Fit regression model 33 | regr_1 = DecisionTreeRegressor(max_depth=4) 34 | 35 | regr_2 = AdaBoostRegressor(DecisionTreeRegressor(max_depth=4), 36 | n_estimators=300, random_state=rng) 37 | 38 | regr_1.fit(X, y) 39 | regr_2.fit(X, y) 40 | 41 | # Predict 42 | y_1 = regr_1.predict(X) 43 | y_2 = regr_2.predict(X) 44 | 45 | # Plot the results 46 | plt.figure() 47 | plt.scatter(X, y, c="k", label="training samples") 48 | plt.plot(X, y_1, c="g", label="n_estimators=1", linewidth=2) 49 | plt.plot(X, y_2, c="r", label="n_estimators=300", linewidth=2) 50 | plt.xlabel("data") 51 | plt.ylabel("target") 52 | plt.title("Boosted Decision Tree Regression") 53 | plt.legend() 54 | plt.show() 55 | -------------------------------------------------------------------------------- /examples/feature_selection/plot_select_from_model_boston.py: -------------------------------------------------------------------------------- 1 | """ 2 | =================================================== 3 | Feature selection using SelectFromModel and LassoCV 4 | =================================================== 5 | 6 | Use SelectFromModel meta-transformer along with Lasso to select the best 7 | couple of features from the Boston dataset. 
8 | """ 9 | # Author: Manoj Kumar 10 | # License: BSD 3 clause 11 | 12 | print(__doc__) 13 | 14 | import matplotlib.pyplot as plt 15 | import numpy as np 16 | 17 | from sklearn.datasets import load_boston 18 | from sklearn.feature_selection import SelectFromModel 19 | from sklearn.linear_model import LassoCV 20 | 21 | # Load the boston dataset. 22 | boston = load_boston() 23 | X, y = boston['data'], boston['target'] 24 | 25 | # We use the base estimator LassoCV since the L1 norm promotes sparsity of features. 26 | clf = LassoCV(cv=5) 27 | 28 | # Set a minimum threshold of 0.25 29 | sfm = SelectFromModel(clf, threshold=0.25) 30 | sfm.fit(X, y) 31 | n_features = sfm.transform(X).shape[1] 32 | 33 | # Reset the threshold till the number of features equals two. 34 | # Note that the attribute can be set directly instead of repeatedly 35 | # fitting the metatransformer. 36 | while n_features > 2: 37 | sfm.threshold += 0.1 38 | X_transform = sfm.transform(X) 39 | n_features = X_transform.shape[1] 40 | 41 | # Plot the selected two features from X. 42 | plt.title( 43 | "Features selected from Boston using SelectFromModel with " 44 | "threshold %0.3f." % sfm.threshold) 45 | feature1 = X_transform[:, 0] 46 | feature2 = X_transform[:, 1] 47 | plt.plot(feature1, feature2, 'r.') 48 | plt.xlabel("Feature number 1") 49 | plt.ylabel("Feature number 2") 50 | plt.ylim([np.min(feature2), np.max(feature2)]) 51 | plt.show() 52 | -------------------------------------------------------------------------------- /sklearn/metrics/cluster/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.metrics.cluster` submodule contains evaluation metrics for 3 | cluster analysis results. There are two forms of evaluation: 4 | 5 | - supervised, which uses a ground truth class values for each sample. 6 | - unsupervised, which does not and measures the 'quality' of the model itself. 
7 | """ 8 | from .supervised import adjusted_mutual_info_score 9 | from .supervised import normalized_mutual_info_score 10 | from .supervised import adjusted_rand_score 11 | from .supervised import completeness_score 12 | from .supervised import contingency_matrix 13 | from .supervised import expected_mutual_information 14 | from .supervised import homogeneity_completeness_v_measure 15 | from .supervised import homogeneity_score 16 | from .supervised import mutual_info_score 17 | from .supervised import v_measure_score 18 | from .supervised import fowlkes_mallows_score 19 | from .supervised import entropy 20 | from .unsupervised import silhouette_samples 21 | from .unsupervised import silhouette_score 22 | from .unsupervised import calinski_harabasz_score 23 | from .unsupervised import calinski_harabaz_score 24 | from .unsupervised import davies_bouldin_score 25 | from .bicluster import consensus_score 26 | 27 | __all__ = ["adjusted_mutual_info_score", "normalized_mutual_info_score", 28 | "adjusted_rand_score", "completeness_score", "contingency_matrix", 29 | "expected_mutual_information", "homogeneity_completeness_v_measure", 30 | "homogeneity_score", "mutual_info_score", "v_measure_score", 31 | "fowlkes_mallows_score", "entropy", "silhouette_samples", 32 | "silhouette_score", "calinski_harabaz_score", 33 | "calinski_harabasz_score", "davies_bouldin_score", 34 | "consensus_score"] 35 | -------------------------------------------------------------------------------- /examples/decomposition/plot_pca_iris.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | ========================================================= 6 | PCA example with Iris Data-set 7 | ========================================================= 8 | 9 | Principal Component Analysis applied to the Iris dataset. 10 | 11 | See `here `_ for more 12 | information on this dataset. 
13 | 14 | """ 15 | print(__doc__) 16 | 17 | 18 | # Code source: Gaël Varoquaux 19 | # License: BSD 3 clause 20 | 21 | import numpy as np 22 | import matplotlib.pyplot as plt 23 | from mpl_toolkits.mplot3d import Axes3D 24 | 25 | 26 | from sklearn import decomposition 27 | from sklearn import datasets 28 | 29 | np.random.seed(5) 30 | 31 | centers = [[1, 1], [-1, -1], [1, -1]] 32 | iris = datasets.load_iris() 33 | X = iris.data 34 | y = iris.target 35 | 36 | fig = plt.figure(1, figsize=(4, 3)) 37 | plt.clf() 38 | ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134) 39 | 40 | plt.cla() 41 | pca = decomposition.PCA(n_components=3) 42 | pca.fit(X) 43 | X = pca.transform(X) 44 | 45 | for name, label in [('Setosa', 0), ('Versicolour', 1), ('Virginica', 2)]: 46 | ax.text3D(X[y == label, 0].mean(), 47 | X[y == label, 1].mean() + 1.5, 48 | X[y == label, 2].mean(), name, 49 | horizontalalignment='center', 50 | bbox=dict(alpha=.5, edgecolor='w', facecolor='w')) 51 | # Reorder the labels to have colors matching the cluster results 52 | y = np.choose(y, [1, 2, 0]).astype(np.float) 53 | ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=y, cmap=plt.cm.nipy_spectral, 54 | edgecolor='k') 55 | 56 | ax.w_xaxis.set_ticklabels([]) 57 | ax.w_yaxis.set_ticklabels([]) 58 | ax.w_zaxis.set_ticklabels([]) 59 | 60 | plt.show() 61 | -------------------------------------------------------------------------------- /examples/tree/plot_tree_regression.py: -------------------------------------------------------------------------------- 1 | """ 2 | =================================================================== 3 | Decision Tree Regression 4 | =================================================================== 5 | 6 | A 1D regression with a decision tree. 7 | 8 | A :ref:`decision tree ` is 9 | used to fit a sine curve with additional noisy observations. As a result, it 10 | learns local linear regressions approximating the sine curve. 11 | 12 | We can see that if the maximum depth of the tree (controlled by the 13 | `max_depth` parameter) is set too high, the decision trees learn overly fine 14 | details of the training data and learn from the noise, i.e. they overfit. 
15 | """ 16 | print(__doc__) 17 | 18 | # Import the necessary modules and libraries 19 | import numpy as np 20 | from sklearn.tree import DecisionTreeRegressor 21 | import matplotlib.pyplot as plt 22 | 23 | # Create a random dataset 24 | rng = np.random.RandomState(1) 25 | X = np.sort(5 * rng.rand(80, 1), axis=0) 26 | y = np.sin(X).ravel() 27 | y[::5] += 3 * (0.5 - rng.rand(16)) 28 | 29 | # Fit regression model 30 | regr_1 = DecisionTreeRegressor(max_depth=2) 31 | regr_2 = DecisionTreeRegressor(max_depth=5) 32 | regr_1.fit(X, y) 33 | regr_2.fit(X, y) 34 | 35 | # Predict 36 | X_test = np.arange(0.0, 5.0, 0.01)[:, np.newaxis] 37 | y_1 = regr_1.predict(X_test) 38 | y_2 = regr_2.predict(X_test) 39 | 40 | # Plot the results 41 | plt.figure() 42 | plt.scatter(X, y, s=20, edgecolor="black", 43 | c="darkorange", label="data") 44 | plt.plot(X_test, y_1, color="cornflowerblue", 45 | label="max_depth=2", linewidth=2) 46 | plt.plot(X_test, y_2, color="yellowgreen", label="max_depth=5", linewidth=2) 47 | plt.xlabel("data") 48 | plt.ylabel("target") 49 | plt.title("Decision Tree Regression") 50 | plt.legend() 51 | plt.show() 52 | -------------------------------------------------------------------------------- /examples/svm/plot_custom_kernel.py: -------------------------------------------------------------------------------- 1 | """ 2 | ====================== 3 | SVM with custom kernel 4 | ====================== 5 | 6 | Simple usage of Support Vector Machines to classify a sample. It will 7 | plot the decision surface and the support vectors. 8 | 9 | """ 10 | print(__doc__) 11 | 12 | import numpy as np 13 | import matplotlib.pyplot as plt 14 | from sklearn import svm, datasets 15 | 16 | # import some data to play with 17 | iris = datasets.load_iris() 18 | X = iris.data[:, :2] # we only take the first two features. We could 19 | # avoid this ugly slicing by using a two-dim dataset 20 | Y = iris.target 21 | 22 | 23 | def my_kernel(X, Y): 24 | """ 25 | We create a custom kernel: 26 | 27 | (2 0) 28 | k(X, Y) = X ( ) Y.T 29 | (0 1) 30 | """ 31 | M = np.array([[2, 0], [0, 1.0]]) 32 | return np.dot(np.dot(X, M), Y.T) 33 | 34 | 35 | h = .02 # step size in the mesh 36 | 37 | # we create an instance of SVM and fit out data. 38 | clf = svm.SVC(kernel=my_kernel) 39 | clf.fit(X, Y) 40 | 41 | # Plot the decision boundary. For that, we will assign a color to each 42 | # point in the mesh [x_min, x_max]x[y_min, y_max]. 
43 | x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1 44 | y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1 45 | xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h)) 46 | Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]) 47 | 48 | # Put the result into a color plot 49 | Z = Z.reshape(xx.shape) 50 | plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired) 51 | 52 | # Plot also the training points 53 | plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired, edgecolors='k') 54 | plt.title('3-Class classification using Support Vector Machine with custom' 55 | ' kernel') 56 | plt.axis('tight') 57 | plt.show() 58 | -------------------------------------------------------------------------------- /sklearn/utils/tests/test_deprecation.py: -------------------------------------------------------------------------------- 1 | # Authors: Raghav RV 2 | # License: BSD 3 clause 3 | 4 | 5 | import sys 6 | import pickle 7 | 8 | from sklearn.utils.deprecation import _is_deprecated 9 | from sklearn.utils.deprecation import deprecated 10 | from sklearn.utils.testing import assert_warns_message 11 | from sklearn.utils.testing import SkipTest 12 | 13 | 14 | @deprecated('qwerty') 15 | class MockClass1: 16 | pass 17 | 18 | 19 | class MockClass2: 20 | @deprecated('mockclass2_method') 21 | def method(self): 22 | pass 23 | 24 | 25 | class MockClass3: 26 | @deprecated() 27 | def __init__(self): 28 | pass 29 | 30 | 31 | class MockClass4: 32 | pass 33 | 34 | 35 | @deprecated() 36 | def mock_function(): 37 | return 10 38 | 39 | 40 | def test_deprecated(): 41 | assert_warns_message(DeprecationWarning, 'qwerty', MockClass1) 42 | assert_warns_message(DeprecationWarning, 'mockclass2_method', 43 | MockClass2().method) 44 | assert_warns_message(DeprecationWarning, 'deprecated', MockClass3) 45 | val = assert_warns_message(DeprecationWarning, 'deprecated', mock_function) 46 | assert val == 10 47 | 48 | 49 | def test_is_deprecated(): 50 | if sys.version_info < (3, 5): 51 | raise SkipTest("This test will run only on python3.5 and above") 52 | # Test if _is_deprecated helper identifies wrapping via deprecated 53 | # NOTE it works only for class methods and functions 54 | assert _is_deprecated(MockClass1.__init__) 55 | assert _is_deprecated(MockClass2().method) 56 | assert _is_deprecated(MockClass3.__init__) 57 | assert not _is_deprecated(MockClass4.__init__) 58 | assert _is_deprecated(mock_function) 59 | 60 | 61 | def test_pickle(): 62 | pickle.loads(pickle.dumps(mock_function)) 63 | -------------------------------------------------------------------------------- /sklearn/externals/joblib/_multiprocessing_helpers.py: -------------------------------------------------------------------------------- 1 | """Helper module to factorize the conditional multiprocessing import logic 2 | 3 | We use a distinct module to simplify import statements and avoid introducing 4 | circular dependencies (for instance for the assert_spawning name). 5 | """ 6 | import os 7 | import sys 8 | import warnings 9 | 10 | 11 | # Obtain the configuration from the environment, assuming it is 1 (on) 12 | # by default; a value of 0 is turned into None (off). This should fail 13 | # loudly if some non-0/1 value is set. 
--------------------------------------------------------------------------------
/sklearn/externals/joblib/_multiprocessing_helpers.py:
--------------------------------------------------------------------------------
"""Helper module to factorize the conditional multiprocessing import logic.

We use a distinct module to simplify import statements and avoid introducing
circular dependencies (for instance for the assert_spawning name).
"""
import os
import sys
import warnings


# Obtain the multiprocessing setting from the environment: enabled (1) by
# default, disabled when set to 0 (mp becomes None). Any other non-integer
# value makes the int() conversion fail loudly, which is intentional.
mp = int(os.environ.get('JOBLIB_MULTIPROCESSING', 1)) or None
if mp:
    try:
        import multiprocessing as mp
    except ImportError:
        mp = None

# 2nd stage: validate that locking is available on the system and
# issue a warning if not
if mp is not None:
    try:
        if sys.version_info < (3, 3):
            Semaphore = mp.Semaphore
        else:
            # Using mp.Semaphore has the side effect of setting the default
            # backend for multiprocessing. To avoid that, we use the 'spawn'
            # context, which is available on all supported platforms.
            ctx = mp.get_context('spawn')
            Semaphore = ctx.Semaphore
        _sem = Semaphore()
        del _sem  # cleanup
    except (ImportError, OSError) as e:
        mp = None
        warnings.warn('%s. joblib will operate in serial mode' % (e,))


# 3rd stage: backward compat for the assert_spawning helper
if mp is not None:
    try:
        # Python 3.4+
        from multiprocessing.context import assert_spawning
    except ImportError:
        from multiprocessing.forking import assert_spawning
else:
    assert_spawning = None
--------------------------------------------------------------------------------
/doc/modules/cross_decomposition.rst:
--------------------------------------------------------------------------------
.. _cross_decomposition:

===================
Cross decomposition
===================

.. currentmodule:: sklearn.cross_decomposition

The cross decomposition module contains two main families of algorithms:
partial least squares (PLS) and canonical correlation analysis (CCA).

These families of algorithms are useful for finding linear relations between
two multivariate datasets: the ``X`` and ``Y`` arguments of the ``fit``
method are 2D arrays.

.. figure:: ../auto_examples/cross_decomposition/images/sphx_glr_plot_compare_cross_decomposition_001.png
   :target: ../auto_examples/cross_decomposition/plot_compare_cross_decomposition.html
   :scale: 75%
   :align: center


Cross decomposition algorithms find the fundamental relations between two
matrices (X and Y). They are latent variable approaches to modeling the
covariance structures in these two spaces. They try to find the
multidimensional direction in the X space that explains the direction of
maximum multidimensional variance in the Y space. PLS regression is
particularly suited when the matrix of predictors has more variables than
observations, and when there is multicollinearity among the X values. By
contrast, standard regression will fail in these cases.

Classes included in this module are :class:`PLSRegression`,
:class:`PLSCanonical`, :class:`CCA` and :class:`PLSSVD`.


.. topic:: Reference:

   * JA Wegelin
     `A survey of Partial Least Squares (PLS) methods, with emphasis on the two-block case `_

.. topic:: Examples:

   * :ref:`sphx_glr_auto_examples_cross_decomposition_plot_compare_cross_decomposition.py`
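
A minimal usage sketch of the ``fit``/``predict`` interface described above
(the random data below is made up for illustration)::

    import numpy as np
    from sklearn.cross_decomposition import PLSRegression

    rng = np.random.RandomState(0)
    X = rng.randn(20, 10)                  # 20 samples, 10 predictors
    # Y depends (noisily) on the first two columns of X
    Y = X[:, :2].dot(rng.randn(2, 3)) + 0.1 * rng.randn(20, 3)

    pls = PLSRegression(n_components=2)
    pls.fit(X, Y)                          # both X and Y are 2D arrays
    Y_pred = pls.predict(X)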
--------------------------------------------------------------------------------
/sklearn/__check_build/__init__.py:
--------------------------------------------------------------------------------
"""Module to give helpful messages to users who did not compile
scikit-learn properly.
"""
import os

INPLACE_MSG = """
It appears that you are importing a local scikit-learn source tree. For
this, you need an inplace install. Maybe you are in the source
directory and need to try from another location."""

STANDARD_MSG = """
If you have used an installer, please check that it is suited for your
Python version, your operating system and your platform."""


def raise_build_error(e):
    # Raise a comprehensible error and list the contents of the
    # directory to help debugging on the mailing list.
    local_dir = os.path.split(__file__)[0]
    msg = STANDARD_MSG
    if local_dir == "sklearn/__check_build":
        # Picking up the local install: this will work only if the
        # install is an 'inplace build'
        msg = INPLACE_MSG
    # Lay the directory listing out in three columns.
    dir_content = list()
    for i, filename in enumerate(os.listdir(local_dir)):
        if ((i + 1) % 3):
            dir_content.append(filename.ljust(26))
        else:
            dir_content.append(filename + '\n')
    raise ImportError("""%s
___________________________________________________________________________
Contents of %s:
%s
___________________________________________________________________________
It seems that scikit-learn has not been built correctly.

If you have installed scikit-learn from source, please do not forget
to build the package before using it: run `python setup.py install` or
`make` in the source directory.
%s""" % (e, local_dir, ''.join(dir_content).strip(), msg))

try:
    from ._check_build import check_build  # noqa
except ImportError as e:
    raise_build_error(e)
--------------------------------------------------------------------------------
/sklearn/cluster/_dbscan_inner.pyx:
--------------------------------------------------------------------------------
# Fast inner loop for DBSCAN.
# Author: Lars Buitinck
# License: 3-clause BSD

cimport cython
from libcpp.vector cimport vector
cimport numpy as np
import numpy as np


# Work around a Cython bug: C++ exceptions are not caught unless thrown
# within a cdef function with an "except +" declaration.
cdef inline void push(vector[np.npy_intp] &stack, np.npy_intp i) except +:
    stack.push_back(i)


@cython.boundscheck(False)
@cython.wraparound(False)
def dbscan_inner(np.ndarray[np.uint8_t, ndim=1, mode='c'] is_core,
                 np.ndarray[object, ndim=1] neighborhoods,
                 np.ndarray[np.npy_intp, ndim=1, mode='c'] labels):
    cdef np.npy_intp i, label_num = 0, v
    cdef np.ndarray[np.npy_intp, ndim=1] neighb
    cdef vector[np.npy_intp] stack

    for i in range(labels.shape[0]):
        if labels[i] != -1 or not is_core[i]:
            continue

        # Depth-first search starting from i, ending at the non-core points.
        # This is very similar to the classic algorithm for computing
        # connected components, the difference being that we label non-core
        # points as part of a cluster (component), but don't expand their
        # neighborhoods.
        while True:
            if labels[i] == -1:
                labels[i] = label_num
                if is_core[i]:
                    neighb = neighborhoods[i]
                    # NB: the loop below deliberately reuses i; it is
                    # reassigned from the stack (or the loop exits) before
                    # the next iteration of the while loop.
                    for i in range(neighb.shape[0]):
                        v = neighb[i]
                        if labels[v] == -1:
                            push(stack, v)

            if stack.size() == 0:
                break
            i = stack.back()
            stack.pop_back()

        label_num += 1
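
# For exposition only (not part of the original file): a pure-Python mirror
# of the loop above, useful for following the control flow without the
# Cython types. As in dbscan_inner, 'labels' must arrive filled with -1.
def _dbscan_inner_py(is_core, neighborhoods, labels):
    label_num = 0
    stack = []
    for i in range(len(labels)):
        if labels[i] != -1 or not is_core[i]:
            continue
        while True:
            if labels[i] == -1:
                labels[i] = label_num          # claim point for this cluster
                if is_core[i]:
                    for v in neighborhoods[i]:
                        if labels[v] == -1:
                            stack.append(v)    # expand only core points
            if not stack:
                break
            i = stack.pop()
        label_num += 1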
--------------------------------------------------------------------------------
/sklearn/utils/seq_dataset.pxd:
--------------------------------------------------------------------------------
"""Dataset abstractions for sequential data access."""

cimport numpy as np

# SequentialDataset and its two concrete subclasses are (optionally
# randomized) iterators over the rows of a matrix X and the corresponding
# target values y.

cdef class SequentialDataset:
    cdef int current_index
    cdef np.ndarray index
    cdef int *index_data_ptr
    cdef Py_ssize_t n_samples
    cdef np.uint32_t seed

    cdef void shuffle(self, np.uint32_t seed) nogil
    cdef int _get_next_index(self) nogil
    cdef int _get_random_index(self) nogil

    cdef void _sample(self, double **x_data_ptr, int **x_ind_ptr,
                      int *nnz, double *y, double *sample_weight,
                      int current_index) nogil
    cdef void next(self, double **x_data_ptr, int **x_ind_ptr,
                   int *nnz, double *y, double *sample_weight) nogil
    cdef int random(self, double **x_data_ptr, int **x_ind_ptr,
                    int *nnz, double *y, double *sample_weight) nogil


cdef class ArrayDataset(SequentialDataset):
    cdef np.ndarray X
    cdef np.ndarray Y
    cdef np.ndarray sample_weights
    cdef Py_ssize_t n_features
    cdef np.npy_intp X_stride
    cdef double *X_data_ptr
    cdef double *Y_data_ptr
    cdef np.ndarray feature_indices
    cdef int *feature_indices_ptr
    cdef double *sample_weight_data


cdef class CSRDataset(SequentialDataset):
    cdef np.ndarray X_data
    cdef np.ndarray X_indptr
    cdef np.ndarray X_indices
    cdef np.ndarray Y
    cdef np.ndarray sample_weights
    cdef double *X_data_ptr
    cdef int *X_indptr_ptr
    cdef int *X_indices_ptr
    cdef double *Y_data_ptr
    cdef double *sample_weight_data
--------------------------------------------------------------------------------
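The pxd above only declares the interface; as a rough mental model (a plain
Python sketch under the stated assumptions, not the actual implementation),
the access pattern it promises looks like this:

    import numpy as np

    class ArrayDatasetSketch:
        """Rough Python model of the ArrayDataset access pattern."""

        def __init__(self, X, y, sample_weight, seed=1):
            self.X, self.y, self.w = X, y, sample_weight
            self.index = np.arange(X.shape[0])   # the (shufflable) sample order
            self.current_index = -1
            self.rng = np.random.RandomState(seed)

        def shuffle(self, seed):
            # mirrors SequentialDataset.shuffle
            np.random.RandomState(seed).shuffle(self.index)

        def next(self):
            # sequential access, wrapping around like _get_next_index
            self.current_index = (self.current_index + 1) % len(self.index)
            i = self.index[self.current_index]
            return self.X[i], self.y[i], self.w[i]

        def random(self):
            # randomized access, mirroring _get_random_index
            i = self.index[self.rng.randint(len(self.index))]
            return self.X[i], self.y[i], self.w[i]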