├── sklearn
├── tests
│   ├── __init__.py
│   ├── test_check_build.py
│   └── test_init.py
├── cluster
│   ├── tests
│   │   ├── __init__.py
│   │   └── common.py
│   ├── __init__.py
│   └── _dbscan_inner.pyx
├── compose
│   ├── tests
│   │   └── __init__.py
│   └── __init__.py
├── metrics
│   ├── tests
│   │   └── __init__.py
│   ├── cluster
│   │   ├── tests
│   │   │   └── __init__.py
│   │   ├── setup.py
│   │   └── __init__.py
│   └── setup.py
├── mixture
│   ├── tests
│   │   ├── __init__.py
│   │   └── test_mixture.py
│   └── __init__.py
├── src
│   └── cblas
│   │   ├── atlas_type.h
│   │   ├── atlas_dsysinfo.h
│   │   ├── atlas_ssysinfo.h
│   │   ├── atlas_ptalias2.h
│   │   └── README.txt
├── svm
│   ├── tests
│   │   └── __init__.py
│   ├── src
│   │   ├── libsvm
│   │   │   ├── libsvm_template.cpp
│   │   │   └── LIBSVM_CHANGES
│   │   └── liblinear
│   │   │   ├── tron.h
│   │   │   └── COPYRIGHT
│   ├── __init__.py
│   └── liblinear.pxd
├── tree
│   ├── tests
│   │   └── __init__.py
│   ├── __init__.py
│   └── setup.py
├── utils
│   ├── tests
│   │   ├── __init__.py
│   │   ├── test_bench.py
│   │   ├── test_optimize.py
│   │   ├── test_show_versions.py
│   │   ├── test_fast_dict.py
│   │   ├── test_linear_assignment.py
│   │   └── test_deprecation.py
│   ├── lgamma.pxd
│   ├── src
│   │   ├── gamma.h
│   │   └── MurmurHash3.h
│   ├── lgamma.pyx
│   ├── _random.pxd
│   ├── bench.py
│   ├── fast_dict.pxd
│   ├── stats.py
│   ├── _logistic_sigmoid.pyx
│   ├── _joblib.py
│   ├── murmurhash.pxd
│   ├── weight_vector.pxd
│   └── seq_dataset.pxd
├── covariance
│   ├── tests
│   │   └── __init__.py
│   └── __init__.py
├── datasets
│   ├── tests
│   │   ├── __init__.py
│   │   ├── data
│   │   │   ├── svmlight_invalid_order.txt
│   │   │   ├── svmlight_invalid.txt
│   │   │   ├── svmlight_multilabel.txt
│   │   │   ├── openml
│   │   │   │   ├── 2
│   │   │   │   │   ├── api-v1-json-data-2.json.gz
│   │   │   │   │   ├── data-v1-download-1666876.arff.gz
│   │   │   │   │   ├── api-v1-json-data-features-2.json.gz
│   │   │   │   │   ├── api-v1-json-data-list-data_name-anneal-limit-2-data_version-1.json.gz
│   │   │   │   │   └── api-v1-json-data-list-data_name-anneal-limit-2-status-active-.json.gz
│   │   │   │   ├── 61
│   │   │   │   │   ├── api-v1-json-data-61.json.gz
│   │   │   │   │   ├── data-v1-download-61.arff.gz
│   │   │   │   │   ├── api-v1-json-data-features-61.json.gz
│   │   │   │   │   ├── api-v1-json-data-list-data_name-iris-limit-2-data_version-1.json.gz
│   │   │   │   │   └── api-v1-json-data-list-data_name-iris-limit-2-status-active-.json.gz
│   │   │   │   ├── 292
│   │   │   │   │   ├── api-v1-json-data-292.json.gz
│   │   │   │   │   ├── api-v1-json-data-40981.json.gz
│   │   │   │   │   ├── data-v1-download-49822.arff.gz
│   │   │   │   │   ├── api-v1-json-data-features-292.json.gz
│   │   │   │   │   ├── api-v1-json-data-features-40981.json.gz
│   │   │   │   │   ├── api-v1-json-data-list-data_name-australian-limit-2-data_version-1.json.gz
│   │   │   │   │   ├── api-v1-json-data-list-data_name-australian-limit-2-status-active-.json.gz
│   │   │   │   │   └── api-v1-json-data-list-data_name-australian-limit-2-data_version-1-status-deactivated.json.gz
│   │   │   │   ├── 561
│   │   │   │   │   ├── api-v1-json-data-561.json.gz
│   │   │   │   │   ├── data-v1-download-52739.arff.gz
│   │   │   │   │   ├── api-v1-json-data-features-561.json.gz
│   │   │   │   │   ├── api-v1-json-data-list-data_name-cpu-limit-2-data_version-1.json.gz
│   │   │   │   │   └── api-v1-json-data-list-data_name-cpu-limit-2-status-active-.json.gz
│   │   │   │   ├── 1119
│   │   │   │   │   ├── api-v1-json-data-1119.json.gz
│   │   │   │   │   ├── data-v1-download-54002.arff.gz
│   │   │   │   │   ├── api-v1-json-data-features-1119.json.gz
│   │   │   │   │   ├── api-v1-json-data-list-data_name-adult-census-limit-2-data_version-1.json.gz
│   │   │   │   │   └── api-v1-json-data-list-data_name-adult-census-limit-2-status-active-.json.gz
│   │   │   │   ├── 40589
│   │   │   │   │   ├── api-v1-json-data-40589.json.gz
│   │   │   │   │   ├── data-v1-download-4644182.arff.gz
│   │   │   │   │   ├── api-v1-json-data-features-40589.json.gz
│   │   │   │   │   ├── api-v1-json-data-list-data_name-emotions-limit-2-data_version-3.json.gz
│   │   │   │   │   └── api-v1-json-data-list-data_name-emotions-limit-2-status-active-.json.gz
│   │   │   │   ├── 40675
│   │   │   │   │   ├── api-v1-json-data-40675.json.gz
│   │   │   │   │   ├── data-v1-download-4965250.arff.gz
│   │   │   │   │   ├── api-v1-json-data-features-40675.json.gz
│   │   │   │   │   ├── api-v1-json-data-list-data_name-glass2-limit-2-data_version-1.json.gz
│   │   │   │   │   ├── api-v1-json-data-list-data_name-glass2-limit-2-status-active-.json.gz
│   │   │   │   │   └── api-v1-json-data-list-data_name-glass2-limit-2-data_version-1-status-deactivated.json.gz
│   │   │   │   ├── 40945
│   │   │   │   │   ├── api-v1-json-data-40945.json.gz
│   │   │   │   │   └── api-v1-json-data-features-40945.json.gz
│   │   │   │   └── 40966
│   │   │   │   │   ├── api-v1-json-data-40966.json.gz
│   │   │   │   │   ├── data-v1-download-17928620.arff.gz
│   │   │   │   │   ├── api-v1-json-data-features-40966.json.gz
│   │   │   │   │   ├── api-v1-json-data-list-data_name-miceprotein-limit-2-data_version-4.json.gz
│   │   │   │   │   └── api-v1-json-data-list-data_name-miceprotein-limit-2-status-active-.json.gz
│   │   │   └── svmlight_classification.txt
│   │   ├── test_common.py
│   │   ├── test_california_housing.py
│   │   └── test_covtype.py
│   ├── images
│   │   ├── china.jpg
│   │   ├── flower.jpg
│   │   └── README.txt
│   ├── data
│   │   ├── digits.csv.gz
│   │   ├── diabetes_data.csv.gz
│   │   ├── diabetes_target.csv.gz
│   │   ├── linnerud_exercise.csv
│   │   └── linnerud_physiological.csv
│   ├── descr
│   │   ├── linnerud.rst
│   │   ├── covtype.rst
│   │   ├── diabetes.rst
│   │   └── california_housing.rst
│   └── setup.py
├── ensemble
│   ├── tests
│   │   └── __init__.py
│   ├── setup.py
│   └── __init__.py
├── linear_model
│   ├── tests
│   │   └── __init__.py
│   ├── sgd_fast_helpers.h
│   └── sgd_fast.pxd
├── manifold
│   ├── tests
│   │   └── __init__.py
│   ├── __init__.py
│   └── setup.py
├── neighbors
│   ├── tests
│   │   └── __init__.py
│   ├── typedefs.pxd
│   ├── typedefs.pyx
│   ├── __init__.py
│   └── setup.py
├── decomposition
│   ├── tests
│   │   └── __init__.py
│   ├── setup.py
│   ├── cdnmf_fast.pyx
│   └── __init__.py
├── feature_selection
│   ├── tests
│   │   ├── __init__.py
│   │   └── test_variance_threshold.py
│   └── __init__.py
├── gaussian_process
│   ├── tests
│   │   └── __init__.py
│   └── __init__.py
├── model_selection
│   └── tests
│   │   ├── __init__.py
│   │   └── common.py
├── neural_network
│   ├── tests
│   │   └── __init__.py
│   └── __init__.py
├── preprocessing
│   ├── tests
│   │   └── __init__.py
│   └── setup.py
├── semi_supervised
│   ├── tests
│   │   └── __init__.py
│   └── __init__.py
├── cross_decomposition
│   ├── tests
│   │   └── __init__.py
│   └── __init__.py
├── externals
│   ├── joblib
│   │   ├── externals
│   │   │   ├── __init__.py
│   │   │   ├── cloudpickle
│   │   │   │   └── __init__.py
│   │   │   └── loky
│   │   │   │   ├── backend
│   │   │   │   ├── compat_posix.py
│   │   │   │   ├── __init__.py
│   │   │   │   ├── compat.py
│   │   │   │   └── fork_exec.py
│   │   │   │   └── __init__.py
│   │   ├── _compat.py
│   │   └── _multiprocessing_helpers.py
│   ├── __init__.py
│   ├── conftest.py
│   ├── README
│   ├── setup.py
│   └── copy_joblib.sh
├── feature_extraction
│   ├── tests
│   │   └── __init__.py
│   ├── __init__.py
│   └── setup.py
└── __check_build
│   ├── _check_build.pyx
│   ├── setup.py
│   └── __init__.py
├── doc
├── testimonials
│   ├── images
│   │   ├── Makefile
│   │   ├── mars.png
│   │   ├── yhat.png
│   │   ├── zopa.png
│   │   ├── aweber.png
│   │   ├── inria.png
│   │   ├── lovely.png
│   │   ├── betaworks.png
│   │   ├── birchbox.jpg
│   │   ├── booking.png
│   │   ├── datarobot.png
│   │   ├── evernote.png
│   │   ├── infonea.jpg
│   │   ├── okcupid.png
│   │   ├── peerindex.png
│   │   ├── phimeca.png
│   │   ├── rangespan.png
│   │   ├── spotify.png
│   │   ├── change-logo.png
│   │   ├── datapublica.png
│   │   ├── howaboutwe.png
│   │   ├── huggingface.png
│   │   ├── machinalis.png
│   │   ├── solido_logo.png
│   │   ├── dataiku_logo.png
│   │   ├── ottogroup_logo.png
│   │   ├── bestofmedia-logo.png
│   │   └── telecomparistech.jpg
│   └── README.txt
├── themes
│   └── scikit-learn
│   │   ├── static
│   │   ├── css
│   │   │   └── examples.css
│   │   ├── img
│   │   │   ├── columbia.png
│   │   │   ├── forkme.png
│   │   │   ├── google.png
│   │   │   ├── telecom.png
│   │   │   ├── FNRS-logo.png
│   │   │   ├── digicosme.png
│   │   │   ├── sloan_logo.jpg
│   │   │   ├── inria-small.jpg
│   │   │   ├── inria-small.png
│   │   │   ├── nyu_short_color.png
│   │   │   ├── sydney-primary.jpeg
│   │   │   ├── sydney-stacked.jpeg
│   │   │   ├── scikit-learn-logo.png
│   │   │   ├── glyphicons-halflings.png
│   │   │   ├── plot_manifold_sphere_1.png
│   │   │   ├── scikit-learn-logo-small.png
│   │   │   ├── scikit-learn-logo-notext.png
│   │   │   ├── glyphicons-halflings-white.png
│   │   │   └── plot_classifier_comparison_1.png
│   │   └── js
│   │   │   └── extra.js
│   │   └── theme.conf
├── sphinxext
│   └── MANIFEST.in
├── images
│   ├── iris.pdf
│   ├── dysco.png
│   ├── ml_map.png
│   ├── cds-logo.png
│   ├── no_image.png
│   ├── rbm_graph.png
│   ├── inria-logo.jpg
│   ├── last_digit.png
│   ├── sloan_banner.png
│   ├── lda_model_graph.png
│   ├── nyu_short_color.png
│   ├── plot_face_recognition_1.png
│   ├── plot_face_recognition_2.png
│   ├── scikit-learn-logo-notext.png
│   ├── plot_digits_classification.png
│   └── multilayerperceptron_network.png
├── logos
│   ├── favicon.ico
│   ├── identity.pdf
│   ├── scikit-learn-logo.bmp
│   ├── scikit-learn-logo.png
│   ├── scikit-learn-logo-small.png
│   ├── scikit-learn-logo-thumb.png
│   └── scikit-learn-logo-notext.png
├── modules
│   ├── glm_data
│   │   └── lasso_enet_coordinate_descent.png
│   ├── pipeline.rst
│   ├── isotonic.rst
│   └── cross_decomposition.rst
├── tutorial
│   ├── common_includes
│   │   └── info.txt
│   ├── text_analytics
│   │   ├── .gitignore
│   │   ├── data
│   │   │   ├── movie_reviews
│   │   │   │   └── fetch_data.py
│   │   │   └── twenty_newsgroups
│   │   │   │   └── fetch_data.py
│   │   └── solutions
│   │   │   └── generate_skeletons.py
│   ├── index.rst
│   └── statistical_inference
│   │   ├── finding_help.rst
│   │   └── index.rst
├── templates
│   ├── generate_deprecated.sh
│   ├── function.rst
│   ├── class_without_init.rst
│   ├── numpydoc_docstring.rst
│   ├── class.rst
│   ├── deprecated_function.rst
│   ├── deprecated_class_without_init.rst
│   ├── class_with_call.rst
│   ├── deprecated_class.rst
│   └── deprecated_class_with_call.rst
├── README.md
├── developers
│   └── index.rst
├── model_selection.rst
├── unsupervised_learning.rst
├── user_guide.rst
├── preface.rst
├── supervised_learning.rst
├── includes
│   ├── big_toc_css.rst
│   └── bigger_toc_css.rst
├── whats_new.rst
└── data_transforms.rst
├── .gitattributes
├── benchmarks
├── .gitignore
├── plot_tsne_mnist.py
├── bench_plot_ward.py
├── bench_plot_parallel_pairwise.py
└── bench_glm.py
├── examples
├── exercises
│   ├── README.txt
│   ├── plot_digits_classification_exercise.py
│   └── plot_cv_digits.py
├── cluster
│   └── README.txt
├── tree
│   ├── README.txt
│   └── plot_tree_regression.py
├── ensemble
│   ├── README.txt
│   ├── plot_forest_importances_faces.py
│   └── plot_adaboost_regression.py
├── svm
│   ├── README.txt
│   ├── plot_svm_nonlinear.py
│   ├── plot_separating_hyperplane.py
│   ├── plot_svm_regression.py
│   └── plot_custom_kernel.py
├── bicluster
│   └── README.txt
├── datasets
│   ├── README.txt
│   └── plot_digits_last_image.py
├── classification
│   └── README.txt
├── mixture
│   ├── README.txt
│   └── plot_gmm_pdf.py
├── neighbors
│   ├── README.txt
│   └── plot_regression.py
├── preprocessing
│   └── README.txt
├── covariance
│   └── README.txt
├── decomposition
│   ├── README.txt
│   ├── plot_beta_divergence.py
│   └── plot_pca_iris.py
├── manifold
│   ├── README.txt
│   └── plot_swissroll.py
├── multioutput
│   └── README.txt
├── linear_model
│   ├── README.txt
│   ├── plot_lasso_lars.py
│   ├── plot_sgd_loss_functions.py
│   ├── plot_sgd_separating_hyperplane.py
│   ├── plot_sgd_penalties.py
│   └── plot_sgd_weighted_samples.py
├── neural_networks
│   └── README.txt
├── model_selection
│   ├── README.txt
│   └── plot_cv_predict.py
├── text
│   └── README.txt
├── calibration
│   └── README.txt
├── feature_selection
│   ├── README.txt
│   ├── plot_rfe_digits.py
│   ├── plot_feature_selection_pipeline.py
│   ├── plot_rfe_with_cross_validation.py
│   └── plot_select_from_model_boston.py
├── .flake8
├── README.txt
├── cross_decomposition
│   └── README.txt
├── semi_supervised
│   └── README.txt
├── gaussian_process
│   └── README.txt
├── applications
│   └── README.txt
└── compose
│   └── README.txt
├── .landscape.yml
├── .coveragerc
├── MANIFEST.in
├── site.cfg
├── setup.cfg
├── .codecov.yml
├── .gitignore
├── PULL_REQUEST_TEMPLATE.md
├── Makefile
└── COPYING

/sklearn/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/doc/testimonials/images/Makefile:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/cluster/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/compose/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/metrics/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/mixture/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/src/cblas/atlas_type.h:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/svm/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/tree/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/utils/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/covariance/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/datasets/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/ensemble/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/linear_model/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/manifold/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/neighbors/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/src/cblas/atlas_dsysinfo.h:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/src/cblas/atlas_ssysinfo.h:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/decomposition/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/feature_selection/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/gaussian_process/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/metrics/cluster/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/model_selection/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/neural_network/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/preprocessing/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/semi_supervised/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/doc/themes/scikit-learn/static/css/examples.css:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/cross_decomposition/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/externals/joblib/externals/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/sklearn/feature_extraction/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | /doc/whats_new.rst merge=union 2 | -------------------------------------------------------------------------------- /sklearn/utils/lgamma.pxd: -------------------------------------------------------------------------------- 1 | cdef double lgamma(double x) 2 | -------------------------------------------------------------------------------- /sklearn/__check_build/_check_build.pyx: -------------------------------------------------------------------------------- 1 | def check_build(): 2 | return -------------------------------------------------------------------------------- /benchmarks/.gitignore: -------------------------------------------------------------------------------- 1 | /bhtsne 2 | *.npy 3 | *.json 4 | /mnist_tsne_output/ 5 | -------------------------------------------------------------------------------- /doc/sphinxext/MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include tests *.py 2 | include *.txt 3 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/svmlight_invalid_order.txt: -------------------------------------------------------------------------------- 1 | -1 5:2.5 2:-5.2 15:1.5 2 | -------------------------------------------------------------------------------- /sklearn/externals/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | External, bundled dependencies. 
4 | 5 | """ 6 | -------------------------------------------------------------------------------- /doc/images/iris.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/images/iris.pdf -------------------------------------------------------------------------------- /doc/images/dysco.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/images/dysco.png -------------------------------------------------------------------------------- /doc/images/ml_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/images/ml_map.png -------------------------------------------------------------------------------- /doc/logos/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/logos/favicon.ico -------------------------------------------------------------------------------- /doc/logos/identity.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/logos/identity.pdf -------------------------------------------------------------------------------- /sklearn/cross_decomposition/__init__.py: -------------------------------------------------------------------------------- 1 | from .pls_ import * # noqa 2 | from .cca_ import * # noqa 3 | -------------------------------------------------------------------------------- /doc/images/cds-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/images/cds-logo.png -------------------------------------------------------------------------------- /doc/images/no_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/images/no_image.png -------------------------------------------------------------------------------- /doc/images/rbm_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/images/rbm_graph.png -------------------------------------------------------------------------------- /doc/images/inria-logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/images/inria-logo.jpg -------------------------------------------------------------------------------- /doc/images/last_digit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/images/last_digit.png -------------------------------------------------------------------------------- /doc/images/sloan_banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/images/sloan_banner.png -------------------------------------------------------------------------------- /examples/exercises/README.txt: -------------------------------------------------------------------------------- 1 | Tutorial exercises 2 | ------------------ 3 | 4 | 
Exercises for the tutorials 5 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/svmlight_invalid.txt: -------------------------------------------------------------------------------- 1 | python 2:2.5 10:-5.2 15:1.5 2 | 2.0 5:1.0 12:-3 3 | 3.0 20:27 4 | -------------------------------------------------------------------------------- /.landscape.yml: -------------------------------------------------------------------------------- 1 | pylint: 2 | disable: 3 | - unpacking-non-sequence 4 | ignore-paths: 5 | - sklearn/externals 6 | -------------------------------------------------------------------------------- /doc/images/lda_model_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/images/lda_model_graph.png -------------------------------------------------------------------------------- /doc/images/nyu_short_color.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/images/nyu_short_color.png -------------------------------------------------------------------------------- /doc/logos/scikit-learn-logo.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/logos/scikit-learn-logo.bmp -------------------------------------------------------------------------------- /doc/logos/scikit-learn-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/logos/scikit-learn-logo.png -------------------------------------------------------------------------------- /doc/testimonials/images/mars.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/mars.png -------------------------------------------------------------------------------- /doc/testimonials/images/yhat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/yhat.png -------------------------------------------------------------------------------- /doc/testimonials/images/zopa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/zopa.png -------------------------------------------------------------------------------- /doc/testimonials/images/aweber.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/aweber.png -------------------------------------------------------------------------------- /doc/testimonials/images/inria.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/inria.png -------------------------------------------------------------------------------- /doc/testimonials/images/lovely.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/lovely.png 
-------------------------------------------------------------------------------- /sklearn/datasets/images/china.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/images/china.jpg -------------------------------------------------------------------------------- /sklearn/datasets/images/flower.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/images/flower.jpg -------------------------------------------------------------------------------- /doc/logos/scikit-learn-logo-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/logos/scikit-learn-logo-small.png -------------------------------------------------------------------------------- /doc/logos/scikit-learn-logo-thumb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/logos/scikit-learn-logo-thumb.png -------------------------------------------------------------------------------- /doc/testimonials/images/betaworks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/betaworks.png -------------------------------------------------------------------------------- /doc/testimonials/images/birchbox.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/birchbox.jpg -------------------------------------------------------------------------------- /doc/testimonials/images/booking.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/booking.png -------------------------------------------------------------------------------- /doc/testimonials/images/datarobot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/datarobot.png -------------------------------------------------------------------------------- /doc/testimonials/images/evernote.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/evernote.png -------------------------------------------------------------------------------- /doc/testimonials/images/infonea.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/infonea.jpg -------------------------------------------------------------------------------- /doc/testimonials/images/okcupid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/okcupid.png -------------------------------------------------------------------------------- /doc/testimonials/images/peerindex.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/peerindex.png -------------------------------------------------------------------------------- /doc/testimonials/images/phimeca.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/phimeca.png -------------------------------------------------------------------------------- /doc/testimonials/images/rangespan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/rangespan.png -------------------------------------------------------------------------------- /doc/testimonials/images/spotify.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/spotify.png -------------------------------------------------------------------------------- /sklearn/datasets/data/digits.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/data/digits.csv.gz -------------------------------------------------------------------------------- /doc/images/plot_face_recognition_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/images/plot_face_recognition_1.png -------------------------------------------------------------------------------- /doc/images/plot_face_recognition_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/images/plot_face_recognition_2.png -------------------------------------------------------------------------------- /doc/images/scikit-learn-logo-notext.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/images/scikit-learn-logo-notext.png -------------------------------------------------------------------------------- /doc/logos/scikit-learn-logo-notext.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/logos/scikit-learn-logo-notext.png -------------------------------------------------------------------------------- /doc/testimonials/images/change-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/change-logo.png -------------------------------------------------------------------------------- /doc/testimonials/images/datapublica.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/datapublica.png -------------------------------------------------------------------------------- /doc/testimonials/images/howaboutwe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/howaboutwe.png -------------------------------------------------------------------------------- 
/doc/testimonials/images/huggingface.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/huggingface.png -------------------------------------------------------------------------------- /doc/testimonials/images/machinalis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/machinalis.png -------------------------------------------------------------------------------- /doc/testimonials/images/solido_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/solido_logo.png -------------------------------------------------------------------------------- /doc/images/plot_digits_classification.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/images/plot_digits_classification.png -------------------------------------------------------------------------------- /doc/testimonials/images/dataiku_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/dataiku_logo.png -------------------------------------------------------------------------------- /doc/testimonials/images/ottogroup_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/ottogroup_logo.png -------------------------------------------------------------------------------- /sklearn/datasets/data/diabetes_data.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/data/diabetes_data.csv.gz -------------------------------------------------------------------------------- /doc/images/multilayerperceptron_network.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/images/multilayerperceptron_network.png -------------------------------------------------------------------------------- /doc/testimonials/images/bestofmedia-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/bestofmedia-logo.png -------------------------------------------------------------------------------- /doc/testimonials/images/telecomparistech.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/testimonials/images/telecomparistech.jpg -------------------------------------------------------------------------------- /sklearn/datasets/data/diabetes_target.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/data/diabetes_target.csv.gz -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/columbia.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/columbia.png -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/forkme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/forkme.png -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/google.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/google.png -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/telecom.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/telecom.png -------------------------------------------------------------------------------- /examples/cluster/README.txt: -------------------------------------------------------------------------------- 1 | .. _cluster_examples: 2 | 3 | Clustering 4 | ---------- 5 | 6 | Examples concerning the :mod:`sklearn.cluster` module. 7 | -------------------------------------------------------------------------------- /examples/tree/README.txt: -------------------------------------------------------------------------------- 1 | .. _tree_examples: 2 | 3 | Decision Trees 4 | -------------- 5 | 6 | Examples concerning the :mod:`sklearn.tree` module. 
7 | -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/FNRS-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/FNRS-logo.png -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/digicosme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/digicosme.png -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/sloan_logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/sloan_logo.jpg -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/inria-small.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/inria-small.jpg -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/inria-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/inria-small.png -------------------------------------------------------------------------------- /sklearn/utils/src/gamma.h: -------------------------------------------------------------------------------- 1 | #ifndef GAMMA_H 2 | #define GAMMA_H 3 | 4 | //double sklearn_gamma(double); 5 | double sklearn_lgamma(double); 6 | 7 | #endif 8 | 9 | -------------------------------------------------------------------------------- /doc/modules/glm_data/lasso_enet_coordinate_descent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/modules/glm_data/lasso_enet_coordinate_descent.png -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/nyu_short_color.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/nyu_short_color.png -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/sydney-primary.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/sydney-primary.jpeg -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/sydney-stacked.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/sydney-stacked.jpeg -------------------------------------------------------------------------------- /examples/ensemble/README.txt: -------------------------------------------------------------------------------- 1 | .. 
_ensemble_examples: 2 | 3 | Ensemble methods 4 | ---------------- 5 | 6 | Examples concerning the :mod:`sklearn.ensemble` module. 7 | -------------------------------------------------------------------------------- /examples/svm/README.txt: -------------------------------------------------------------------------------- 1 | .. _svm_examples: 2 | 3 | Support Vector Machines 4 | ----------------------- 5 | 6 | Examples concerning the :mod:`sklearn.svm` module. 7 | -------------------------------------------------------------------------------- /sklearn/externals/joblib/externals/cloudpickle/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from .cloudpickle import * 4 | 5 | __version__ = '0.5.6' 6 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | source = sklearn 4 | include = */sklearn/* 5 | omit = 6 | */sklearn/externals/* 7 | */benchmarks/* 8 | */setup.py 9 | -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/scikit-learn-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/scikit-learn-logo.png -------------------------------------------------------------------------------- /examples/bicluster/README.txt: -------------------------------------------------------------------------------- 1 | .. _bicluster_examples: 2 | 3 | Biclustering 4 | ------------ 5 | 6 | Examples concerning the :mod:`sklearn.cluster.bicluster` module. 7 | -------------------------------------------------------------------------------- /examples/datasets/README.txt: -------------------------------------------------------------------------------- 1 | .. _dataset_examples: 2 | 3 | Dataset examples 4 | ----------------------- 5 | 6 | Examples concerning the :mod:`sklearn.datasets` module. 7 | -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/glyphicons-halflings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/glyphicons-halflings.png -------------------------------------------------------------------------------- /examples/classification/README.txt: -------------------------------------------------------------------------------- 1 | .. _classification_examples: 2 | 3 | Classification 4 | ----------------------- 5 | 6 | General examples about classification algorithms. 7 | -------------------------------------------------------------------------------- /examples/mixture/README.txt: -------------------------------------------------------------------------------- 1 | .. _mixture_examples: 2 | 3 | Gaussian Mixture Models 4 | ----------------------- 5 | 6 | Examples concerning the :mod:`sklearn.mixture` module. 7 | -------------------------------------------------------------------------------- /examples/neighbors/README.txt: -------------------------------------------------------------------------------- 1 | .. _neighbors_examples: 2 | 3 | Nearest Neighbors 4 | ----------------------- 5 | 6 | Examples concerning the :mod:`sklearn.neighbors` module. 
7 | -------------------------------------------------------------------------------- /examples/preprocessing/README.txt: -------------------------------------------------------------------------------- 1 | .. _preprocessing_examples: 2 | 3 | Preprocessing 4 | ------------- 5 | 6 | Examples concerning the :mod:`sklearn.preprocessing` module. 7 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/svmlight_multilabel.txt: -------------------------------------------------------------------------------- 1 | # multilabel dataset in SVMlight format 2 | 1,0 2:2.5 10:-5.2 15:1.5 3 | 2 5:1.0 12:-3 4 | 2:3.5 11:26 5 | 1,2 20:27 6 | -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/plot_manifold_sphere_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/plot_manifold_sphere_1.png -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/scikit-learn-logo-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/scikit-learn-logo-small.png -------------------------------------------------------------------------------- /examples/covariance/README.txt: -------------------------------------------------------------------------------- 1 | .. _covariance_examples: 2 | 3 | Covariance estimation 4 | --------------------- 5 | 6 | Examples concerning the :mod:`sklearn.covariance` module. 7 | -------------------------------------------------------------------------------- /examples/decomposition/README.txt: -------------------------------------------------------------------------------- 1 | .. _decomposition_examples: 2 | 3 | Decomposition 4 | ------------- 5 | 6 | Examples concerning the :mod:`sklearn.decomposition` module. 7 | 8 | -------------------------------------------------------------------------------- /examples/manifold/README.txt: -------------------------------------------------------------------------------- 1 | .. _manifold_examples: 2 | 3 | Manifold learning 4 | ----------------------- 5 | 6 | Examples concerning the :mod:`sklearn.manifold` module. 7 | 8 | -------------------------------------------------------------------------------- /examples/multioutput/README.txt: -------------------------------------------------------------------------------- 1 | .. _multioutput_examples: 2 | 3 | Multioutput methods 4 | ------------------- 5 | 6 | Examples concerning the :mod:`sklearn.multioutput` module. 
7 | -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/scikit-learn-logo-notext.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/scikit-learn-logo-notext.png -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/2/api-v1-json-data-2.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/2/api-v1-json-data-2.json.gz -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/glyphicons-halflings-white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/glyphicons-halflings-white.png -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/img/plot_classifier_comparison_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/doc/themes/scikit-learn/static/img/plot_classifier_comparison_1.png -------------------------------------------------------------------------------- /examples/linear_model/README.txt: -------------------------------------------------------------------------------- 1 | .. _linear_examples: 2 | 3 | Generalized Linear Models 4 | ------------------------- 5 | 6 | Examples concerning the :mod:`sklearn.linear_model` module. 7 | -------------------------------------------------------------------------------- /examples/neural_networks/README.txt: -------------------------------------------------------------------------------- 1 | .. _neural_network_examples: 2 | 3 | Neural Networks 4 | ----------------------- 5 | 6 | Examples concerning the :mod:`sklearn.neural_network` module. 
7 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/292/api-v1-json-data-292.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/292/api-v1-json-data-292.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/561/api-v1-json-data-561.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/561/api-v1-json-data-561.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/61/api-v1-json-data-61.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/61/api-v1-json-data-61.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/61/data-v1-download-61.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/61/data-v1-download-61.arff.gz -------------------------------------------------------------------------------- /examples/model_selection/README.txt: -------------------------------------------------------------------------------- 1 | .. _model_selection_examples: 2 | 3 | Model Selection 4 | ----------------------- 5 | 6 | Examples related to the :mod:`sklearn.model_selection` module. 7 | -------------------------------------------------------------------------------- /examples/text/README.txt: -------------------------------------------------------------------------------- 1 | .. _text_examples: 2 | 3 | Working with text documents 4 | ---------------------------- 5 | 6 | Examples concerning the :mod:`sklearn.feature_extraction.text` module. 
7 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/1119/api-v1-json-data-1119.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/1119/api-v1-json-data-1119.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/2/data-v1-download-1666876.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/2/data-v1-download-1666876.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/292/api-v1-json-data-40981.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/292/api-v1-json-data-40981.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/292/data-v1-download-49822.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/292/data-v1-download-49822.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/561/data-v1-download-52739.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/561/data-v1-download-52739.arff.gz -------------------------------------------------------------------------------- /examples/calibration/README.txt: -------------------------------------------------------------------------------- 1 | .. _calibration_examples: 2 | 3 | Calibration 4 | ----------------------- 5 | 6 | Examples illustrating the calibration of predicted probabilities of classifiers. 7 | -------------------------------------------------------------------------------- /examples/feature_selection/README.txt: -------------------------------------------------------------------------------- 1 | .. _feature_selection_examples: 2 | 3 | Feature Selection 4 | ----------------------- 5 | 6 | Examples concerning the :mod:`sklearn.feature_selection` module. 
7 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/1119/data-v1-download-54002.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/1119/data-v1-download-54002.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/2/api-v1-json-data-features-2.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/2/api-v1-json-data-features-2.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40589/api-v1-json-data-40589.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/40589/api-v1-json-data-40589.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40675/api-v1-json-data-40675.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/40675/api-v1-json-data-40675.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40945/api-v1-json-data-40945.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/40945/api-v1-json-data-40945.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40966/api-v1-json-data-40966.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/40966/api-v1-json-data-40966.json.gz -------------------------------------------------------------------------------- /examples/.flake8: -------------------------------------------------------------------------------- 1 | # Examples specific flake8 configuration 2 | 3 | [flake8] 4 | # Same ignore as project-wide plus E402 (imports not at top of file) 5 | ignore=E121,E123,E126,E24,E226,E704,W503,W504,E402 6 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40589/data-v1-download-4644182.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/40589/data-v1-download-4644182.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40675/data-v1-download-4965250.arff.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/40675/data-v1-download-4965250.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40966/data-v1-download-17928620.arff.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/40966/data-v1-download-17928620.arff.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/61/api-v1-json-data-features-61.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/61/api-v1-json-data-features-61.json.gz -------------------------------------------------------------------------------- /doc/tutorial/common_includes/info.txt: -------------------------------------------------------------------------------- 1 | Common RST file snippets meant to be reused, by inclusion, in the real 2 | tutorial, in order to lower the maintenance burden 3 | of redundant sections. 4 | -------------------------------------------------------------------------------- /examples/README.txt: -------------------------------------------------------------------------------- 1 | .. _general_examples: 2 | 3 | Examples 4 | ======== 5 | 6 | Miscellaneous examples 7 | ---------------------- 8 | 9 | Miscellaneous and introductory examples for scikit-learn. 10 | -------------------------------------------------------------------------------- /examples/cross_decomposition/README.txt: -------------------------------------------------------------------------------- 1 | .. _cross_decomposition_examples: 2 | 3 | Cross decomposition 4 | ------------------- 5 | 6 | Examples concerning the :mod:`sklearn.cross_decomposition` module. 7 | 8 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/292/api-v1-json-data-features-292.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/292/api-v1-json-data-features-292.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/561/api-v1-json-data-features-561.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/561/api-v1-json-data-features-561.json.gz -------------------------------------------------------------------------------- /examples/semi_supervised/README.txt: -------------------------------------------------------------------------------- 1 | .. _semi_supervised_examples: 2 | 3 | Semi-Supervised Classification 4 | ------------------------------ 5 | 6 | Examples concerning the :mod:`sklearn.semi_supervised` module. 
7 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/1119/api-v1-json-data-features-1119.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/1119/api-v1-json-data-features-1119.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/292/api-v1-json-data-features-40981.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/292/api-v1-json-data-features-40981.json.gz -------------------------------------------------------------------------------- /doc/templates/generate_deprecated.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | for f in [^d]*; do (head -n2 < $f; echo ' 3 | .. meta:: 4 | :robots: noindex 5 | 6 | .. warning:: 7 | **DEPRECATED** 8 | '; tail -n+3 $f) > deprecated_$f; done 9 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40589/api-v1-json-data-features-40589.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/40589/api-v1-json-data-features-40589.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40675/api-v1-json-data-features-40675.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/40675/api-v1-json-data-features-40675.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40945/api-v1-json-data-features-40945.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/40945/api-v1-json-data-features-40945.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40966/api-v1-json-data-features-40966.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/40966/api-v1-json-data-features-40966.json.gz -------------------------------------------------------------------------------- /examples/gaussian_process/README.txt: -------------------------------------------------------------------------------- 1 | .. _gaussian_process_examples: 2 | 3 | Gaussian Process for Machine Learning 4 | ------------------------------------- 5 | 6 | Examples concerning the :mod:`sklearn.gaussian_process` module. 
7 | 8 | -------------------------------------------------------------------------------- /sklearn/svm/src/libsvm/libsvm_template.cpp: -------------------------------------------------------------------------------- 1 | 2 | /* this is a hack to generate libsvm with both sparse and dense 3 | methods in the same binary */ 4 | 5 | #define _DENSE_REP 6 | #include "svm.cpp" 7 | #undef _DENSE_REP 8 | #include "svm.cpp" 9 | -------------------------------------------------------------------------------- /examples/applications/README.txt: -------------------------------------------------------------------------------- 1 | .. _realworld_examples: 2 | 3 | Examples based on real world datasets 4 | ------------------------------------- 5 | 6 | Applications to real-world problems with some medium-sized datasets or an 7 | interactive user interface. 8 | -------------------------------------------------------------------------------- /examples/compose/README.txt: -------------------------------------------------------------------------------- 1 | .. _compose_examples: 2 | 3 | Pipelines and composite estimators 4 | ---------------------------------- 5 | 6 | Examples of how to compose transformers and pipelines from other estimators. See the :ref:`User Guide <combining_estimators>`. 7 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/2/api-v1-json-data-list-data_name-anneal-limit-2-data_version-1.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/2/api-v1-json-data-list-data_name-anneal-limit-2-data_version-1.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/2/api-v1-json-data-list-data_name-anneal-limit-2-status-active-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/2/api-v1-json-data-list-data_name-anneal-limit-2-status-active-.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/561/api-v1-json-data-list-data_name-cpu-limit-2-data_version-1.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/561/api-v1-json-data-list-data_name-cpu-limit-2-data_version-1.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/561/api-v1-json-data-list-data_name-cpu-limit-2-status-active-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/561/api-v1-json-data-list-data_name-cpu-limit-2-status-active-.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/61/api-v1-json-data-list-data_name-iris-limit-2-data_version-1.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/61/api-v1-json-data-list-data_name-iris-limit-2-data_version-1.json.gz --------------------------------------------------------------------------------
/sklearn/datasets/tests/data/openml/61/api-v1-json-data-list-data_name-iris-limit-2-status-active-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/61/api-v1-json-data-list-data_name-iris-limit-2-status-active-.json.gz -------------------------------------------------------------------------------- /sklearn/utils/lgamma.pyx: -------------------------------------------------------------------------------- 1 | cdef extern from "src/gamma.h": 2 | cdef double sklearn_lgamma(double x) 3 | 4 | 5 | cdef double lgamma(double x): 6 | if x <= 0: 7 | raise ValueError("x must be strictly positive, got %f" % x) 8 | return sklearn_lgamma(x) 9 | -------------------------------------------------------------------------------- /doc/themes/scikit-learn/theme.conf: -------------------------------------------------------------------------------- 1 | [theme] 2 | inherit = basic 3 | stylesheet = nature.css 4 | pygments_style = tango 5 | 6 | [options] 7 | oldversion = False 8 | collapsiblesidebar = True 9 | google_analytics = True 10 | surveybanner = False 11 | sprintbanner = True 12 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/292/api-v1-json-data-list-data_name-australian-limit-2-data_version-1.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/292/api-v1-json-data-list-data_name-australian-limit-2-data_version-1.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/292/api-v1-json-data-list-data_name-australian-limit-2-status-active-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/292/api-v1-json-data-list-data_name-australian-limit-2-status-active-.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40589/api-v1-json-data-list-data_name-emotions-limit-2-data_version-3.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/40589/api-v1-json-data-list-data_name-emotions-limit-2-data_version-3.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40589/api-v1-json-data-list-data_name-emotions-limit-2-status-active-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/40589/api-v1-json-data-list-data_name-emotions-limit-2-status-active-.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40675/api-v1-json-data-list-data_name-glass2-limit-2-data_version-1.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/40675/api-v1-json-data-list-data_name-glass2-limit-2-data_version-1.json.gz 
-------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40675/api-v1-json-data-list-data_name-glass2-limit-2-status-active-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/40675/api-v1-json-data-list-data_name-glass2-limit-2-status-active-.json.gz -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.rst 2 | recursive-include doc * 3 | recursive-include examples * 4 | recursive-include sklearn *.c *.h *.pyx *.pxd *.pxi 5 | recursive-include sklearn/datasets *.csv *.csv.gz *.rst *.jpg *.txt *.arff.gz *.json.gz 6 | include COPYING 7 | include README.rst 8 | 9 | -------------------------------------------------------------------------------- /doc/README.md: -------------------------------------------------------------------------------- 1 | # Documentation for scikit-learn 2 | 3 | This directory contains the full manual and web site as displayed at 4 | http://scikit-learn.org. See 5 | http://scikit-learn.org/dev/developers/contributing.html#documentation for 6 | detailed information about the documentation. 7 | -------------------------------------------------------------------------------- /doc/modules/pipeline.rst: -------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | .. raw:: html 4 | 5 | 6 | 9 | 10 | This content is now at :ref:`combining_estimators`. 11 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/1119/api-v1-json-data-list-data_name-adult-census-limit-2-data_version-1.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/1119/api-v1-json-data-list-data_name-adult-census-limit-2-data_version-1.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/1119/api-v1-json-data-list-data_name-adult-census-limit-2-status-active-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/1119/api-v1-json-data-list-data_name-adult-census-limit-2-status-active-.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40966/api-v1-json-data-list-data_name-miceprotein-limit-2-data_version-4.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/40966/api-v1-json-data-list-data_name-miceprotein-limit-2-data_version-4.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40966/api-v1-json-data-list-data_name-miceprotein-limit-2-status-active-.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/40966/api-v1-json-data-list-data_name-miceprotein-limit-2-status-active-.json.gz 
-------------------------------------------------------------------------------- /doc/testimonials/README.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | To find the list of people we contacted, see: 4 | https://docs.google.com/spreadsheet/ccc?key=0AhGnAxuBDhjmdDYwNzlZVE5SMkFsMjNBbGlaWkpNZ1E&usp=sharing 5 | 6 | To obtain access to this file, send an email to: 7 | nelle dot varoquaux at gmail dot com 8 | 9 | -------------------------------------------------------------------------------- /doc/templates/function.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}==================== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autofunction:: {{ objname }} 7 | 8 | .. include:: {{module}}.{{objname}}.examples 9 | 10 | .. raw:: html 11 | 12 |
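The function.rst template just shown is filled in by the Sphinx/autosummary machinery of this repo's doc build: `module`, `objname`, and `underline` are substituted, and the literal `====` appended after `{{ underline }}` pads the section rule so it stays at least as long as the title once the `:mod:`...`` markup is added. A minimal sketch of that substitution using jinja2, with made-up values (`sklearn.metrics`/`accuracy_score` and the underline sizing are illustrative assumptions, not taken from a real build):

    # Sketch of the substitution performed on doc/templates/function.rst.
    # The module/objname values below are illustrative only.
    from jinja2 import Template

    tmpl = Template(
        ":mod:`{{module}}`.{{objname}}\n"
        "{{ underline }}====================\n"
    )

    module, objname = "sklearn.metrics", "accuracy_score"
    # Assume the underline is sized to the dotted name; the literal
    # '====' suffix in the template keeps the rule long enough.
    underline = "=" * (len(module) + 1 + len(objname))

    print(tmpl.render(module=module, objname=objname, underline=underline))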
13 | -------------------------------------------------------------------------------- /doc/templates/class_without_init.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}============== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | .. include:: {{module}}.{{objname}}.examples 9 | 10 | .. raw:: html 11 | 12 |
13 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/40675/api-v1-json-data-list-data_name-glass2-limit-2-data_version-1-status-deactivated.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/40675/api-v1-json-data-list-data_name-glass2-limit-2-data_version-1-status-deactivated.json.gz -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/openml/292/api-v1-json-data-list-data_name-australian-limit-2-data_version-1-status-deactivated.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/scikit-learn/master/sklearn/datasets/tests/data/openml/292/api-v1-json-data-list-data_name-australian-limit-2-data_version-1-status-deactivated.json.gz -------------------------------------------------------------------------------- /site.cfg: -------------------------------------------------------------------------------- 1 | 2 | # Uncomment to link against the MKL library on Windows 3 | # [mkl] 4 | # include_dirs=C:\Program Files\Intel\MKL\10.2.5.035\include 5 | # library_dirs=C:\Program Files\Intel\MKL\10.2.5.035\ia32\lib 6 | # mkl_libs=mkl_core, mkl_intel_c, mkl_intel_s, libguide, libguide40, mkl_blacs_dll, mkl_intel_sequential 7 | -------------------------------------------------------------------------------- /doc/templates/numpydoc_docstring.rst: -------------------------------------------------------------------------------- 1 | {{index}} 2 | {{summary}} 3 | {{extended_summary}} 4 | {{parameters}} 5 | {{returns}} 6 | {{yields}} 7 | {{other_parameters}} 8 | {{attributes}} 9 | {{raises}} 10 | {{warns}} 11 | {{warnings}} 12 | {{see_also}} 13 | {{notes}} 14 | {{references}} 15 | {{examples}} 16 | {{methods}} 17 | -------------------------------------------------------------------------------- /sklearn/mixture/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.mixture` module implements mixture modeling algorithms. 3 | """ 4 | 5 | from .gaussian_mixture import GaussianMixture 6 | from .bayesian_mixture import BayesianGaussianMixture 7 | 8 | 9 | __all__ = ['GaussianMixture', 10 | 'BayesianGaussianMixture'] 11 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/data/svmlight_classification.txt: -------------------------------------------------------------------------------- 1 | # comment 2 | # note: the next line contains a tab 3 | 1.0 3:2.5 11:-5.2 16:1.5 # and an inline comment 4 | 2.0 6:1.0 13:-3 5 | # another comment 6 | 3.0 21:27 7 | 4.0 2:1.234567890123456e10 # double precision value 8 | 1.0 # empty line, all zeros 9 | 2.0 3:0 # explicit zeros 10 | -------------------------------------------------------------------------------- /sklearn/svm/src/libsvm/LIBSVM_CHANGES: -------------------------------------------------------------------------------- 1 | Changes to Libsvm 2 | 3 | This is here mainly as a checklist for incorporating new versions of libsvm. 
4 | 5 | * Add copyright to files svm.cpp and svm.h 6 | * Add random_seed support and call to srand in fit function 7 | 8 | The changes made with respect to upstream are detailed in the header of svm.cpp 9 | -------------------------------------------------------------------------------- /sklearn/datasets/data/linnerud_exercise.csv: -------------------------------------------------------------------------------- 1 | Chins Situps Jumps 2 | 5 162 60 3 | 2 110 60 4 | 12 101 101 5 | 12 105 37 6 | 13 155 58 7 | 4 101 42 8 | 8 101 38 9 | 6 125 40 10 | 15 200 40 11 | 17 251 250 12 | 17 120 38 13 | 13 210 115 14 | 14 215 105 15 | 1 50 50 16 | 6 70 31 17 | 12 210 120 18 | 4 60 25 19 | 11 230 80 20 | 15 225 73 21 | 2 110 43 22 | -------------------------------------------------------------------------------- /sklearn/externals/conftest.py: -------------------------------------------------------------------------------- 1 | # Do not collect any tests in externals. This is more robust than using 2 | # --ignore because --ignore needs a path and it is not convenient to pass in 3 | # the externals path (very long install-dependent path in site-packages) when 4 | # using --pyargs 5 | def pytest_ignore_collect(path, config): 6 | return True 7 | 8 | -------------------------------------------------------------------------------- /doc/developers/index.rst: -------------------------------------------------------------------------------- 1 | .. _developers_guide: 2 | 3 | ================= 4 | Developer's Guide 5 | ================= 6 | 7 | .. include:: ../includes/big_toc_css.rst 8 | .. include:: ../tune_toc.rst 9 | 10 | .. toctree:: 11 | 12 | contributing 13 | tips 14 | utilities 15 | performance 16 | advanced_installation 17 | maintainer 18 | -------------------------------------------------------------------------------- /sklearn/linear_model/sgd_fast_helpers.h: -------------------------------------------------------------------------------- 1 | // We cannot directly reuse the npy_isfinite from npy_math.h as numpy 2 | // and scikit-learn are not necessarily built with the same compiler. 3 | #ifdef _MSC_VER 4 | # include <float.h> 5 | # define skl_isfinite _finite 6 | #else 7 | # include <npy_math.h> 8 | # define skl_isfinite npy_isfinite 9 | #endif 10 | -------------------------------------------------------------------------------- /doc/model_selection.rst: -------------------------------------------------------------------------------- 1 | .. include:: includes/big_toc_css.rst 2 | 3 | .. _model_selection: 4 | 5 | Model selection and evaluation 6 | ------------------------------ 7 | 8 | .. toctree:: 9 | 10 | modules/cross_validation 11 | modules/grid_search 12 | modules/model_evaluation 13 | modules/model_persistence 14 | modules/learning_curve 15 | -------------------------------------------------------------------------------- /sklearn/datasets/data/linnerud_physiological.csv: -------------------------------------------------------------------------------- 1 | Weight Waist Pulse 2 | 191 36 50 3 | 189 37 52 4 | 193 38 58 5 | 162 35 62 6 | 189 35 46 7 | 182 36 56 8 | 211 38 56 9 | 167 34 60 10 | 176 31 74 11 | 154 33 56 12 | 169 34 50 13 | 166 33 52 14 | 154 34 64 15 | 247 46 50 16 | 193 36 46 17 | 202 37 62 18 | 176 37 54 19 | 157 32 52 20 | 156 33 54 21 | 138 33 68 22 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/test_common.py: -------------------------------------------------------------------------------- 1 | """Test loaders for common functionality. 
2 | """ 3 | 4 | 5 | def check_return_X_y(bunch, fetch_func_partial): 6 | X_y_tuple = fetch_func_partial(return_X_y=True) 7 | assert(isinstance(X_y_tuple, tuple)) 8 | assert(X_y_tuple[0].shape == bunch.data.shape) 9 | assert(X_y_tuple[1].shape == bunch.target.shape) 10 | -------------------------------------------------------------------------------- /sklearn/utils/tests/test_bench.py: -------------------------------------------------------------------------------- 1 | 2 | import datetime 3 | 4 | from sklearn.utils.bench import total_seconds 5 | from sklearn.utils.testing import assert_equal 6 | 7 | 8 | def test_total_seconds(): 9 | delta = (datetime.datetime(2012, 1, 1, 5, 5, 1) 10 | - datetime.datetime(2012, 1, 1, 5, 5, 4)) 11 | assert_equal(86397, total_seconds(delta)) 12 | -------------------------------------------------------------------------------- /doc/templates/class.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}============== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | {% block methods %} 9 | .. automethod:: __init__ 10 | {% endblock %} 11 | 12 | .. include:: {{module}}.{{objname}}.examples 13 | 14 | .. raw:: html 15 | 16 |
17 | -------------------------------------------------------------------------------- /sklearn/tests/test_check_build.py: -------------------------------------------------------------------------------- 1 | """ 2 | Smoke Test the check_build module 3 | """ 4 | 5 | # Author: G Varoquaux 6 | # License: BSD 3 clause 7 | 8 | from sklearn.__check_build import raise_build_error 9 | 10 | from sklearn.utils.testing import assert_raises 11 | 12 | 13 | def test_raise_build_error(): 14 | assert_raises(ImportError, raise_build_error, ImportError()) 15 | -------------------------------------------------------------------------------- /sklearn/utils/_random.pxd: -------------------------------------------------------------------------------- 1 | # Authors: Arnaud Joly 2 | # 3 | # License: BSD 3 clause 4 | 5 | 6 | import numpy as np 7 | cimport numpy as np 8 | 9 | 10 | cpdef sample_without_replacement(np.int_t n_population, 11 | np.int_t n_samples, 12 | method=*, 13 | random_state=*) 14 | 15 | -------------------------------------------------------------------------------- /doc/templates/deprecated_function.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}==================== 3 | 4 | .. meta:: 5 | :robots: noindex 6 | 7 | .. warning:: 8 | **DEPRECATED** 9 | 10 | 11 | .. currentmodule:: {{ module }} 12 | 13 | .. autofunction:: {{ objname }} 14 | 15 | .. include:: {{module}}.{{objname}}.examples 16 | 17 | .. raw:: html 18 | 19 |
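The deprecated_function.rst template above is what doc/templates/generate_deprecated.sh (shown earlier) produces from function.rst: the first two lines (title and underline) are kept, the noindex/DEPRECATED banner is spliced in, and the remainder is appended as deprecated_<name>. A rough Python rendition of that shell loop, as a sketch (the shell script remains the actual generator):

    # Sketch: Python equivalent of doc/templates/generate_deprecated.sh.
    import glob

    BANNER = """
    .. meta::
       :robots: noindex

    .. warning::
       **DEPRECATED**
    """

    for path in glob.glob("[!d]*"):           # same filter as the script's [^d]*
        with open(path) as f:
            lines = f.readlines()
        with open("deprecated_" + path, "w") as out:
            out.writelines(lines[:2])         # head -n2: title + underline
            out.write(BANNER + "\n")          # injected deprecation banner
            out.writelines(lines[2:])         # tail -n+3: the rest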
20 | -------------------------------------------------------------------------------- /doc/themes/scikit-learn/static/js/extra.js: -------------------------------------------------------------------------------- 1 | // Miscellaneous enhancements to doc display 2 | 3 | 4 | $(document).ready(function() { 5 | /*** Add permalink buttons next to glossary terms ***/ 6 | 7 | $('dl.glossary > dt[id]').append(function() { 8 | return ('<a class="headerlink" href="#' + this.id + '" ' + 9 | 'title="Permalink to this term">' + 10 | '¶</a>'); 11 | }) 12 | }); 13 | -------------------------------------------------------------------------------- /doc/templates/deprecated_class_without_init.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}============== 3 | 4 | .. meta:: 5 | :robots: noindex 6 | 7 | .. warning:: 8 | **DEPRECATED** 9 | 10 | 11 | .. currentmodule:: {{ module }} 12 | 13 | .. autoclass:: {{ objname }} 14 | 15 | .. include:: {{module}}.{{objname}}.examples 16 | 17 | .. raw:: html 18 | 19 |
20 | -------------------------------------------------------------------------------- /doc/templates/class_with_call.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}=============== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | {% block methods %} 9 | .. automethod:: __init__ 10 | .. automethod:: __call__ 11 | {% endblock %} 12 | 13 | .. include:: {{module}}.{{objname}}.examples 14 | 15 | .. raw:: html 16 | 17 |
18 | -------------------------------------------------------------------------------- /sklearn/neural_network/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.neural_network` module includes models based on neural 3 | networks. 4 | """ 5 | 6 | # License: BSD 3 clause 7 | 8 | from .rbm import BernoulliRBM 9 | 10 | from .multilayer_perceptron import MLPClassifier 11 | from .multilayer_perceptron import MLPRegressor 12 | 13 | __all__ = ["BernoulliRBM", 14 | "MLPClassifier", 15 | "MLPRegressor"] 16 | -------------------------------------------------------------------------------- /sklearn/semi_supervised/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.semi_supervised` module implements semi-supervised learning 3 | algorithms. These algorithms utilize small amounts of labeled data and large 4 | amounts of unlabeled data for classification tasks. This module includes Label 5 | Propagation. 6 | """ 7 | 8 | from .label_propagation import LabelPropagation, LabelSpreading 9 | 10 | __all__ = ['LabelPropagation', 'LabelSpreading'] 11 | -------------------------------------------------------------------------------- /sklearn/externals/joblib/externals/loky/backend/compat_posix.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | ############################################################################### 3 | # Compat file to load the correct wait function 4 | # 5 | # author: Thomas Moreau and Olivier grisel 6 | # 7 | import sys 8 | 9 | # Compat wait 10 | if sys.version_info < (3, 3): 11 | from ._posix_wait import wait 12 | else: 13 | from multiprocessing.connection import wait 14 | -------------------------------------------------------------------------------- /doc/unsupervised_learning.rst: -------------------------------------------------------------------------------- 1 | .. include:: includes/big_toc_css.rst 2 | 3 | .. _unsupervised-learning: 4 | 5 | Unsupervised learning 6 | ----------------------- 7 | 8 | .. toctree:: 9 | 10 | modules/mixture 11 | modules/manifold 12 | modules/clustering 13 | modules/biclustering 14 | modules/decomposition 15 | modules/covariance 16 | modules/outlier_detection 17 | modules/density 18 | modules/neural_networks_unsupervised 19 | -------------------------------------------------------------------------------- /sklearn/externals/README: -------------------------------------------------------------------------------- 1 | This directory contains bundled external dependencies that are updated 2 | every once in a while. 3 | 4 | Note to developers and advanced users: setting the SKLEARN_SITE_JOBLIB 5 | environment variable to a non-null value will force scikit-learn to use the 6 | site joblib. 7 | 8 | Note for distribution packagers: if you want to remove the duplicated 9 | code and depend on a packaged version, we suggest that you simply create a 10 | symbolic link in this directory. 11 | 12 | -------------------------------------------------------------------------------- /doc/templates/deprecated_class.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}============== 3 | 4 | .. meta:: 5 | :robots: noindex 6 | 7 | .. warning:: 8 | **DEPRECATED** 9 | 10 | 11 | .. currentmodule:: {{ module }} 12 | 13 | .. autoclass:: {{ objname }} 14 | 15 | {% block methods %} 16 | ..
automethod:: __init__ 17 | {% endblock %} 18 | 19 | .. include:: {{module}}.{{objname}}.examples 20 | 21 | .. raw:: html 22 | 23 |
24 | -------------------------------------------------------------------------------- /doc/user_guide.rst: -------------------------------------------------------------------------------- 1 | .. title:: User guide: contents 2 | 3 | .. _user_guide: 4 | 5 | ========== 6 | User Guide 7 | ========== 8 | 9 | .. include:: includes/big_toc_css.rst 10 | 11 | .. nice layout in the toc 12 | 13 | .. include:: tune_toc.rst 14 | 15 | .. toctree:: 16 | :numbered: 17 | 18 | supervised_learning.rst 19 | unsupervised_learning.rst 20 | model_selection.rst 21 | data_transforms.rst 22 | Dataset loading utilities <datasets/index.rst> 23 | modules/computing.rst 24 | -------------------------------------------------------------------------------- /sklearn/feature_extraction/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.feature_extraction` module deals with feature extraction 3 | from raw data. It currently includes methods to extract features from text and 4 | images. 5 | """ 6 | 7 | from .dict_vectorizer import DictVectorizer 8 | from .hashing import FeatureHasher 9 | from .image import img_to_graph, grid_to_graph 10 | from . import text 11 | 12 | __all__ = ['DictVectorizer', 'image', 'img_to_graph', 'grid_to_graph', 'text', 13 | 'FeatureHasher'] 14 | -------------------------------------------------------------------------------- /sklearn/compose/__init__.py: -------------------------------------------------------------------------------- 1 | """Meta-estimators for building composite models with transformers 2 | 3 | In addition to its current contents, this module will eventually be home to 4 | refurbished versions of Pipeline and FeatureUnion. 5 | 6 | """ 7 | 8 | from ._column_transformer import ColumnTransformer, make_column_transformer 9 | from ._target import TransformedTargetRegressor 10 | 11 | 12 | __all__ = [ 13 | 'ColumnTransformer', 14 | 'make_column_transformer', 15 | 'TransformedTargetRegressor', 16 | ] 17 | -------------------------------------------------------------------------------- /doc/templates/deprecated_class_with_call.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}=============== 3 | 4 | .. meta:: 5 | :robots: noindex 6 | 7 | .. warning:: 8 | **DEPRECATED** 9 | 10 | 11 | .. currentmodule:: {{ module }} 12 | 13 | .. autoclass:: {{ objname }} 14 | 15 | {% block methods %} 16 | .. automethod:: __init__ 17 | .. automethod:: __call__ 18 | {% endblock %} 19 | 20 | .. include:: {{module}}.{{objname}}.examples 21 | 22 | .. raw:: html 23 | 24 |
25 | -------------------------------------------------------------------------------- /sklearn/manifold/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.manifold` module implements data embedding techniques. 3 | """ 4 | 5 | from .locally_linear import locally_linear_embedding, LocallyLinearEmbedding 6 | from .isomap import Isomap 7 | from .mds import MDS, smacof 8 | from .spectral_embedding_ import SpectralEmbedding, spectral_embedding 9 | from .t_sne import TSNE 10 | 11 | __all__ = ['locally_linear_embedding', 'LocallyLinearEmbedding', 'Isomap', 12 | 'MDS', 'smacof', 'SpectralEmbedding', 'spectral_embedding', "TSNE"] 13 | -------------------------------------------------------------------------------- /sklearn/externals/joblib/_compat.py: -------------------------------------------------------------------------------- 1 | """ 2 | Compatibility layer for Python 3/Python 2 single codebase 3 | """ 4 | import sys 5 | 6 | PY3_OR_LATER = sys.version_info[0] >= 3 7 | PY27 = sys.version_info[:2] == (2, 7) 8 | 9 | try: 10 | _basestring = basestring 11 | _bytes_or_unicode = (str, unicode) 12 | except NameError: 13 | _basestring = str 14 | _bytes_or_unicode = (bytes, str) 15 | 16 | 17 | def with_metaclass(meta, *bases): 18 | """Create a base class with a metaclass.""" 19 | return meta("NewBase", bases, {}) 20 | -------------------------------------------------------------------------------- /sklearn/externals/setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | def configuration(parent_package='', top_path=None): 5 | from numpy.distutils.misc_util import Configuration 6 | config = Configuration('externals', parent_package, top_path) 7 | config.add_subpackage('joblib') 8 | config.add_subpackage('joblib/externals') 9 | config.add_subpackage('joblib/externals/loky') 10 | config.add_subpackage('joblib/externals/loky/backend') 11 | config.add_subpackage('joblib/externals/cloudpickle') 12 | 13 | return config 14 | -------------------------------------------------------------------------------- /sklearn/tree/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.tree` module includes decision tree-based models for 3 | classification and regression. 4 | """ 5 | 6 | from .tree import DecisionTreeClassifier 7 | from .tree import DecisionTreeRegressor 8 | from .tree import ExtraTreeClassifier 9 | from .tree import ExtraTreeRegressor 10 | from .export import export_graphviz, plot_tree 11 | 12 | __all__ = ["DecisionTreeClassifier", "DecisionTreeRegressor", 13 | "ExtraTreeClassifier", "ExtraTreeRegressor", "export_graphviz", 14 | "plot_tree"] 15 | -------------------------------------------------------------------------------- /sklearn/neighbors/typedefs.pxd: -------------------------------------------------------------------------------- 1 | #!python 2 | cimport numpy as np 3 | 4 | # Floating point/data type 5 | ctypedef np.float64_t DTYPE_t # WARNING: should match DTYPE in typedefs.pyx 6 | 7 | cdef enum: 8 | DTYPECODE = np.NPY_FLOAT64 9 | ITYPECODE = np.NPY_INTP 10 | 11 | # Index/integer type. 12 | # WARNING: ITYPE_t must be a signed integer type or you will have a bad time! 
13 | ctypedef np.intp_t ITYPE_t # WARNING: should match ITYPE in typedefs.pyx 14 | 15 | # Fused type for certain operations 16 | ctypedef fused DITYPE_t: 17 | ITYPE_t 18 | DTYPE_t 19 | -------------------------------------------------------------------------------- /sklearn/externals/joblib/externals/loky/backend/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | from .context import get_context 5 | 6 | LOKY_PICKLER = os.environ.get("LOKY_PICKLER") 7 | 8 | if sys.version_info > (3, 4): 9 | 10 | def _make_name(): 11 | name = '/loky-%i-%s' % (os.getpid(), next(synchronize.SemLock._rand)) 12 | return name 13 | 14 | # monkey patch the name creation for multiprocessing 15 | from multiprocessing import synchronize 16 | synchronize.SemLock._make_name = staticmethod(_make_name) 17 | 18 | __all__ = ["get_context"] 19 | -------------------------------------------------------------------------------- /sklearn/src/cblas/atlas_ptalias2.h: -------------------------------------------------------------------------------- 1 | #ifndef ATLAS_PTALIAS2_H 2 | #define ATLAS_PTALIAS2_H 3 | /* 4 | * Real BLAS 5 | */ 6 | #define ATL_sger ATL_stger 7 | #define ATL_sgemv ATL_stgemv 8 | 9 | #define ATL_dger ATL_dtger 10 | #define ATL_dgemv ATL_dtgemv 11 | 12 | /* 13 | * Complex BLAS 14 | */ 15 | #define ATL_cgemv ATL_ctgemv 16 | #define ATL_cgerc ATL_ctgerc 17 | #define ATL_cgeru ATL_ctgeru 18 | 19 | #define ATL_zgemv ATL_ztgemv 20 | #define ATL_zgerc ATL_ztgerc 21 | #define ATL_zgeru ATL_ztgeru 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /sklearn/ensemble/setup.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from numpy.distutils.misc_util import Configuration 3 | 4 | 5 | def configuration(parent_package="", top_path=None): 6 | config = Configuration("ensemble", parent_package, top_path) 7 | config.add_extension("_gradient_boosting", 8 | sources=["_gradient_boosting.pyx"], 9 | include_dirs=[numpy.get_include()]) 10 | 11 | config.add_subpackage("tests") 12 | 13 | return config 14 | 15 | if __name__ == "__main__": 16 | from numpy.distutils.core import setup 17 | setup(**configuration().todict()) 18 | -------------------------------------------------------------------------------- /sklearn/tests/test_init.py: -------------------------------------------------------------------------------- 1 | # Basic unittests to test functioning of module's top-level 2 | 3 | from sklearn.utils.testing import assert_equal 4 | 5 | __author__ = 'Yaroslav Halchenko' 6 | __license__ = 'BSD' 7 | 8 | 9 | try: 10 | from sklearn import * # noqa 11 | _top_import_error = None 12 | except Exception as e: 13 | _top_import_error = e 14 | 15 | 16 | def test_import_skl(): 17 | # Test whether the above import has failed for some reason 18 | # "import *" is discouraged outside of the module level, hence we 19 | # rely on setting up the variable above 20 | assert_equal(_top_import_error, None) 21 | -------------------------------------------------------------------------------- /sklearn/utils/bench.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper functions for benchmarking 3 | """ 4 | 5 | 6 | def total_seconds(delta): 7 | """ 8 | Helper function to emulate total_seconds, 9 | introduced in Python 2.7 10 | 11 | https://docs.python.org/library/datetime.html\ 12 | #datetime.timedelta.total_seconds 13 | 14 | 
Parameters 15 | ---------- 16 | delta : timedelta object 17 | 18 | Returns 19 | ------- 20 | float 21 | The number of seconds contained in delta (``delta.days`` is ignored) 22 | 23 | """ 24 | 25 | mu_sec = 1e-6 # number of seconds in one microsecond 26 | 27 | return delta.seconds + delta.microseconds * mu_sec 28 | -------------------------------------------------------------------------------- /sklearn/utils/fast_dict.pxd: -------------------------------------------------------------------------------- 1 | """ 2 | Uses C++ map containers for fast dict-like behavior with keys being 3 | integers, and values float. 4 | """ 5 | # Author: Gael Varoquaux 6 | # License: BSD 7 | 8 | from libcpp.map cimport map as cpp_map 9 | 10 | # Import the C-level symbols of numpy 11 | cimport numpy as np 12 | 13 | ctypedef np.float64_t DTYPE_t 14 | 15 | ctypedef np.intp_t ITYPE_t 16 | 17 | ############################################################################### 18 | # An object to be used in Python 19 | 20 | cdef class IntFloatDict: 21 | cdef cpp_map[ITYPE_t, DTYPE_t] my_map 22 | cdef _to_arrays(self, ITYPE_t [:] keys, DTYPE_t [:] values) 23 | -------------------------------------------------------------------------------- /sklearn/__check_build/setup.py: -------------------------------------------------------------------------------- 1 | # Author: Virgile Fritsch 2 | # License: BSD 3 clause 3 | 4 | import numpy 5 | 6 | 7 | def configuration(parent_package='', top_path=None): 8 | from numpy.distutils.misc_util import Configuration 9 | config = Configuration('__check_build', parent_package, top_path) 10 | config.add_extension('_check_build', 11 | sources=['_check_build.pyx'], 12 | include_dirs=[numpy.get_include()]) 13 | 14 | return config 15 | 16 | if __name__ == '__main__': 17 | from numpy.distutils.core import setup 18 | setup(**configuration(top_path='').todict()) 19 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [aliases] 2 | test = pytest 3 | 4 | [tool:pytest] 5 | # disable-pytest-warnings should be removed once we rewrite tests 6 | # using yield with parametrize 7 | addopts = 8 | --ignore build_tools 9 | --ignore benchmarks 10 | --ignore doc 11 | --ignore examples 12 | --doctest-modules 13 | --disable-pytest-warnings 14 | -rs 15 | 16 | [wheelhouse_uploader] 17 | artifact_indexes= 18 | # Wheels built by travis (only for specific tags): 19 | # https://github.com/MacPython/scikit-learn-wheels 20 | http://wheels.scipy.org 21 | 22 | [flake8] 23 | # Default flake8 3.5 ignored flags 24 | ignore=E121,E123,E126,E226,E24,E704,W503,W504 25 | -------------------------------------------------------------------------------- /sklearn/preprocessing/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def configuration(parent_package='', top_path=None): 5 | import numpy 6 | from numpy.distutils.misc_util import Configuration 7 | 8 | config = Configuration('preprocessing', parent_package, top_path) 9 | libraries = [] 10 | if os.name == 'posix': 11 | libraries.append('m') 12 | 13 | config.add_extension('_csr_polynomial_expansion', 14 | sources=['_csr_polynomial_expansion.pyx'], 15 | include_dirs=[numpy.get_include()], 16 | libraries=libraries) 17 | 18 | config.add_subpackage('tests') 19 | 20 | return config 21 | -------------------------------------------------------------------------------- /sklearn/src/cblas/README.txt: 
-------------------------------------------------------------------------------- 1 | This is a stripped-down version of CBLAS (C-interface to the Basic Linear 2 | Algebra Subroutines), containing only those parts used by scikit-learn's 3 | C/C++/Cython extensions. It is used when no CBLAS implementation is available 4 | at build time. 5 | 6 | Sources here are taken from the reference implementation in ATLAS. To add new 7 | algorithms, the only thing that should be done is to copy the reference 8 | implementation from ${ATLAS}/src/blas/reference/level* into this directory. 9 | 10 | Header files are taken from ${ATLAS}/include, the only change being the 11 | inclusion of "atlas_refalias*.h" into its respective "atlas_level*.h" file. 12 | -------------------------------------------------------------------------------- /doc/tutorial/text_analytics/.gitignore: -------------------------------------------------------------------------------- 1 | # cruft 2 | .*.swp 3 | *.pyc 4 | .DS_Store 5 | *.pdf 6 | 7 | # folder to be used for working on the exercises 8 | workspace 9 | 10 | # output of the sphinx build of the documentation 11 | tutorial/_build 12 | 13 | # datasets to be fetched from the web and cached locally 14 | data/twenty_newsgroups/20news-bydate.tar.gz 15 | data/twenty_newsgroups/20news-bydate-train 16 | data/twenty_newsgroups/20news-bydate-test 17 | 18 | data/movie_reviews/txt_sentoken 19 | data/movie_reviews/poldata.README.2.0 20 | 21 | data/languages/paragraphs 22 | data/languages/short_paragraphs 23 | data/languages/html 24 | 25 | data/labeled_faces_wild/lfw_preprocessed/ 26 | -------------------------------------------------------------------------------- /sklearn/datasets/descr/linnerud.rst: -------------------------------------------------------------------------------- 1 | .. _linnerrud_dataset: 2 | 3 | Linnerrud dataset 4 | ----------------- 5 | 6 | **Data Set Characteristics:** 7 | 8 | :Number of Instances: 20 9 | :Number of Attributes: 3 10 | :Missing Attribute Values: None 11 | 12 | The Linnerud dataset contains two small datasets: 13 | 14 | - *physiological* - CSV containing 20 observations on 3 physiological variables: 15 | Weight, Waist and Pulse. 16 | 17 | - *exercise* - CSV containing 20 observations on 3 exercise variables: 18 | Chins, Situps and Jumps. 19 | 20 | .. topic:: References 21 | 22 | * Tenenhaus, M. (1998). La regression PLS: theorie et pratique. Paris: Editions Technic. 23 | -------------------------------------------------------------------------------- /doc/preface.rst: -------------------------------------------------------------------------------- 1 | .. This helps define the TOC ordering for "about us" sections. Particularly 2 | useful for PDF output as this section is not linked from elsewhere. 3 | 4 | .. _preface_menu: 5 | 6 | .. include:: includes/big_toc_css.rst 7 | .. include:: tune_toc.rst 8 | 9 | .. top level heading needed for LaTeX TOC in sphinx<=1.3.1 10 | 11 | ************ 12 | scikit-learn 13 | ************ 14 | 15 | ======================= 16 | Welcome to scikit-learn 17 | ======================= 18 | 19 | | 20 | 21 | .. toctree:: 22 | :maxdepth: 2 23 | 24 | install 25 | faq 26 | support 27 | related_projects 28 | about 29 | testimonials/testimonials 30 | whats_new 31 | 32 | | 33 | -------------------------------------------------------------------------------- /doc/supervised_learning.rst: -------------------------------------------------------------------------------- 1 | .. include:: includes/big_toc_css.rst 2 | 3 | .. 
_supervised-learning: 4 | 5 | Supervised learning 6 | ----------------------- 7 | 8 | .. toctree:: 9 | 10 | modules/linear_model 11 | modules/lda_qda.rst 12 | modules/kernel_ridge.rst 13 | modules/svm 14 | modules/sgd 15 | modules/neighbors 16 | modules/gaussian_process 17 | modules/cross_decomposition.rst 18 | modules/naive_bayes 19 | modules/tree 20 | modules/ensemble 21 | modules/multiclass 22 | modules/feature_selection.rst 23 | modules/label_propagation.rst 24 | modules/isotonic.rst 25 | modules/calibration.rst 26 | modules/neural_networks_supervised 27 | -------------------------------------------------------------------------------- /sklearn/mixture/tests/test_mixture.py: -------------------------------------------------------------------------------- 1 | # Author: Guillaume Lemaitre 2 | # License: BSD 3 clause 3 | 4 | import pytest 5 | import numpy as np 6 | 7 | from sklearn.mixture import GaussianMixture 8 | from sklearn.mixture import BayesianGaussianMixture 9 | 10 | 11 | @pytest.mark.parametrize( 12 | "estimator", 13 | [GaussianMixture(), 14 | BayesianGaussianMixture()] 15 | ) 16 | def test_gaussian_mixture_n_iter(estimator): 17 | # check that n_iter is the number of iterations performed. 18 | rng = np.random.RandomState(0) 19 | X = rng.rand(10, 5) 20 | max_iter = 1 21 | estimator.set_params(max_iter=max_iter) 22 | estimator.fit(X) 23 | assert estimator.n_iter_ == max_iter 24 | -------------------------------------------------------------------------------- /sklearn/utils/stats.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from sklearn.utils.extmath import stable_cumsum 4 | 5 | 6 | def _weighted_percentile(array, sample_weight, percentile=50): 7 | """ 8 | Compute the weighted ``percentile`` of ``array`` with ``sample_weight``. 9 | """ 10 | sorted_idx = np.argsort(array) 11 | 12 | # Find the first index where the weighted CDF reaches the requested percentile 13 | weight_cdf = stable_cumsum(sample_weight[sorted_idx]) 14 | percentile_idx = np.searchsorted( 15 | weight_cdf, (percentile / 100.) 
* weight_cdf[-1]) 16 | # in rare cases, percentile_idx equals len(sorted_idx) 17 | percentile_idx = np.clip(percentile_idx, 0, len(sorted_idx)-1) 18 | return array[sorted_idx[percentile_idx]] 19 | -------------------------------------------------------------------------------- /sklearn/feature_extraction/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import platform 3 | 4 | 5 | def configuration(parent_package='', top_path=None): 6 | import numpy 7 | from numpy.distutils.misc_util import Configuration 8 | 9 | config = Configuration('feature_extraction', parent_package, top_path) 10 | libraries = [] 11 | if os.name == 'posix': 12 | libraries.append('m') 13 | 14 | if platform.python_implementation() != 'PyPy': 15 | config.add_extension('_hashing', 16 | sources=['_hashing.pyx'], 17 | include_dirs=[numpy.get_include()], 18 | libraries=libraries) 19 | config.add_subpackage("tests") 20 | 21 | return config 22 | -------------------------------------------------------------------------------- /.codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | # Commits pushed to master should not make the overall 8 | # project coverage decrease by more than 1%: 9 | target: auto 10 | threshold: 1% 11 | patch: 12 | default: 13 | # Be tolerant on slight code coverage diff on PRs to limit 14 | # noisy red coverage status on github PRs. 15 | # Note: the coverage stats are still uploaded 16 | # to codecov so that PR reviewers can see uncovered lines 17 | # in the github diff if they install the codecov browser 18 | # extension: 19 | # https://github.com/codecov/browser-extension 20 | target: auto 21 | threshold: 1% 22 | 23 | -------------------------------------------------------------------------------- /sklearn/model_selection/tests/common.py: -------------------------------------------------------------------------------- 1 | """ 2 | Common utilities for testing model selection. 3 | """ 4 | 5 | import numpy as np 6 | 7 | from sklearn.model_selection import KFold 8 | 9 | 10 | class OneTimeSplitter: 11 | """A wrapper that makes KFold a single-entry cv iterator""" 12 | def __init__(self, n_splits=4, n_samples=99): 13 | self.n_splits = n_splits 14 | self.n_samples = n_samples 15 | self.indices = iter(KFold(n_splits=n_splits).split(np.ones(n_samples))) 16 | 17 | def split(self, X=None, y=None, groups=None): 18 | """Split can be called only once""" 19 | for index in self.indices: 20 | yield index 21 | 22 | def get_n_splits(self, X=None, y=None, groups=None): 23 | return self.n_splits 24 | -------------------------------------------------------------------------------- /sklearn/gaussian_process/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Author: Jan Hendrik Metzen 4 | # Vincent Dubourg 5 | # (mostly translation, see implementation details) 6 | # License: BSD 3 clause 7 | 8 | """ 9 | The :mod:`sklearn.gaussian_process` module implements Gaussian Process 10 | based regression and classification. 11 | """ 12 | 13 | from .gpr import GaussianProcessRegressor 14 | from .gpc import GaussianProcessClassifier 15 | from . import kernels 16 | 17 | from . import correlation_models 18 | from . 
import regression_models 19 | 20 | __all__ = ['correlation_models', 'regression_models', 21 | 'GaussianProcessRegressor', 'GaussianProcessClassifier', 22 | 'kernels'] 23 | -------------------------------------------------------------------------------- /sklearn/metrics/cluster/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy 4 | from numpy.distutils.misc_util import Configuration 5 | 6 | 7 | def configuration(parent_package="", top_path=None): 8 | config = Configuration("cluster", parent_package, top_path) 9 | libraries = [] 10 | if os.name == 'posix': 11 | libraries.append('m') 12 | config.add_extension("expected_mutual_info_fast", 13 | sources=["expected_mutual_info_fast.pyx"], 14 | include_dirs=[numpy.get_include()], 15 | libraries=libraries) 16 | 17 | config.add_subpackage("tests") 18 | 19 | return config 20 | 21 | 22 | if __name__ == "__main__": 23 | from numpy.distutils.core import setup 24 | setup(**configuration().todict()) 25 | -------------------------------------------------------------------------------- /sklearn/externals/joblib/externals/loky/backend/compat.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | ############################################################################### 3 | # Compat file to import the correct modules for each platform and python 4 | # version. 5 | # 6 | # author: Thomas Moreau and Olivier grisel 7 | # 8 | import sys 9 | 10 | if sys.version_info[:2] >= (3, 3): 11 | import queue 12 | else: 13 | import Queue as queue 14 | 15 | from pickle import PicklingError 16 | 17 | if sys.version_info >= (3, 4): 18 | from multiprocessing.process import BaseProcess 19 | else: 20 | from multiprocessing.process import Process as BaseProcess 21 | 22 | # Platform specific compat 23 | if sys.platform == "win32": 24 | from .compat_win32 import * 25 | else: 26 | from .compat_posix import * 27 | -------------------------------------------------------------------------------- /sklearn/datasets/images/README.txt: -------------------------------------------------------------------------------- 1 | Image: china.jpg 2 | Released under a creative commons license. [1] 3 | Attribution: Some rights reserved by danielbuechele [2] 4 | Retrieved 21st August, 2011 from [3] by Robert Layton 5 | 6 | [1] https://creativecommons.org/licenses/by/2.0/ 7 | [2] https://www.flickr.com/photos/danielbuechele/ 8 | [3] https://www.flickr.com/photos/danielbuechele/6061409035/sizes/z/in/photostream/ 9 | 10 | 11 | Image: flower.jpg 12 | Released under a creative commons license. 
[1] 13 | Attribution: Some rights reserved by danielbuechele [2] 14 | Retrieved 21st August, 2011 from [3] by Robert Layton 15 | 16 | [1] https://creativecommons.org/licenses/by/2.0/ 17 | [2] https://www.flickr.com/photos/vultilion/ 18 | [3] https://www.flickr.com/photos/vultilion/6056698931/sizes/z/in/photostream/ 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /sklearn/svm/src/liblinear/tron.h: -------------------------------------------------------------------------------- 1 | #ifndef _TRON_H 2 | #define _TRON_H 3 | 4 | class function 5 | { 6 | public: 7 | virtual double fun(double *w) = 0 ; 8 | virtual void grad(double *w, double *g) = 0 ; 9 | virtual void Hv(double *s, double *Hs) = 0 ; 10 | 11 | virtual int get_nr_variable(void) = 0 ; 12 | virtual ~function(void){} 13 | }; 14 | 15 | class TRON 16 | { 17 | public: 18 | TRON(const function *fun_obj, double eps = 0.1, int max_iter = 1000); 19 | ~TRON(); 20 | 21 | int tron(double *w); 22 | void set_print_string(void (*i_print) (const char *buf)); 23 | 24 | private: 25 | int trcg(double delta, double *g, double *s, double *r); 26 | double norm_inf(int n, double *x); 27 | 28 | double eps; 29 | int max_iter; 30 | function *fun_obj; 31 | void info(const char *fmt,...); 32 | void (*tron_print_string)(const char *buf); 33 | }; 34 | #endif 35 | -------------------------------------------------------------------------------- /sklearn/utils/_logistic_sigmoid.pyx: -------------------------------------------------------------------------------- 1 | #cython: boundscheck=False 2 | #cython: cdivision=True 3 | #cython: wraparound=False 4 | 5 | from libc.math cimport log, exp 6 | 7 | import numpy as np 8 | cimport numpy as np 9 | 10 | ctypedef np.float64_t DTYPE_t 11 | 12 | 13 | cdef DTYPE_t _inner_log_logistic_sigmoid(DTYPE_t x): 14 | """Log of the logistic sigmoid function log(1 / (1 + e ** -x))""" 15 | if x > 0: 16 | return -log(1 + exp(-x)) 17 | else: 18 | return x - log(1 + exp(x)) 19 | 20 | 21 | def _log_logistic_sigmoid(int n_samples, int n_features, 22 | np.ndarray[DTYPE_t, ndim=2] X, 23 | np.ndarray[DTYPE_t, ndim=2] out): 24 | for i in range(n_samples): 25 | for j in range(n_features): 26 | out[i, j] = _inner_log_logistic_sigmoid(X[i, j]) 27 | return out 28 | -------------------------------------------------------------------------------- /sklearn/neighbors/typedefs.pyx: -------------------------------------------------------------------------------- 1 | #!python 2 | 3 | import numpy as np 4 | cimport numpy as np 5 | from libc.math cimport sqrt 6 | 7 | # use a hack to determine the associated numpy data types 8 | # NOTE: the following requires the buffer interface, only available in 9 | # numpy 1.5+. We'll choose the DTYPE by hand instead. 
10 | #cdef ITYPE_t idummy
11 | #cdef ITYPE_t[:] idummy_view = &idummy
12 | #ITYPE = np.asarray(idummy_view).dtype
13 | ITYPE = np.intp  # WARNING: this should match ITYPE_t in typedefs.pxd
14 | 
15 | #cdef DTYPE_t ddummy
16 | #cdef DTYPE_t[:] ddummy_view = &ddummy
17 | #DTYPE = np.asarray(ddummy_view).dtype
18 | DTYPE = np.float64  # WARNING: this should match DTYPE_t in typedefs.pxd
19 | 
20 | # some handy constants
21 | cdef DTYPE_t INF = np.inf
22 | cdef DTYPE_t PI = np.pi
23 | cdef DTYPE_t ROOT_2PI = sqrt(2 * PI)
24 | 
--------------------------------------------------------------------------------
/sklearn/externals/copy_joblib.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Script to do a local install of joblib
3 | set +x
4 | export LC_ALL=C
5 | INSTALL_FOLDER=tmp/joblib_install
6 | rm -rf joblib $INSTALL_FOLDER
7 | if [ -z "$1" ]
8 | then
9 |     JOBLIB=joblib
10 | else
11 |     JOBLIB=$1
12 | fi
13 | 
14 | pip install --no-cache $JOBLIB --target $INSTALL_FOLDER
15 | cp -r $INSTALL_FOLDER/joblib joblib
16 | rm -rf $INSTALL_FOLDER
17 | 
18 | # Needed to rewrite the doctests
19 | # Note: BSD sed -i needs an argument under OSX
20 | # so first renaming to .bak and then deleting backup files
21 | find joblib -name "*.py" | xargs sed -i.bak "s/from joblib/from sklearn.externals.joblib/"
22 | find joblib -name "*.bak" | xargs rm
23 | 
24 | # Remove the tests folders to speed-up test time for scikit-learn.
25 | # joblib is already tested on its own CI infrastructure upstream.
26 | rm -r joblib/test
27 | 
--------------------------------------------------------------------------------
/doc/tutorial/index.rst:
--------------------------------------------------------------------------------
1 | .. _tutorial_menu:
2 | 
3 | 
4 | 
5 | .. include:: ../includes/big_toc_css.rst
6 | .. include:: ../tune_toc.rst
7 | 
8 | ======================
9 | scikit-learn Tutorials
10 | ======================
11 | 
12 | |
13 | 
14 | .. toctree::
15 |    :maxdepth: 2
16 | 
17 |    basic/tutorial.rst
18 |    statistical_inference/index.rst
19 |    text_analytics/working_with_text_data.rst
20 |    machine_learning_map/index
21 |    ../presentations
22 | 
23 | |
24 | 
25 | .. note:: **Doctest Mode**
26 | 
27 |    The code examples in the above tutorials are written in a
28 |    *python-console* format. If you wish to easily execute these examples
29 |    in **IPython**, use::
30 | 
31 |       %doctest_mode
32 | 
33 |    in the IPython console. You can then simply copy and paste the examples
34 |    directly into IPython without having to worry about removing the **>>>**
35 |    manually.
36 | -------------------------------------------------------------------------------- /sklearn/datasets/setup.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy 3 | import os 4 | import platform 5 | 6 | 7 | def configuration(parent_package='', top_path=None): 8 | from numpy.distutils.misc_util import Configuration 9 | config = Configuration('datasets', parent_package, top_path) 10 | config.add_data_dir('data') 11 | config.add_data_dir('descr') 12 | config.add_data_dir('images') 13 | config.add_data_dir(os.path.join('tests', 'data')) 14 | if platform.python_implementation() != 'PyPy': 15 | config.add_extension('_svmlight_format', 16 | sources=['_svmlight_format.pyx'], 17 | include_dirs=[numpy.get_include()]) 18 | config.add_subpackage('tests') 19 | return config 20 | 21 | 22 | if __name__ == '__main__': 23 | from numpy.distutils.core import setup 24 | setup(**configuration(top_path='').todict()) 25 | -------------------------------------------------------------------------------- /sklearn/utils/_joblib.py: -------------------------------------------------------------------------------- 1 | # We need the absolute_import to avoid the local joblib to override the 2 | # site one 3 | from __future__ import absolute_import 4 | import os as _os 5 | import warnings as _warnings 6 | 7 | # An environment variable to use the site joblib 8 | if _os.environ.get('SKLEARN_SITE_JOBLIB', False): 9 | with _warnings.catch_warnings(): 10 | _warnings.simplefilter("ignore") 11 | # joblib imports may raise DeprecationWarning on certain Python 12 | # versions 13 | from joblib import __all__ 14 | from joblib import * # noqa 15 | from joblib import __version__ 16 | from joblib import logger 17 | else: 18 | from ..externals.joblib import __all__ # noqa 19 | from ..externals.joblib import * # noqa 20 | from ..externals.joblib import __version__ # noqa 21 | from ..externals.joblib import logger # noqa 22 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/test_california_housing.py: -------------------------------------------------------------------------------- 1 | """Test the california_housing loader. 2 | 3 | Skipped if california_housing is not already downloaded to data_home. 4 | """ 5 | 6 | from sklearn.datasets import fetch_california_housing 7 | from sklearn.utils.testing import SkipTest 8 | from sklearn.datasets.tests.test_common import check_return_X_y 9 | from functools import partial 10 | 11 | 12 | def fetch(*args, **kwargs): 13 | return fetch_california_housing(*args, download_if_missing=False, **kwargs) 14 | 15 | 16 | def test_fetch(): 17 | try: 18 | data = fetch() 19 | except IOError: 20 | raise SkipTest("California housing dataset can not be loaded.") 21 | assert((20640, 8) == data.data.shape) 22 | assert((20640, ) == data.target.shape) 23 | 24 | # test return_X_y option 25 | fetch_func = partial(fetch) 26 | check_return_X_y(data, fetch_func) 27 | -------------------------------------------------------------------------------- /doc/modules/isotonic.rst: -------------------------------------------------------------------------------- 1 | .. _isotonic: 2 | 3 | =================== 4 | Isotonic regression 5 | =================== 6 | 7 | .. currentmodule:: sklearn.isotonic 8 | 9 | The class :class:`IsotonicRegression` fits a non-decreasing function to data. 
10 | It solves the following problem:
11 | 
12 |   minimize :math:`\sum_i w_i (y_i - \hat{y}_i)^2`
13 | 
14 |   subject to :math:`\hat{y}_{min} = \hat{y}_1 \le \hat{y}_2 \le \ldots \le \hat{y}_n = \hat{y}_{max}`
15 | 
16 | where each :math:`w_i` is strictly positive and each :math:`y_i` is an
17 | arbitrary real number. It yields the non-decreasing vector that is closest
18 | to the observations in terms of mean squared error. In practice, these
19 | fitted values define a piecewise linear function.
20 | 
21 | .. figure:: ../auto_examples/images/sphx_glr_plot_isotonic_regression_001.png
22 |    :target: ../auto_examples/plot_isotonic_regression.html
23 |    :align: center
24 | 
--------------------------------------------------------------------------------
/examples/decomposition/plot_beta_divergence.py:
--------------------------------------------------------------------------------
1 | """
2 | ==============================
3 | Beta-divergence loss functions
4 | ==============================
5 | 
6 | A plot that compares the various Beta-divergence loss functions supported by
7 | the Multiplicative-Update ('mu') solver in :class:`sklearn.decomposition.NMF`.
8 | """
9 | import numpy as np
10 | import matplotlib.pyplot as plt
11 | from sklearn.decomposition.nmf import _beta_divergence
12 | 
13 | print(__doc__)
14 | 
15 | x = np.linspace(0.001, 4, 1000)
16 | y = np.zeros(x.shape)
17 | 
18 | colors = 'mbgyr'
19 | for j, beta in enumerate((0., 0.5, 1., 1.5, 2.)):
20 |     for i, xi in enumerate(x):
21 |         y[i] = _beta_divergence(1, xi, 1, beta)
22 |     name = "beta = %1.1f" % beta
23 |     plt.plot(x, y, label=name, color=colors[j])
24 | 
25 | plt.xlabel("x")
26 | plt.title("beta-divergence(1, x)")
27 | plt.legend(loc=0)
28 | plt.axis([0, 4, 0, 3])
29 | plt.show()
30 | 
--------------------------------------------------------------------------------
/doc/includes/big_toc_css.rst:
--------------------------------------------------------------------------------
1 | ..
2 |    File to ..include in a document with a big table of content, to give
3 |    it 'style'
4 | 
5 | .. raw:: html
6 | 
7 | 
42 | 
43 | 
44 | 
45 | 
--------------------------------------------------------------------------------
/sklearn/utils/tests/test_optimize.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | from sklearn.utils.optimize import newton_cg
4 | from scipy.optimize import fmin_ncg
5 | 
6 | from sklearn.utils.testing import assert_array_almost_equal
7 | 
8 | 
9 | def test_newton_cg():
10 |     # Test that newton_cg gives same result as scipy's fmin_ncg
11 | 
12 |     rng = np.random.RandomState(0)
13 |     A = rng.normal(size=(10, 10))
14 |     x0 = np.ones(10)
15 | 
16 |     def func(x):
17 |         Ax = A.dot(x)
18 |         return .5 * (Ax).dot(Ax)
19 | 
20 |     def grad(x):
21 |         return A.T.dot(A.dot(x))
22 | 
23 |     def hess(x, p):
24 |         return A.T.dot(A.dot(p))  # Hessian-vector product: (A.T A) p
25 | 
26 |     def grad_hess(x):
27 |         return grad(x), lambda x: A.T.dot(A.dot(x))
28 | 
29 |     assert_array_almost_equal(
30 |         newton_cg(grad_hess, func, grad, x0, tol=1e-10)[0],
31 |         fmin_ncg(f=func, x0=x0, fprime=grad, fhess_p=hess)
32 |     )
33 | 
--------------------------------------------------------------------------------
/sklearn/svm/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | The :mod:`sklearn.svm` module includes Support Vector Machine algorithms.
3 | """
4 | 
5 | # See http://scikit-learn.sourceforge.net/modules/svm.html for complete
6 | # documentation.
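# A minimal usage sketch (illustrative only; any classifier from this
# module follows the same fit/predict pattern):
#
#     >>> from sklearn.svm import SVC
#     >>> clf = SVC(kernel='linear').fit([[0, 0], [1, 1]], [0, 1])
#     >>> clf.predict([[2., 2.]])
#     array([1])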
7 | 8 | # Author: Fabian Pedregosa with help from 9 | # the scikit-learn community. LibSVM and LibLinear are copyright 10 | # of their respective owners. 11 | # License: BSD 3 clause (C) INRIA 2010 12 | 13 | from .classes import SVC, NuSVC, SVR, NuSVR, OneClassSVM, LinearSVC, \ 14 | LinearSVR 15 | from .bounds import l1_min_c 16 | from . import libsvm, liblinear, libsvm_sparse 17 | 18 | __all__ = ['LinearSVC', 19 | 'LinearSVR', 20 | 'NuSVC', 21 | 'NuSVR', 22 | 'OneClassSVM', 23 | 'SVC', 24 | 'SVR', 25 | 'l1_min_c', 26 | 'liblinear', 27 | 'libsvm', 28 | 'libsvm_sparse'] 29 | -------------------------------------------------------------------------------- /sklearn/linear_model/sgd_fast.pxd: -------------------------------------------------------------------------------- 1 | """Helper to load LossFunction from sgd_fast.pyx to sag_fast.pyx""" 2 | # License: BSD 3 clause 3 | 4 | cdef class LossFunction: 5 | cdef double loss(self, double p, double y) nogil 6 | cdef double _dloss(self, double p, double y) nogil 7 | 8 | 9 | cdef class Regression(LossFunction): 10 | cdef double loss(self, double p, double y) nogil 11 | cdef double _dloss(self, double p, double y) nogil 12 | 13 | 14 | cdef class Classification(LossFunction): 15 | cdef double loss(self, double p, double y) nogil 16 | cdef double _dloss(self, double p, double y) nogil 17 | 18 | 19 | cdef class Log(Classification): 20 | cdef double loss(self, double p, double y) nogil 21 | cdef double _dloss(self, double p, double y) nogil 22 | 23 | 24 | cdef class SquaredLoss(Regression): 25 | cdef double loss(self, double p, double y) nogil 26 | cdef double _dloss(self, double p, double y) nogil 27 | -------------------------------------------------------------------------------- /sklearn/utils/murmurhash.pxd: -------------------------------------------------------------------------------- 1 | """Export fast murmurhash C/C++ routines + cython wrappers""" 2 | 3 | cimport numpy as np 4 | 5 | # The C API is disabled for now, since it requires -I flags to get 6 | # compilation to work even when these functions are not used. 
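# (For reference, the cpdef wrappers declared at the end of this file can
# instead be cimported from other Cython modules -- an illustrative sketch:
#     from sklearn.utils.murmurhash cimport murmurhash3_int_u32
# )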
7 | #cdef extern from "MurmurHash3.h": 8 | # void MurmurHash3_x86_32(void* key, int len, unsigned int seed, 9 | # void* out) 10 | # 11 | # void MurmurHash3_x86_128(void* key, int len, unsigned int seed, 12 | # void* out) 13 | # 14 | # void MurmurHash3_x64_128(void* key, int len, unsigned int seed, 15 | # void* out) 16 | 17 | 18 | cpdef np.uint32_t murmurhash3_int_u32(int key, unsigned int seed) 19 | cpdef np.int32_t murmurhash3_int_s32(int key, unsigned int seed) 20 | cpdef np.uint32_t murmurhash3_bytes_u32(bytes key, unsigned int seed) 21 | cpdef np.int32_t murmurhash3_bytes_s32(bytes key, unsigned int seed) 22 | -------------------------------------------------------------------------------- /sklearn/utils/tests/test_show_versions.py: -------------------------------------------------------------------------------- 1 | 2 | from sklearn.utils._show_versions import _get_sys_info 3 | from sklearn.utils._show_versions import _get_deps_info 4 | from sklearn.utils._show_versions import show_versions 5 | 6 | 7 | def test_get_sys_info(): 8 | sys_info = _get_sys_info() 9 | 10 | assert 'python' in sys_info 11 | assert 'executable' in sys_info 12 | assert 'machine' in sys_info 13 | 14 | 15 | def test_get_deps_info(): 16 | deps_info = _get_deps_info() 17 | 18 | assert 'pip' in deps_info 19 | assert 'setuptools' in deps_info 20 | assert 'sklearn' in deps_info 21 | assert 'numpy' in deps_info 22 | assert 'scipy' in deps_info 23 | assert 'Cython' in deps_info 24 | assert 'pandas' in deps_info 25 | 26 | 27 | def test_show_versions_with_blas(capsys): 28 | show_versions() 29 | out, err = capsys.readouterr() 30 | assert 'python' in out 31 | assert 'numpy' in out 32 | assert 'BLAS' in out 33 | -------------------------------------------------------------------------------- /examples/model_selection/plot_cv_predict.py: -------------------------------------------------------------------------------- 1 | """ 2 | ==================================== 3 | Plotting Cross-Validated Predictions 4 | ==================================== 5 | 6 | This example shows how to use `cross_val_predict` to visualize prediction 7 | errors. 
8 | 9 | """ 10 | from sklearn import datasets 11 | from sklearn.model_selection import cross_val_predict 12 | from sklearn import linear_model 13 | import matplotlib.pyplot as plt 14 | 15 | lr = linear_model.LinearRegression() 16 | boston = datasets.load_boston() 17 | y = boston.target 18 | 19 | # cross_val_predict returns an array of the same size as `y` where each entry 20 | # is a prediction obtained by cross validation: 21 | predicted = cross_val_predict(lr, boston.data, y, cv=10) 22 | 23 | fig, ax = plt.subplots() 24 | ax.scatter(y, predicted, edgecolors=(0, 0, 0)) 25 | ax.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=4) 26 | ax.set_xlabel('Measured') 27 | ax.set_ylabel('Predicted') 28 | plt.show() 29 | -------------------------------------------------------------------------------- /benchmarks/plot_tsne_mnist.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import os.path as op 4 | 5 | import argparse 6 | 7 | 8 | LOG_DIR = "mnist_tsne_output" 9 | 10 | 11 | if __name__ == "__main__": 12 | parser = argparse.ArgumentParser('Plot benchmark results for t-SNE') 13 | parser.add_argument( 14 | '--labels', type=str, 15 | default=op.join(LOG_DIR, 'mnist_original_labels_10000.npy'), 16 | help='1D integer numpy array for labels') 17 | parser.add_argument( 18 | '--embedding', type=str, 19 | default=op.join(LOG_DIR, 'mnist_sklearn_TSNE_10000.npy'), 20 | help='2D float numpy array for embedded data') 21 | args = parser.parse_args() 22 | 23 | X = np.load(args.embedding) 24 | y = np.load(args.labels) 25 | 26 | for i in np.unique(y): 27 | mask = y == i 28 | plt.scatter(X[mask, 0], X[mask, 1], alpha=0.2, label=int(i)) 29 | plt.legend(loc='best') 30 | plt.show() 31 | -------------------------------------------------------------------------------- /doc/tutorial/text_analytics/data/movie_reviews/fetch_data.py: -------------------------------------------------------------------------------- 1 | """Script to download the movie review dataset""" 2 | 3 | import os 4 | import tarfile 5 | from contextlib import closing 6 | try: 7 | from urllib import urlopen 8 | except ImportError: 9 | from urllib.request import urlopen 10 | 11 | 12 | URL = ("http://www.cs.cornell.edu/people/pabo/" 13 | "movie-review-data/review_polarity.tar.gz") 14 | 15 | ARCHIVE_NAME = URL.rsplit('/', 1)[1] 16 | DATA_FOLDER = "txt_sentoken" 17 | 18 | 19 | if not os.path.exists(DATA_FOLDER): 20 | 21 | if not os.path.exists(ARCHIVE_NAME): 22 | print("Downloading dataset from %s (3 MB)" % URL) 23 | opener = urlopen(URL) 24 | with open(ARCHIVE_NAME, 'wb') as archive: 25 | archive.write(opener.read()) 26 | 27 | print("Decompressing %s" % ARCHIVE_NAME) 28 | with closing(tarfile.open(ARCHIVE_NAME, "r:gz")) as archive: 29 | archive.extractall(path='.') 30 | os.remove(ARCHIVE_NAME) 31 | -------------------------------------------------------------------------------- /sklearn/cluster/tests/common.py: -------------------------------------------------------------------------------- 1 | """ 2 | Common utilities for testing clustering. 
3 | 
4 | """
5 | 
6 | import numpy as np
7 | 
8 | 
9 | ###############################################################################
10 | # Generate sample data
11 | 
12 | def generate_clustered_data(seed=0, n_clusters=3, n_features=2,
13 |                             n_samples_per_cluster=20, std=.4):
14 |     prng = np.random.RandomState(seed)
15 | 
16 |     # the data is voluntarily shifted away from zero to check clustering
17 |     # algorithm robustness with regard to non-centered data
18 |     means = np.array([[1, 1, 1, 0],
19 |                       [-1, -1, 0, 1],
20 |                       [1, -1, 1, 1],
21 |                       [-1, 1, 1, 0],
22 |                       ]) + 10
23 | 
24 |     X = np.empty((0, n_features))
25 |     for i in range(n_clusters):
26 |         X = np.r_[X, means[i][:n_features]
27 |                   + std * prng.randn(n_samples_per_cluster, n_features)]
28 |     return X
29 | 
--------------------------------------------------------------------------------
/sklearn/decomposition/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy
3 | from numpy.distutils.misc_util import Configuration
4 | 
5 | 
6 | def configuration(parent_package="", top_path=None):
7 |     config = Configuration("decomposition", parent_package, top_path)
8 | 
9 |     libraries = []
10 |     if os.name == 'posix':
11 |         libraries.append('m')
12 | 
13 |     config.add_extension("_online_lda",
14 |                          sources=["_online_lda.pyx"],
15 |                          include_dirs=[numpy.get_include()],
16 |                          libraries=libraries)
17 | 
18 |     config.add_extension('cdnmf_fast',
19 |                          sources=['cdnmf_fast.pyx'],
20 |                          include_dirs=[numpy.get_include()],
21 |                          libraries=libraries)
22 | 
23 |     config.add_subpackage("tests")
24 | 
25 |     return config
26 | 
27 | if __name__ == "__main__":
28 |     from numpy.distutils.core import setup
29 |     setup(**configuration().todict())
30 | 
--------------------------------------------------------------------------------
/sklearn/utils/weight_vector.pxd:
--------------------------------------------------------------------------------
1 | """Efficient (dense) parameter vector implementation for linear models. """
2 | 
3 | cimport numpy as np
4 | 
5 | 
6 | cdef extern from "math.h":
7 |     cdef extern double sqrt(double x)
8 | 
9 | 
10 | cdef class WeightVector(object):
11 |     cdef np.ndarray w
12 |     cdef np.ndarray aw
13 |     cdef double *w_data_ptr
14 |     cdef double *aw_data_ptr
15 |     cdef double wscale
16 |     cdef double average_a
17 |     cdef double average_b
18 |     cdef int n_features
19 |     cdef double sq_norm
20 | 
21 |     cdef void add(self, double *x_data_ptr, int *x_ind_ptr,
22 |                   int xnnz, double c) nogil
23 |     cdef void add_average(self, double *x_data_ptr, int *x_ind_ptr,
24 |                           int xnnz, double c, double num_iter) nogil
25 |     cdef double dot(self, double *x_data_ptr, int *x_ind_ptr,
26 |                     int xnnz) nogil
27 |     cdef void scale(self, double c) nogil
28 |     cdef void reset_wscale(self) nogil
29 |     cdef double norm(self) nogil
30 | 
--------------------------------------------------------------------------------
/sklearn/externals/joblib/externals/loky/__init__.py:
--------------------------------------------------------------------------------
1 | r"""The :mod:`loky` module manages a pool of workers that can be re-used across time.
2 | It provides a robust and dynamic implementation of the
3 | :class:`ProcessPoolExecutor` and a function :func:`get_reusable_executor` which
4 | hides the pool management under the hood.
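A minimal usage sketch (illustrative only; assumes the mapped callable is
picklable):

    >>> executor = get_reusable_executor(max_workers=2)
    >>> list(executor.map(abs, [-1, 2, -3]))
    [1, 2, 3]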
5 | """ 6 | from ._base import Executor, Future 7 | from ._base import wait, as_completed 8 | from ._base import TimeoutError, CancelledError 9 | from ._base import ALL_COMPLETED, FIRST_COMPLETED, FIRST_EXCEPTION 10 | 11 | from .backend.context import cpu_count 12 | from .reusable_executor import get_reusable_executor 13 | from .process_executor import BrokenProcessPool, ProcessPoolExecutor 14 | 15 | 16 | __all__ = ["get_reusable_executor", "cpu_count", "wait", "as_completed", 17 | "Future", "Executor", "ProcessPoolExecutor", 18 | "BrokenProcessPool", "CancelledError", "TimeoutError", 19 | "FIRST_COMPLETED", "FIRST_EXCEPTION", "ALL_COMPLETED", ] 20 | 21 | 22 | __version__ = '2.3.1' 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.so 3 | *.pyd 4 | *~ 5 | .#* 6 | *.lprof 7 | *.swp 8 | *.swo 9 | .DS_Store 10 | build 11 | sklearn/datasets/__config__.py 12 | sklearn/**/*.html 13 | 14 | dist/ 15 | MANIFEST 16 | doc/_build/ 17 | doc/auto_examples/ 18 | doc/modules/generated/ 19 | doc/datasets/generated/ 20 | *.pdf 21 | pip-log.txt 22 | scikit_learn.egg-info/ 23 | .coverage 24 | coverage 25 | *.py,cover 26 | .tags* 27 | tags 28 | covtype.data.gz 29 | 20news-18828/ 30 | 20news-18828.tar.gz 31 | coverages.zip 32 | samples.zip 33 | doc/coverages.zip 34 | doc/samples.zip 35 | coverages 36 | samples 37 | doc/coverages 38 | doc/samples 39 | *.prof 40 | .tox/ 41 | .coverage 42 | 43 | lfw_preprocessed/ 44 | nips2010_pdf/ 45 | 46 | *.nt.bz2 47 | *.tar.gz 48 | *.tgz 49 | 50 | examples/cluster/joblib 51 | reuters/ 52 | benchmarks/bench_covertype_data/ 53 | 54 | *.prefs 55 | .pydevproject 56 | .idea 57 | .vscode 58 | 59 | *.c 60 | *.cpp 61 | 62 | !*/src/*.c 63 | !*/src/*.cpp 64 | *.sln 65 | *.pyproj 66 | 67 | # Used by py.test 68 | .cache 69 | .pytest_cache/ 70 | _configtest.o.d 71 | -------------------------------------------------------------------------------- /doc/whats_new.rst: -------------------------------------------------------------------------------- 1 | .. currentmodule:: sklearn 2 | .. include:: includes/big_toc_css.rst 3 | .. include:: whats_new/_contributors.rst 4 | 5 | Release History 6 | =============== 7 | 8 | Release notes for current and recent releases are detailed on this page, with 9 | :ref:`previous releases ` linked below. 10 | 11 | **Tip:** `Subscribe to scikit-learn releases `__ 12 | on libraries.io to be notified when new versions are released. 13 | 14 | .. include:: whats_new/v0.21.rst 15 | .. include:: whats_new/v0.20.rst 16 | 17 | .. _previous_releases_whats_new: 18 | 19 | Previous Releases 20 | ================= 21 | .. 
toctree:: 22 | :maxdepth: 1 23 | 24 | Version 0.19 25 | Version 0.18 26 | Version 0.17 27 | Version 0.16 28 | Version 0.15 29 | Version 0.14 30 | Version 0.13 31 | Older Versions 32 | -------------------------------------------------------------------------------- /doc/tutorial/text_analytics/data/twenty_newsgroups/fetch_data.py: -------------------------------------------------------------------------------- 1 | """Script to download the 20 newsgroups text classification set""" 2 | 3 | import os 4 | import tarfile 5 | from contextlib import closing 6 | 7 | try: 8 | from urllib import urlopen 9 | except ImportError: 10 | from urllib.request import urlopen 11 | 12 | URL = ("http://people.csail.mit.edu/jrennie/" 13 | "20Newsgroups/20news-bydate.tar.gz") 14 | 15 | ARCHIVE_NAME = URL.rsplit('/', 1)[1] 16 | TRAIN_FOLDER = "20news-bydate-train" 17 | TEST_FOLDER = "20news-bydate-test" 18 | 19 | 20 | if not os.path.exists(TRAIN_FOLDER) or not os.path.exists(TEST_FOLDER): 21 | 22 | if not os.path.exists(ARCHIVE_NAME): 23 | print("Downloading dataset from %s (14 MB)" % URL) 24 | opener = urlopen(URL) 25 | with open(ARCHIVE_NAME, 'wb') as archive: 26 | archive.write(opener.read()) 27 | 28 | print("Decompressing %s" % ARCHIVE_NAME) 29 | with closing(tarfile.open(ARCHIVE_NAME, "r:gz")) as archive: 30 | archive.extractall(path='.') 31 | os.remove(ARCHIVE_NAME) 32 | -------------------------------------------------------------------------------- /sklearn/utils/tests/test_fast_dict.py: -------------------------------------------------------------------------------- 1 | """ Test fast_dict. 2 | """ 3 | import numpy as np 4 | 5 | from sklearn.utils.fast_dict import IntFloatDict, argmin 6 | from sklearn.utils.testing import assert_equal 7 | from sklearn.externals.six.moves import xrange 8 | 9 | 10 | def test_int_float_dict(): 11 | rng = np.random.RandomState(0) 12 | keys = np.unique(rng.randint(100, size=10).astype(np.intp)) 13 | values = rng.rand(len(keys)) 14 | 15 | d = IntFloatDict(keys, values) 16 | for key, value in zip(keys, values): 17 | assert_equal(d[key], value) 18 | assert_equal(len(d), len(keys)) 19 | 20 | d.append(120, 3.) 21 | assert_equal(d[120], 3.0) 22 | assert_equal(len(d), len(keys) + 1) 23 | for i in xrange(2000): 24 | d.append(i + 1000, 4.0) 25 | assert_equal(d[1100], 4.0) 26 | 27 | 28 | def test_int_float_dict_argmin(): 29 | # Test the argmin implementation on the IntFloatDict 30 | keys = np.arange(100, dtype=np.intp) 31 | values = np.arange(100, dtype=np.float64) 32 | d = IntFloatDict(keys, values) 33 | assert_equal(argmin(d), (0, 0)) 34 | -------------------------------------------------------------------------------- /examples/feature_selection/plot_rfe_digits.py: -------------------------------------------------------------------------------- 1 | """ 2 | ============================= 3 | Recursive feature elimination 4 | ============================= 5 | 6 | A recursive feature elimination example showing the relevance of pixels in 7 | a digit classification task. 8 | 9 | .. 
note:: 10 | 11 | See also :ref:`sphx_glr_auto_examples_feature_selection_plot_rfe_with_cross_validation.py` 12 | 13 | """ 14 | print(__doc__) 15 | 16 | from sklearn.svm import SVC 17 | from sklearn.datasets import load_digits 18 | from sklearn.feature_selection import RFE 19 | import matplotlib.pyplot as plt 20 | 21 | # Load the digits dataset 22 | digits = load_digits() 23 | X = digits.images.reshape((len(digits.images), -1)) 24 | y = digits.target 25 | 26 | # Create the RFE object and rank each pixel 27 | svc = SVC(kernel="linear", C=1) 28 | rfe = RFE(estimator=svc, n_features_to_select=1, step=1) 29 | rfe.fit(X, y) 30 | ranking = rfe.ranking_.reshape(digits.images[0].shape) 31 | 32 | # Plot pixel ranking 33 | plt.matshow(ranking, cmap=plt.cm.Blues) 34 | plt.colorbar() 35 | plt.title("Ranking of pixels with RFE") 36 | plt.show() 37 | -------------------------------------------------------------------------------- /examples/datasets/plot_digits_last_image.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | ========================================================= 6 | The Digit Dataset 7 | ========================================================= 8 | 9 | This dataset is made up of 1797 8x8 images. Each image, 10 | like the one shown below, is of a hand-written digit. 11 | In order to utilize an 8x8 figure like this, we'd have to 12 | first transform it into a feature vector with length 64. 13 | 14 | See `here 15 | `_ 16 | for more information about this dataset. 17 | """ 18 | print(__doc__) 19 | 20 | 21 | # Code source: Gaël Varoquaux 22 | # Modified for documentation by Jaques Grobler 23 | # License: BSD 3 clause 24 | 25 | from sklearn import datasets 26 | 27 | import matplotlib.pyplot as plt 28 | 29 | #Load the digits dataset 30 | digits = datasets.load_digits() 31 | 32 | #Display the first digit 33 | plt.figure(1, figsize=(3, 3)) 34 | plt.imshow(digits.images[-1], cmap=plt.cm.gray_r, interpolation='nearest') 35 | plt.show() 36 | -------------------------------------------------------------------------------- /sklearn/feature_selection/tests/test_variance_threshold.py: -------------------------------------------------------------------------------- 1 | from sklearn.utils.testing import (assert_array_equal, assert_equal, 2 | assert_raises) 3 | 4 | from scipy.sparse import bsr_matrix, csc_matrix, csr_matrix 5 | 6 | from sklearn.feature_selection import VarianceThreshold 7 | 8 | data = [[0, 1, 2, 3, 4], 9 | [0, 2, 2, 3, 5], 10 | [1, 1, 2, 4, 0]] 11 | 12 | 13 | def test_zero_variance(): 14 | # Test VarianceThreshold with default setting, zero variance. 15 | 16 | for X in [data, csr_matrix(data), csc_matrix(data), bsr_matrix(data)]: 17 | sel = VarianceThreshold().fit(X) 18 | assert_array_equal([0, 1, 3, 4], sel.get_support(indices=True)) 19 | 20 | assert_raises(ValueError, VarianceThreshold().fit, [[0, 1, 2, 3]]) 21 | assert_raises(ValueError, VarianceThreshold().fit, [[0, 1], [0, 1]]) 22 | 23 | 24 | def test_variance_threshold(): 25 | # Test VarianceThreshold with custom variance. 26 | for X in [data, csr_matrix(data)]: 27 | X = VarianceThreshold(threshold=.4).fit_transform(X) 28 | assert_equal((len(data), 1), X.shape) 29 | -------------------------------------------------------------------------------- /sklearn/datasets/descr/covtype.rst: -------------------------------------------------------------------------------- 1 | .. 
_covtype_dataset: 2 | 3 | Forest covertypes 4 | ----------------- 5 | 6 | The samples in this dataset correspond to 30×30m patches of forest in the US, 7 | collected for the task of predicting each patch's cover type, 8 | i.e. the dominant species of tree. 9 | There are seven covertypes, making this a multiclass classification problem. 10 | Each sample has 54 features, described on the 11 | `dataset's homepage `__. 12 | Some of the features are boolean indicators, 13 | while others are discrete or continuous measurements. 14 | 15 | **Data Set Characteristics:** 16 | 17 | ================= ============ 18 | Classes 7 19 | Samples total 581012 20 | Dimensionality 54 21 | Features int 22 | ================= ============ 23 | 24 | :func:`sklearn.datasets.fetch_covtype` will load the covertype dataset; 25 | it returns a dictionary-like object 26 | with the feature matrix in the ``data`` member 27 | and the target values in ``target``. 28 | The dataset will be downloaded from the web if necessary. 29 | -------------------------------------------------------------------------------- /sklearn/datasets/tests/test_covtype.py: -------------------------------------------------------------------------------- 1 | """Test the covtype loader. 2 | 3 | Skipped if covtype is not already downloaded to data_home. 4 | """ 5 | 6 | from sklearn.datasets import fetch_covtype 7 | from sklearn.utils.testing import assert_equal, SkipTest 8 | from sklearn.datasets.tests.test_common import check_return_X_y 9 | from functools import partial 10 | 11 | 12 | def fetch(*args, **kwargs): 13 | return fetch_covtype(*args, download_if_missing=False, **kwargs) 14 | 15 | 16 | def test_fetch(): 17 | try: 18 | data1 = fetch(shuffle=True, random_state=42) 19 | except IOError: 20 | raise SkipTest("Covertype dataset can not be loaded.") 21 | 22 | data2 = fetch(shuffle=True, random_state=37) 23 | 24 | X1, X2 = data1['data'], data2['data'] 25 | assert_equal((581012, 54), X1.shape) 26 | assert_equal(X1.shape, X2.shape) 27 | 28 | assert_equal(X1.sum(), X2.sum()) 29 | 30 | y1, y2 = data1['target'], data2['target'] 31 | assert_equal((X1.shape[0],), y1.shape) 32 | assert_equal((X1.shape[0],), y2.shape) 33 | 34 | # test return_X_y option 35 | fetch_func = partial(fetch) 36 | check_return_X_y(data1, fetch_func) 37 | -------------------------------------------------------------------------------- /doc/tutorial/text_analytics/solutions/generate_skeletons.py: -------------------------------------------------------------------------------- 1 | """Generate skeletons from the example code""" 2 | import os 3 | 4 | exercise_dir = os.path.dirname(__file__) 5 | if exercise_dir == '': 6 | exercise_dir = '.' 
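# (os.path.dirname(__file__) is '' when the script is invoked from its own
# directory, hence the fallback to '.')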
7 | 8 | skeleton_dir = os.path.abspath(os.path.join(exercise_dir, '..', 'skeletons')) 9 | if not os.path.exists(skeleton_dir): 10 | os.makedirs(skeleton_dir) 11 | 12 | solutions = os.listdir(exercise_dir) 13 | 14 | for f in solutions: 15 | if not f.endswith('.py'): 16 | continue 17 | 18 | if f == os.path.basename(__file__): 19 | continue 20 | 21 | print("Generating skeleton for %s" % f) 22 | 23 | input_file = open(os.path.join(exercise_dir, f)) 24 | output_file = open(os.path.join(skeleton_dir, f), 'w') 25 | 26 | in_exercise_region = False 27 | 28 | for line in input_file: 29 | linestrip = line.strip() 30 | if len(linestrip) == 0: 31 | in_exercise_region = False 32 | elif linestrip.startswith('# TASK:'): 33 | in_exercise_region = True 34 | 35 | if not in_exercise_region or linestrip.startswith('#'): 36 | output_file.write(line) 37 | 38 | output_file.close() 39 | -------------------------------------------------------------------------------- /sklearn/neighbors/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.neighbors` module implements the k-nearest neighbors 3 | algorithm. 4 | """ 5 | 6 | from .ball_tree import BallTree 7 | from .kd_tree import KDTree 8 | from .dist_metrics import DistanceMetric 9 | from .graph import kneighbors_graph, radius_neighbors_graph 10 | from .unsupervised import NearestNeighbors 11 | from .classification import KNeighborsClassifier, RadiusNeighborsClassifier 12 | from .regression import KNeighborsRegressor, RadiusNeighborsRegressor 13 | from .nearest_centroid import NearestCentroid 14 | from .kde import KernelDensity 15 | from .lof import LocalOutlierFactor 16 | from .base import VALID_METRICS, VALID_METRICS_SPARSE 17 | 18 | __all__ = ['BallTree', 19 | 'DistanceMetric', 20 | 'KDTree', 21 | 'KNeighborsClassifier', 22 | 'KNeighborsRegressor', 23 | 'NearestCentroid', 24 | 'NearestNeighbors', 25 | 'RadiusNeighborsClassifier', 26 | 'RadiusNeighborsRegressor', 27 | 'kneighbors_graph', 28 | 'radius_neighbors_graph', 29 | 'KernelDensity', 30 | 'LocalOutlierFactor', 31 | 'VALID_METRICS', 32 | 'VALID_METRICS_SPARSE'] 33 | -------------------------------------------------------------------------------- /examples/exercises/plot_digits_classification_exercise.py: -------------------------------------------------------------------------------- 1 | """ 2 | ================================ 3 | Digits Classification Exercise 4 | ================================ 5 | 6 | A tutorial exercise regarding the use of classification techniques on 7 | the Digits dataset. 8 | 9 | This exercise is used in the :ref:`clf_tut` part of the 10 | :ref:`supervised_learning_tut` section of the 11 | :ref:`stat_learn_tut_index`. 
12 | """
13 | print(__doc__)
14 | 
15 | from sklearn import datasets, neighbors, linear_model
16 | 
17 | digits = datasets.load_digits()
18 | X_digits = digits.data / digits.data.max()
19 | y_digits = digits.target
20 | 
21 | n_samples = len(X_digits)
22 | 
23 | X_train = X_digits[:int(.9 * n_samples)]
24 | y_train = y_digits[:int(.9 * n_samples)]
25 | X_test = X_digits[int(.9 * n_samples):]
26 | y_test = y_digits[int(.9 * n_samples):]
27 | 
28 | knn = neighbors.KNeighborsClassifier()
29 | logistic = linear_model.LogisticRegression(solver='lbfgs', max_iter=1000,
30 |                                            multi_class='multinomial')
31 | 
32 | print('KNN score: %f' % knn.fit(X_train, y_train).score(X_test, y_test))
33 | print('LogisticRegression score: %f'
34 |       % logistic.fit(X_train, y_train).score(X_test, y_test))
35 | 
--------------------------------------------------------------------------------
/examples/feature_selection/plot_feature_selection_pipeline.py:
--------------------------------------------------------------------------------
1 | """
2 | ==================
3 | Pipeline Anova SVM
4 | ==================
5 | 
6 | Simple usage of Pipeline that successively runs a univariate
7 | feature selection with ANOVA and then a C-SVM trained on the selected features.
8 | """
9 | from sklearn import svm
10 | from sklearn.datasets import samples_generator
11 | from sklearn.feature_selection import SelectKBest, f_classif
12 | from sklearn.pipeline import make_pipeline
13 | from sklearn.model_selection import train_test_split
14 | from sklearn.metrics import classification_report
15 | 
16 | print(__doc__)
17 | 
18 | # import some data to play with
19 | X, y = samples_generator.make_classification(
20 |     n_features=20, n_informative=3, n_redundant=0, n_classes=4,
21 |     n_clusters_per_class=2)
22 | 
23 | X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
24 | 
25 | # ANOVA SVM-C
26 | # 1) anova filter (f_classif is the ANOVA F-test for classification),
27 | #    take 3 best ranked features
28 | anova_filter = SelectKBest(f_classif, k=3)
29 | # 2) svm
30 | clf = svm.SVC(kernel='linear')
31 | 
32 | anova_svm = make_pipeline(anova_filter, clf)
33 | anova_svm.fit(X_train, y_train)
34 | y_pred = anova_svm.predict(X_test)
35 | print(classification_report(y_test, y_pred))
36 | 
--------------------------------------------------------------------------------
/sklearn/metrics/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | import os.path
3 | 
4 | import numpy
5 | from numpy.distutils.misc_util import Configuration
6 | 
7 | from sklearn._build_utils import get_blas_info
8 | 
9 | 
10 | def configuration(parent_package="", top_path=None):
11 |     config = Configuration("metrics", parent_package, top_path)
12 | 
13 |     cblas_libs, blas_info = get_blas_info()
14 |     if os.name == 'posix':
15 |         cblas_libs.append('m')
16 | 
17 |     config.add_subpackage('cluster')
18 |     config.add_extension("pairwise_fast",
19 |                          sources=["pairwise_fast.pyx"],
20 |                          include_dirs=[os.path.join('..', 'src', 'cblas'),
21 |                                        numpy.get_include(),
22 |                                        blas_info.pop('include_dirs', [])],
23 |                          libraries=cblas_libs,
24 |                          extra_compile_args=blas_info.pop('extra_compile_args',
25 |                                                           []),
26 |                          **blas_info)
27 |     config.add_subpackage('tests')
28 | 
29 |     return config
30 | 
31 | if __name__ == "__main__":
32 |     from numpy.distutils.core import setup
33 |     setup(**configuration().todict())
34 | 
--------------------------------------------------------------------------------
/examples/linear_model/plot_lasso_lars.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """
3 | =====================
4 | Lasso path using LARS
5 | =====================
6 | 
7 | Computes the Lasso path along the regularization parameter using the LARS
8 | algorithm on the diabetes dataset. Each color represents a different
9 | feature of the coefficient vector, and this is displayed as a function
10 | of the regularization parameter.
11 | 
12 | """
13 | print(__doc__)
14 | 
15 | # Author: Fabian Pedregosa
16 | #         Alexandre Gramfort
17 | # License: BSD 3 clause
18 | 
19 | import numpy as np
20 | import matplotlib.pyplot as plt
21 | 
22 | from sklearn import linear_model
23 | from sklearn import datasets
24 | 
25 | diabetes = datasets.load_diabetes()
26 | X = diabetes.data
27 | y = diabetes.target
28 | 
29 | print("Computing regularization path using the LARS ...")
30 | _, _, coefs = linear_model.lars_path(X, y, method='lasso', verbose=True)
31 | 
32 | xx = np.sum(np.abs(coefs.T), axis=1)
33 | xx /= xx[-1]
34 | 
35 | plt.plot(xx, coefs.T)
36 | ymin, ymax = plt.ylim()
37 | plt.vlines(xx, ymin, ymax, linestyle='dashed')
38 | plt.xlabel('|coef| / max|coef|')
39 | plt.ylabel('Coefficients')
40 | plt.title('LASSO Path')
41 | plt.axis('tight')
42 | plt.show()
43 | 
--------------------------------------------------------------------------------
/doc/includes/bigger_toc_css.rst:
--------------------------------------------------------------------------------
1 | ..
2 |    File to ..include in a document with a very big table of content, to
3 |    give it 'style'
4 | 
5 | .. raw:: html
6 | 
7 | 
58 | 
59 | 
60 | 
61 | 
--------------------------------------------------------------------------------
/examples/svm/plot_svm_nonlinear.py:
--------------------------------------------------------------------------------
1 | """
2 | ==============
3 | Non-linear SVM
4 | ==============
5 | 
6 | Perform binary classification using non-linear SVC
7 | with RBF kernel. The target to predict is an XOR of the
8 | inputs.
9 | 
10 | The color map illustrates the decision function learned by the SVC.
11 | """
12 | print(__doc__)
13 | 
14 | import numpy as np
15 | import matplotlib.pyplot as plt
16 | from sklearn import svm
17 | 
18 | xx, yy = np.meshgrid(np.linspace(-3, 3, 500),
19 |                      np.linspace(-3, 3, 500))
20 | np.random.seed(0)
21 | X = np.random.randn(300, 2)
22 | Y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0)
23 | 
24 | # fit the model
25 | clf = svm.NuSVC()
26 | clf.fit(X, Y)
27 | 
28 | # plot the decision function for each datapoint on the grid
29 | Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
30 | Z = Z.reshape(xx.shape)
31 | 
32 | plt.imshow(Z, interpolation='nearest',
33 |            extent=(xx.min(), xx.max(), yy.min(), yy.max()), aspect='auto',
34 |            origin='lower', cmap=plt.cm.PuOr_r)
35 | contours = plt.contour(xx, yy, Z, levels=[0], linewidths=2,
36 |                        linestyles='--')
37 | plt.scatter(X[:, 0], X[:, 1], s=30, c=Y, cmap=plt.cm.Paired,
38 |             edgecolors='k')
39 | plt.xticks(())
40 | plt.yticks(())
41 | plt.axis([-3, 3, -3, 3])
42 | plt.show()
43 | 
--------------------------------------------------------------------------------
/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | 
5 | 
6 | #### Reference Issues/PRs
7 | 
13 | 
14 | 
15 | #### What does this implement/fix? Explain your changes.
16 | 
17 | 
18 | #### Any other comments?
19 | 
20 | 
21 | 
33 | 
--------------------------------------------------------------------------------
/sklearn/utils/src/MurmurHash3.h:
--------------------------------------------------------------------------------
1 | //-----------------------------------------------------------------------------
2 | // MurmurHash3 was written by Austin Appleby, and is placed in the public
3 | // domain. The author hereby disclaims copyright to this source code.
4 | 
5 | #ifndef _MURMURHASH3_H_
6 | #define _MURMURHASH3_H_
7 | 
8 | //-----------------------------------------------------------------------------
9 | // Platform-specific functions and macros
10 | 
11 | // Microsoft Visual Studio
12 | 
13 | #if defined(_MSC_VER)
14 | 
15 | typedef unsigned char uint8_t;
16 | typedef unsigned long uint32_t;
17 | typedef unsigned __int64 uint64_t;
18 | 
19 | // Other compilers
20 | 
21 | #else // defined(_MSC_VER)
22 | 
23 | #include <stdint.h>
24 | 
25 | #endif // !defined(_MSC_VER)
26 | 
27 | //-----------------------------------------------------------------------------
28 | #ifdef __cplusplus
29 | extern "C" {
30 | #endif
31 | 
32 | 
33 | void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out );
34 | 
35 | void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out );
36 | 
37 | void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );
38 | 
39 | #ifdef __cplusplus
40 | }
41 | #endif
42 | 
43 | //-----------------------------------------------------------------------------
44 | 
45 | #endif // _MURMURHASH3_H_
46 | 
--------------------------------------------------------------------------------
/sklearn/decomposition/cdnmf_fast.pyx:
--------------------------------------------------------------------------------
1 | # cython: cdivision=True
2 | # cython: boundscheck=False
3 | # cython: wraparound=False
4 | 
5 | # Author: Mathieu Blondel, Tom Dupre la Tour
6 | # License: BSD 3 clause
7 | 
8 | cimport cython
9 | from libc.math cimport fabs
10 | 
11 | 
12 | def _update_cdnmf_fast(double[:, ::1] W, double[:, :] HHt, double[:, :] XHt,
13 |                        Py_ssize_t[::1] permutation):
14 |     cdef double violation = 0
15 |     cdef Py_ssize_t n_components = W.shape[1]
16 |     cdef Py_ssize_t n_samples = W.shape[0]  # n_features for H update
17 |     cdef double grad, pg, hess
18 |     cdef Py_ssize_t i, r, s, t
19 | 
20 |     with nogil:
21 |         for s in range(n_components):
22 |             t = permutation[s]
23 | 
24 |             for i in range(n_samples):
25 |                 # gradient = GW[i, t] where GW = np.dot(W, HHt) - XHt
26 |                 grad = -XHt[i, t]
27 | 
28 |                 for r in range(n_components):
29 |                     grad += HHt[t, r] * W[i, r]
30 | 
31 |                 # projected gradient
32 |                 pg = min(0., grad) if W[i, t] == 0 else grad
33 |                 violation += fabs(pg)
34 | 
35 |                 # Hessian
36 |                 hess = HHt[t, t]
37 | 
38 |                 if hess != 0:
39 |                     W[i, t] = max(W[i, t] - grad / hess, 0.)
40 | 
41 |     return violation
42 | 
--------------------------------------------------------------------------------
/sklearn/datasets/descr/diabetes.rst:
--------------------------------------------------------------------------------
1 | .. _diabetes_dataset:
2 | 
3 | Diabetes dataset
4 | ----------------
5 | 
6 | Ten baseline variables, age, sex, body mass index, average blood
7 | pressure, and six blood serum measurements were obtained for each of n =
8 | 442 diabetes patients, as well as the response of interest, a
9 | quantitative measure of disease progression one year after baseline.
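The dataset can be loaded with :func:`sklearn.datasets.load_diabetes`; a
minimal sketch (illustrative only)::

    >>> from sklearn.datasets import load_diabetes
    >>> data = load_diabetes()
    >>> data.data.shape, data.target.shape
    ((442, 10), (442,))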
10 | 
11 | **Data Set Characteristics:**
12 | 
13 |   :Number of Instances: 442
14 | 
15 |   :Number of Attributes: First 10 columns are numeric predictive values
16 | 
17 |   :Target: Column 11 is a quantitative measure of disease progression one year after baseline
18 | 
19 |   :Attribute Information:
20 |       - Age
21 |       - Sex
22 |       - Body mass index
23 |       - Average blood pressure
24 |       - S1
25 |       - S2
26 |       - S3
27 |       - S4
28 |       - S5
29 |       - S6
30 | 
31 | Note: Each of these 10 feature variables has been mean centered and scaled by the standard deviation times `n_samples` (i.e. the sum of squares of each column totals 1).
32 | 
33 | Source URL:
34 | https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html
35 | 
36 | For more information see:
37 | Bradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) "Least Angle Regression," Annals of Statistics (with discussion), 407-499.
38 | (https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf)
--------------------------------------------------------------------------------
/doc/tutorial/statistical_inference/finding_help.rst:
--------------------------------------------------------------------------------
1 | Finding help
2 | ============
3 | 
4 | 
5 | The project mailing list
6 | ------------------------
7 | 
8 | If you encounter a bug with ``scikit-learn`` or something that needs
9 | clarification in the docstring or the online documentation, please feel free to
10 | ask on the `Mailing List <https://mail.python.org/mailman/listinfo/scikit-learn>`_
11 | 
12 | 
13 | Q&A communities with Machine Learning practitioners
14 | ----------------------------------------------------
15 | 
16 |   :Quora.com:
17 | 
18 |     Quora has a topic for Machine Learning related questions that
19 |     also features some interesting discussions:
20 |     https://www.quora.com/topic/Machine-Learning
21 | 
22 |   :Stack Exchange:
23 | 
24 |     The Stack Exchange family of sites hosts `multiple subdomains for Machine Learning questions`_.
25 | 
26 | .. _`How do I learn machine learning?`: https://www.quora.com/How-do-I-learn-machine-learning-1
27 | 
28 | .. _`multiple subdomains for Machine Learning questions`: https://meta.stackexchange.com/q/130524
29 | 
30 | - `An excellent free online course for Machine Learning taught by Professor Andrew Ng of Stanford <https://www.coursera.org/learn/machine-learning>`_
31 | 
32 | - `Another excellent free online course that takes a more general approach to Artificial Intelligence <https://www.udacity.com/course/intro-to-artificial-intelligence--cs271>`_
--------------------------------------------------------------------------------
/sklearn/externals/joblib/externals/loky/backend/fork_exec.py:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | # Launch a subprocess using fork+exec and make sure only the needed file
3 | # descriptors are shared in the two processes.
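# (The child process closes every inherited file descriptor not listed in
# keep_fds before exec'ing a fresh interpreter; see close_fds below.)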
4 | # 5 | # author: Thomas Moreau and Olivier Grisel 6 | # 7 | import os 8 | import sys 9 | 10 | if sys.platform == "darwin" and sys.version_info < (3, 3): 11 | FileNotFoundError = OSError 12 | 13 | 14 | def close_fds(keep_fds): # pragma: no cover 15 | """Close all the file descriptors except those in keep_fds.""" 16 | 17 | # Make sure to keep stdout and stderr open for logging purpose 18 | keep_fds = set(keep_fds).union([1, 2]) 19 | 20 | # We try to retrieve all the open fds 21 | try: 22 | open_fds = set(int(fd) for fd in os.listdir('/proc/self/fd')) 23 | except FileNotFoundError: 24 | import resource 25 | max_nfds = resource.getrlimit(resource.RLIMIT_NOFILE)[0] 26 | open_fds = set(fd for fd in range(3, max_nfds)) 27 | open_fds.add(0) 28 | 29 | for i in open_fds - keep_fds: 30 | try: 31 | os.close(i) 32 | except OSError: 33 | pass 34 | 35 | 36 | def fork_exec(cmd, keep_fds): 37 | 38 | pid = os.fork() 39 | if pid == 0: # pragma: no cover 40 | close_fds(keep_fds) 41 | os.execv(sys.executable, cmd) 42 | else: 43 | return pid 44 | -------------------------------------------------------------------------------- /examples/exercises/plot_cv_digits.py: -------------------------------------------------------------------------------- 1 | """ 2 | ============================================= 3 | Cross-validation on Digits Dataset Exercise 4 | ============================================= 5 | 6 | A tutorial exercise using Cross-validation with an SVM on the Digits dataset. 7 | 8 | This exercise is used in the :ref:`cv_generators_tut` part of the 9 | :ref:`model_selection_tut` section of the :ref:`stat_learn_tut_index`. 10 | """ 11 | print(__doc__) 12 | 13 | 14 | import numpy as np 15 | from sklearn.model_selection import cross_val_score 16 | from sklearn import datasets, svm 17 | 18 | digits = datasets.load_digits() 19 | X = digits.data 20 | y = digits.target 21 | 22 | svc = svm.SVC(kernel='linear') 23 | C_s = np.logspace(-10, 0, 10) 24 | 25 | scores = list() 26 | scores_std = list() 27 | for C in C_s: 28 | svc.C = C 29 | this_scores = cross_val_score(svc, X, y, cv=5, n_jobs=1) 30 | scores.append(np.mean(this_scores)) 31 | scores_std.append(np.std(this_scores)) 32 | 33 | # Do the plotting 34 | import matplotlib.pyplot as plt 35 | plt.figure(1, figsize=(4, 3)) 36 | plt.clf() 37 | plt.semilogx(C_s, scores) 38 | plt.semilogx(C_s, np.array(scores) + np.array(scores_std), 'b--') 39 | plt.semilogx(C_s, np.array(scores) - np.array(scores_std), 'b--') 40 | locs, labels = plt.yticks() 41 | plt.yticks(locs, list(map(lambda x: "%g" % x, locs))) 42 | plt.ylabel('CV score') 43 | plt.xlabel('Parameter C') 44 | plt.ylim(0, 1.1) 45 | plt.show() 46 | -------------------------------------------------------------------------------- /examples/linear_model/plot_sgd_loss_functions.py: -------------------------------------------------------------------------------- 1 | """ 2 | ========================== 3 | SGD: convex loss functions 4 | ========================== 5 | 6 | A plot that compares the various convex loss functions supported by 7 | :class:`sklearn.linear_model.SGDClassifier` . 8 | """ 9 | print(__doc__) 10 | 11 | import numpy as np 12 | import matplotlib.pyplot as plt 13 | 14 | 15 | def modified_huber_loss(y_true, y_pred): 16 | z = y_pred * y_true 17 | loss = -4 * z 18 | loss[z >= -1] = (1 - z[z >= -1]) ** 2 19 | loss[z >= 1.] 
= 0 20 | return loss 21 | 22 | 23 | xmin, xmax = -4, 4 24 | xx = np.linspace(xmin, xmax, 100) 25 | lw = 2 26 | plt.plot([xmin, 0, 0, xmax], [1, 1, 0, 0], color='gold', lw=lw, 27 | label="Zero-one loss") 28 | plt.plot(xx, np.where(xx < 1, 1 - xx, 0), color='teal', lw=lw, 29 | label="Hinge loss") 30 | plt.plot(xx, -np.minimum(xx, 0), color='yellowgreen', lw=lw, 31 | label="Perceptron loss") 32 | plt.plot(xx, np.log2(1 + np.exp(-xx)), color='cornflowerblue', lw=lw, 33 | label="Log loss") 34 | plt.plot(xx, np.where(xx < 1, 1 - xx, 0) ** 2, color='orange', lw=lw, 35 | label="Squared hinge loss") 36 | plt.plot(xx, modified_huber_loss(xx, 1), color='darkorchid', lw=lw, 37 | linestyle='--', label="Modified Huber loss") 38 | plt.ylim((0, 8)) 39 | plt.legend(loc="upper right") 40 | plt.xlabel(r"Decision function $f(x)$") 41 | plt.ylabel("$L(y=1, f(x))$") 42 | plt.show() 43 | -------------------------------------------------------------------------------- /sklearn/manifold/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os.path import join 3 | 4 | import numpy 5 | from numpy.distutils.misc_util import Configuration 6 | from sklearn._build_utils import get_blas_info 7 | 8 | 9 | def configuration(parent_package="", top_path=None): 10 | config = Configuration("manifold", parent_package, top_path) 11 | libraries = [] 12 | if os.name == 'posix': 13 | libraries.append('m') 14 | config.add_extension("_utils", 15 | sources=["_utils.pyx"], 16 | include_dirs=[numpy.get_include()], 17 | libraries=libraries, 18 | extra_compile_args=["-O3"]) 19 | cblas_libs, blas_info = get_blas_info() 20 | eca = blas_info.pop('extra_compile_args', []) 21 | eca.append("-O4") 22 | config.add_extension("_barnes_hut_tsne", 23 | libraries=cblas_libs, 24 | sources=["_barnes_hut_tsne.pyx"], 25 | include_dirs=[join('..', 'src', 'cblas'), 26 | numpy.get_include(), 27 | blas_info.pop('include_dirs', [])], 28 | extra_compile_args=eca, **blas_info) 29 | 30 | config.add_subpackage('tests') 31 | 32 | return config 33 | 34 | 35 | if __name__ == "__main__": 36 | from numpy.distutils.core import setup 37 | setup(**configuration().todict()) 38 | -------------------------------------------------------------------------------- /doc/data_transforms.rst: -------------------------------------------------------------------------------- 1 | .. include:: includes/big_toc_css.rst 2 | 3 | .. _data-transforms: 4 | 5 | Dataset transformations 6 | ----------------------- 7 | 8 | scikit-learn provides a library of transformers, which may clean (see 9 | :ref:`preprocessing`), reduce (see :ref:`data_reduction`), expand (see 10 | :ref:`kernel_approximation`) or generate (see :ref:`feature_extraction`) 11 | feature representations. 12 | 13 | Like other estimators, these are represented by classes with a ``fit`` method, 14 | which learns model parameters (e.g. mean and standard deviation for 15 | normalization) from a training set, and a ``transform`` method which applies 16 | this transformation model to unseen data. ``fit_transform`` may be more 17 | convenient and efficient for modelling and transforming the training data 18 | simultaneously. 19 | 20 | Combining such transformers, either in parallel or series is covered in 21 | :ref:`combining_estimators`. :ref:`metrics` covers transforming feature 22 | spaces into affinity matrices, while :ref:`preprocessing_targets` considers 23 | transformations of the target space (e.g. categorical labels) for use in 24 | scikit-learn. 25 | 26 | .. 
26 | .. toctree:: 27 | 28 | modules/compose 29 | modules/feature_extraction 30 | modules/preprocessing 31 | modules/impute 32 | modules/unsupervised_reduction 33 | modules/random_projection 34 | modules/kernel_approximation 35 | modules/metrics 36 | modules/preprocessing_targets 37 | -------------------------------------------------------------------------------- /benchmarks/bench_plot_ward.py: -------------------------------------------------------------------------------- 1 | """ 2 | Benchmark scikit-learn's Ward implementation compared to SciPy's 3 | """ 4 | 5 | import time 6 | 7 | import numpy as np 8 | from scipy.cluster import hierarchy 9 | import matplotlib.pyplot as plt 10 | 11 | from sklearn.cluster import AgglomerativeClustering 12 | 13 | ward = AgglomerativeClustering(n_clusters=3, linkage='ward') 14 | 15 | n_samples = np.logspace(.5, 3, 9) 16 | n_features = np.logspace(1, 3.5, 7) 17 | N_samples, N_features = np.meshgrid(n_samples, 18 | n_features) 19 | scikits_time = np.zeros(N_samples.shape) 20 | scipy_time = np.zeros(N_samples.shape) 21 | 22 | for i, n in enumerate(n_samples): 23 | for j, p in enumerate(n_features): 24 | X = np.random.normal(size=(int(n), int(p))) 25 | t0 = time.time() 26 | ward.fit(X) 27 | scikits_time[j, i] = time.time() - t0 28 | t0 = time.time() 29 | hierarchy.ward(X) 30 | scipy_time[j, i] = time.time() - t0 31 | 32 | ratio = scikits_time / scipy_time 33 | 34 | plt.figure("scikit-learn Ward's method benchmark results") 35 | plt.imshow(np.log(ratio), aspect='auto', origin="lower") 36 | plt.colorbar() 37 | plt.contour(ratio, levels=[1, ], colors='k') 38 | plt.yticks(range(len(n_features)), n_features.astype(np.int)) 39 | plt.ylabel('N features') 40 | plt.xticks(range(len(n_samples)), n_samples.astype(np.int)) 41 | plt.xlabel('N samples') 42 | plt.title("Scikit's time, in units of scipy time (log)") 43 | plt.show() 44 | -------------------------------------------------------------------------------- /sklearn/covariance/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.covariance` module includes methods and algorithms to 3 | robustly estimate the covariance of features given a set of points. The 4 | precision matrix, defined as the inverse of the covariance, is also estimated. 5 | Covariance estimation is closely related to the theory of Gaussian Graphical 6 | Models. 
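For instance, an illustrative sketch with :class:`LedoitWolf`, one of the estimators exported below (the data here is synthetic):

    import numpy as np
    from sklearn.covariance import LedoitWolf

    X = np.random.RandomState(0).normal(size=(100, 5))
    lw = LedoitWolf().fit(X)
    lw.covariance_  # (5, 5) shrunk covariance estimate
    lw.precision_   # its inverse, the estimated precision matrix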
7 | """ 8 | 9 | from .empirical_covariance_ import empirical_covariance, EmpiricalCovariance, \ 10 | log_likelihood 11 | from .shrunk_covariance_ import shrunk_covariance, ShrunkCovariance, \ 12 | ledoit_wolf, ledoit_wolf_shrinkage, \ 13 | LedoitWolf, oas, OAS 14 | from .robust_covariance import fast_mcd, MinCovDet 15 | from .graph_lasso_ import graph_lasso, GraphLasso, GraphLassoCV,\ 16 | graphical_lasso, GraphicalLasso, GraphicalLassoCV 17 | from .elliptic_envelope import EllipticEnvelope 18 | 19 | 20 | __all__ = ['EllipticEnvelope', 21 | 'EmpiricalCovariance', 22 | 'GraphLasso', 23 | 'GraphLassoCV', 24 | 'GraphicalLasso', 25 | 'GraphicalLassoCV', 26 | 'LedoitWolf', 27 | 'MinCovDet', 28 | 'OAS', 29 | 'ShrunkCovariance', 30 | 'empirical_covariance', 31 | 'fast_mcd', 32 | 'graph_lasso', 33 | 'graphical_lasso', 34 | 'ledoit_wolf', 35 | 'ledoit_wolf_shrinkage', 36 | 'log_likelihood', 37 | 'oas', 38 | 'shrunk_covariance'] 39 | -------------------------------------------------------------------------------- /examples/linear_model/plot_sgd_separating_hyperplane.py: -------------------------------------------------------------------------------- 1 | """ 2 | ========================================= 3 | SGD: Maximum margin separating hyperplane 4 | ========================================= 5 | 6 | Plot the maximum margin separating hyperplane within a two-class 7 | separable dataset using a linear Support Vector Machines classifier 8 | trained using SGD. 9 | """ 10 | print(__doc__) 11 | 12 | import numpy as np 13 | import matplotlib.pyplot as plt 14 | from sklearn.linear_model import SGDClassifier 15 | from sklearn.datasets.samples_generator import make_blobs 16 | 17 | # we create 50 separable points 18 | X, Y = make_blobs(n_samples=50, centers=2, random_state=0, cluster_std=0.60) 19 | 20 | # fit the model 21 | clf = SGDClassifier(loss="hinge", alpha=0.01, max_iter=200, fit_intercept=True) 22 | clf.fit(X, Y) 23 | 24 | # plot the line, the points, and the nearest vectors to the plane 25 | xx = np.linspace(-1, 5, 10) 26 | yy = np.linspace(-1, 5, 10) 27 | 28 | X1, X2 = np.meshgrid(xx, yy) 29 | Z = np.empty(X1.shape) 30 | for (i, j), val in np.ndenumerate(X1): 31 | x1 = val 32 | x2 = X2[i, j] 33 | p = clf.decision_function([[x1, x2]]) 34 | Z[i, j] = p[0] 35 | levels = [-1.0, 0.0, 1.0] 36 | linestyles = ['dashed', 'solid', 'dashed'] 37 | colors = 'k' 38 | plt.contour(X1, X2, Z, levels, colors=colors, linestyles=linestyles) 39 | plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired, 40 | edgecolor='black', s=20) 41 | 42 | plt.axis('tight') 43 | plt.show() 44 | -------------------------------------------------------------------------------- /sklearn/cluster/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.cluster` module gathers popular unsupervised clustering 3 | algorithms. 
4 | """ 5 | 6 | from .spectral import spectral_clustering, SpectralClustering 7 | from .mean_shift_ import (mean_shift, MeanShift, 8 | estimate_bandwidth, get_bin_seeds) 9 | from .affinity_propagation_ import affinity_propagation, AffinityPropagation 10 | from .hierarchical import (ward_tree, AgglomerativeClustering, linkage_tree, 11 | FeatureAgglomeration) 12 | from .k_means_ import k_means, KMeans, MiniBatchKMeans 13 | from .dbscan_ import dbscan, DBSCAN 14 | from .optics_ import OPTICS, optics 15 | from .bicluster import SpectralBiclustering, SpectralCoclustering 16 | from .birch import Birch 17 | 18 | __all__ = ['AffinityPropagation', 19 | 'AgglomerativeClustering', 20 | 'Birch', 21 | 'DBSCAN', 22 | 'OPTICS', 23 | 'KMeans', 24 | 'FeatureAgglomeration', 25 | 'MeanShift', 26 | 'MiniBatchKMeans', 27 | 'SpectralClustering', 28 | 'affinity_propagation', 29 | 'dbscan', 30 | 'estimate_bandwidth', 31 | 'get_bin_seeds', 32 | 'k_means', 33 | 'linkage_tree', 34 | 'mean_shift', 35 | 'optics', 36 | 'spectral_clustering', 37 | 'ward_tree', 38 | 'SpectralBiclustering', 39 | 'SpectralCoclustering'] 40 | -------------------------------------------------------------------------------- /sklearn/svm/liblinear.pxd: -------------------------------------------------------------------------------- 1 | cimport numpy as np 2 | 3 | 4 | cdef extern from "src/liblinear/linear.h": 5 | cdef struct feature_node 6 | cdef struct problem 7 | cdef struct model 8 | cdef struct parameter 9 | ctypedef problem* problem_const_ptr "problem const *" 10 | ctypedef parameter* parameter_const_ptr "parameter const *" 11 | ctypedef char* char_const_ptr "char const *" 12 | char_const_ptr check_parameter(problem_const_ptr prob, parameter_const_ptr param) 13 | model *train(problem_const_ptr prob, parameter_const_ptr param) nogil 14 | int get_nr_feature (model *model) 15 | int get_nr_class (model *model) 16 | void get_n_iter (model *model, int *n_iter) 17 | void free_and_destroy_model (model **) 18 | void destroy_param (parameter *) 19 | 20 | cdef extern from "src/liblinear/liblinear_helper.c": 21 | void copy_w(void *, model *, int) 22 | parameter *set_parameter(int, double, double, int, char *, char *, int, int, double) 23 | problem *set_problem (char *, char *, np.npy_intp *, double, char *) 24 | problem *csr_set_problem (char *values, np.npy_intp *n_indices, 25 | char *indices, np.npy_intp *n_indptr, char *indptr, char *Y, 26 | np.npy_intp n_features, double bias, char *) 27 | 28 | model *set_model(parameter *, char *, np.npy_intp *, char *, double) 29 | 30 | double get_bias(model *) 31 | void free_problem (problem *) 32 | void free_parameter (parameter *) 33 | void set_verbosity(int) 34 | -------------------------------------------------------------------------------- /benchmarks/bench_plot_parallel_pairwise.py: -------------------------------------------------------------------------------- 1 | # Author: Mathieu Blondel 2 | # License: BSD 3 clause 3 | import time 4 | 5 | import matplotlib.pyplot as plt 6 | 7 | from sklearn.utils import check_random_state 8 | from sklearn.metrics.pairwise import pairwise_distances 9 | from sklearn.metrics.pairwise import pairwise_kernels 10 | 11 | def plot(func): 12 | random_state = check_random_state(0) 13 | one_core = [] 14 | multi_core = [] 15 | sample_sizes = range(1000, 6000, 1000) 16 | 17 | for n_samples in sample_sizes: 18 | X = random_state.rand(n_samples, 300) 19 | 20 | start = time.time() 21 | func(X, n_jobs=1) 22 | one_core.append(time.time() - start) 23 | 24 | start = time.time() 25 | 
func(X, n_jobs=-1) 26 | multi_core.append(time.time() - start) 27 | 28 | plt.figure('scikit-learn parallel %s benchmark results' % func.__name__) 29 | plt.plot(sample_sizes, one_core, label="one core") 30 | plt.plot(sample_sizes, multi_core, label="multi core") 31 | plt.xlabel('n_samples') 32 | plt.ylabel('Time (s)') 33 | plt.title('Parallel %s' % func.__name__) 34 | plt.legend() 35 | 36 | 37 | def euclidean_distances(X, n_jobs): 38 | return pairwise_distances(X, metric="euclidean", n_jobs=n_jobs) 39 | 40 | 41 | def rbf_kernels(X, n_jobs): 42 | return pairwise_kernels(X, metric="rbf", n_jobs=n_jobs, gamma=0.1) 43 | 44 | plot(euclidean_distances) 45 | plot(rbf_kernels) 46 | plt.show() 47 | -------------------------------------------------------------------------------- /sklearn/feature_selection/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.feature_selection` module implements feature selection 3 | algorithms. It currently includes univariate filter selection methods and the 4 | recursive feature elimination algorithm. 5 | """ 6 | 7 | from .univariate_selection import chi2 8 | from .univariate_selection import f_classif 9 | from .univariate_selection import f_oneway 10 | from .univariate_selection import f_regression 11 | from .univariate_selection import SelectPercentile 12 | from .univariate_selection import SelectKBest 13 | from .univariate_selection import SelectFpr 14 | from .univariate_selection import SelectFdr 15 | from .univariate_selection import SelectFwe 16 | from .univariate_selection import GenericUnivariateSelect 17 | 18 | from .variance_threshold import VarianceThreshold 19 | 20 | from .rfe import RFE 21 | from .rfe import RFECV 22 | 23 | from .from_model import SelectFromModel 24 | 25 | from .mutual_info_ import mutual_info_regression, mutual_info_classif 26 | 27 | 28 | __all__ = ['GenericUnivariateSelect', 29 | 'RFE', 30 | 'RFECV', 31 | 'SelectFdr', 32 | 'SelectFpr', 33 | 'SelectFwe', 34 | 'SelectKBest', 35 | 'SelectFromModel', 36 | 'SelectPercentile', 37 | 'VarianceThreshold', 38 | 'chi2', 39 | 'f_classif', 40 | 'f_oneway', 41 | 'f_regression', 42 | 'mutual_info_classif', 43 | 'mutual_info_regression'] 44 | -------------------------------------------------------------------------------- /examples/manifold/plot_swissroll.py: -------------------------------------------------------------------------------- 1 | """ 2 | =================================== 3 | Swiss Roll reduction with LLE 4 | =================================== 5 | 6 | An illustration of Swiss Roll reduction 7 | with locally linear embedding 8 | """ 9 | 10 | # Author: Fabian Pedregosa -- 11 | # License: BSD 3 clause (C) INRIA 2011 12 | 13 | print(__doc__) 14 | 15 | import matplotlib.pyplot as plt 16 | 17 | # This import is needed to modify the way figure behaves 18 | from mpl_toolkits.mplot3d import Axes3D 19 | Axes3D 20 | 21 | #---------------------------------------------------------------------- 22 | # Locally linear embedding of the swiss roll 23 | 24 | from sklearn import manifold, datasets 25 | X, color = datasets.samples_generator.make_swiss_roll(n_samples=1500) 26 | 27 | print("Computing LLE embedding") 28 | X_r, err = manifold.locally_linear_embedding(X, n_neighbors=12, 29 | n_components=2) 30 | print("Done. 
Reconstruction error: %g" % err) 31 | 32 | #---------------------------------------------------------------------- 33 | # Plot result 34 | 35 | fig = plt.figure() 36 | 37 | ax = fig.add_subplot(211, projection='3d') 38 | ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, cmap=plt.cm.Spectral) 39 | 40 | ax.set_title("Original data") 41 | ax = fig.add_subplot(212) 42 | ax.scatter(X_r[:, 0], X_r[:, 1], c=color, cmap=plt.cm.Spectral) 43 | plt.axis('tight') 44 | plt.xticks([]), plt.yticks([]) 45 | plt.title('Projected data') 46 | plt.show() 47 | -------------------------------------------------------------------------------- /examples/svm/plot_separating_hyperplane.py: -------------------------------------------------------------------------------- 1 | """ 2 | ========================================= 3 | SVM: Maximum margin separating hyperplane 4 | ========================================= 5 | 6 | Plot the maximum margin separating hyperplane within a two-class 7 | separable dataset using a Support Vector Machine classifier with 8 | linear kernel. 9 | """ 10 | print(__doc__) 11 | 12 | import numpy as np 13 | import matplotlib.pyplot as plt 14 | from sklearn import svm 15 | from sklearn.datasets import make_blobs 16 | 17 | 18 | # we create 40 separable points 19 | X, y = make_blobs(n_samples=40, centers=2, random_state=6) 20 | 21 | # fit the model, don't regularize for illustration purposes 22 | clf = svm.SVC(kernel='linear', C=1000) 23 | clf.fit(X, y) 24 | 25 | plt.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=plt.cm.Paired) 26 | 27 | # plot the decision function 28 | ax = plt.gca() 29 | xlim = ax.get_xlim() 30 | ylim = ax.get_ylim() 31 | 32 | # create grid to evaluate model 33 | xx = np.linspace(xlim[0], xlim[1], 30) 34 | yy = np.linspace(ylim[0], ylim[1], 30) 35 | YY, XX = np.meshgrid(yy, xx) 36 | xy = np.vstack([XX.ravel(), YY.ravel()]).T 37 | Z = clf.decision_function(xy).reshape(XX.shape) 38 | 39 | # plot decision boundary and margins 40 | ax.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], alpha=0.5, 41 | linestyles=['--', '-', '--']) 42 | # plot support vectors 43 | ax.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s=100, 44 | linewidth=1, facecolors='none', edgecolors='k') 45 | plt.show() 46 | -------------------------------------------------------------------------------- /sklearn/ensemble/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.ensemble` module includes ensemble-based methods for 3 | classification, regression and anomaly detection. 4 | """ 5 | 6 | from .base import BaseEnsemble 7 | from .forest import RandomForestClassifier 8 | from .forest import RandomForestRegressor 9 | from .forest import RandomTreesEmbedding 10 | from .forest import ExtraTreesClassifier 11 | from .forest import ExtraTreesRegressor 12 | from .bagging import BaggingClassifier 13 | from .bagging import BaggingRegressor 14 | from .iforest import IsolationForest 15 | from .weight_boosting import AdaBoostClassifier 16 | from .weight_boosting import AdaBoostRegressor 17 | from .gradient_boosting import GradientBoostingClassifier 18 | from .gradient_boosting import GradientBoostingRegressor 19 | from .voting_classifier import VotingClassifier 20 | 21 | from . import bagging 22 | from . import forest 23 | from . import weight_boosting 24 | from . import gradient_boosting 25 | from . 
import partial_dependence 26 | 27 | __all__ = ["BaseEnsemble", 28 | "RandomForestClassifier", "RandomForestRegressor", 29 | "RandomTreesEmbedding", "ExtraTreesClassifier", 30 | "ExtraTreesRegressor", "BaggingClassifier", 31 | "BaggingRegressor", "IsolationForest", "GradientBoostingClassifier", 32 | "GradientBoostingRegressor", "AdaBoostClassifier", 33 | "AdaBoostRegressor", "VotingClassifier", 34 | "bagging", "forest", "gradient_boosting", 35 | "partial_dependence", "weight_boosting"] 36 | -------------------------------------------------------------------------------- /sklearn/datasets/descr/california_housing.rst: -------------------------------------------------------------------------------- 1 | .. _california_housing_dataset: 2 | 3 | California Housing dataset 4 | -------------------------- 5 | 6 | **Data Set Characteristics:** 7 | 8 | :Number of Instances: 20640 9 | 10 | :Number of Attributes: 8 numeric, predictive attributes and the target 11 | 12 | :Attribute Information: 13 | - MedInc median income in block 14 | - HouseAge median house age in block 15 | - AveRooms average number of rooms 16 | - AveBedrms average number of bedrooms 17 | - Population block population 18 | - AveOccup average house occupancy 19 | - Latitude house block latitude 20 | - Longitude house block longitude 21 | 22 | :Missing Attribute Values: None 23 | 24 | This dataset was obtained from the StatLib repository. 25 | http://lib.stat.cmu.edu/datasets/ 26 | 27 | The target variable is the median house value for California districts. 28 | 29 | This dataset was derived from the 1990 U.S. census, using one row per census 30 | block group. A block group is the smallest geographical unit for which the U.S. 31 | Census Bureau publishes sample data (a block group typically has a population 32 | of 600 to 3,000 people). 33 | 34 | It can be downloaded/loaded using the 35 | :func:`sklearn.datasets.fetch_california_housing` function. 36 | 37 | .. topic:: References 38 | 39 | - Pace, R. Kelley and Ronald Barry, Sparse Spatial Autoregressions, 40 | Statistics and Probability Letters, 33 (1997) 291-297 41 |
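A minimal loading sketch (the function and the ``Bunch`` fields shown are those exported by :mod:`sklearn.datasets`)::

    from sklearn.datasets import fetch_california_housing

    housing = fetch_california_housing()
    print(housing.data.shape)     # (20640, 8)
    print(housing.feature_names)  # MedInc, HouseAge, AveRooms, ...
    print(housing.target[:3])     # median house values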
-------------------------------------------------------------------------------- /doc/tutorial/statistical_inference/index.rst: -------------------------------------------------------------------------------- 1 | .. _stat_learn_tut_index: 2 | 3 | ========================================================================== 4 | A tutorial on statistical-learning for scientific data processing 5 | ========================================================================== 6 | 7 | .. topic:: Statistical learning 8 | 9 | `Machine learning `_ is 10 | a technique of growing importance, as the 11 | size of the datasets that experimental sciences face is rapidly 12 | growing. Problems it tackles range from building a prediction function 13 | linking different observations, to classifying observations, or 14 | learning the structure in an unlabeled dataset. 15 | 16 | This tutorial will explore *statistical learning*, the use of 17 | machine learning techniques with the goal of `statistical inference 18 | `_: 19 | drawing conclusions on the data at hand. 20 | 21 | Scikit-learn is a Python module integrating classic machine 22 | learning algorithms in the tightly-knit world of scientific Python 23 | packages (`NumPy `_, `SciPy 24 | `_, `matplotlib 25 | `_). 26 | 27 | .. include:: ../../includes/big_toc_css.rst 28 | 29 | .. toctree:: 30 | :maxdepth: 2 31 | 32 | settings 33 | supervised_learning 34 | model_selection 35 | unsupervised_learning 36 | putting_together 37 | finding_help 38 | -------------------------------------------------------------------------------- /sklearn/decomposition/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.decomposition` module includes matrix decomposition 3 | algorithms, such as PCA, NMF or ICA. Most of the algorithms of 4 | this module can be regarded as dimensionality reduction techniques. 5 | """ 6 | 7 | from .nmf import NMF, non_negative_factorization 8 | from .pca import PCA 9 | from .incremental_pca import IncrementalPCA 10 | from .kernel_pca import KernelPCA 11 | from .sparse_pca import SparsePCA, MiniBatchSparsePCA 12 | from .truncated_svd import TruncatedSVD 13 | from .fastica_ import FastICA, fastica 14 | from .dict_learning import (dict_learning, dict_learning_online, sparse_encode, 15 | DictionaryLearning, MiniBatchDictionaryLearning, 16 | SparseCoder) 17 | from .factor_analysis import FactorAnalysis 18 | from ..utils.extmath import randomized_svd 19 | from .online_lda import LatentDirichletAllocation 20 | 21 | __all__ = ['DictionaryLearning', 22 | 'FastICA', 23 | 'IncrementalPCA', 24 | 'KernelPCA', 25 | 'MiniBatchDictionaryLearning', 26 | 'MiniBatchSparsePCA', 27 | 'NMF', 28 | 'PCA', 29 | 'SparseCoder', 30 | 'SparsePCA', 31 | 'dict_learning', 32 | 'dict_learning_online', 33 | 'fastica', 34 | 'non_negative_factorization', 35 | 'randomized_svd', 36 | 'sparse_encode', 37 | 'FactorAnalysis', 38 | 'TruncatedSVD', 39 | 'LatentDirichletAllocation'] 40 | -------------------------------------------------------------------------------- /examples/feature_selection/plot_rfe_with_cross_validation.py: -------------------------------------------------------------------------------- 1 | """ 2 | =================================================== 3 | Recursive feature elimination with cross-validation 4 | =================================================== 5 | 6 | A recursive feature elimination example with automatic tuning of the 7 | number of features selected with cross-validation. 8 | """ 9 | print(__doc__) 10 | 11 | import matplotlib.pyplot as plt 12 | from sklearn.svm import SVC 13 | from sklearn.model_selection import StratifiedKFold 14 | from sklearn.feature_selection import RFECV 15 | from sklearn.datasets import make_classification 16 | 17 | # Build a classification task using 3 informative features 18 | X, y = make_classification(n_samples=1000, n_features=25, n_informative=3, 19 | n_redundant=2, n_repeated=0, n_classes=8, 20 | n_clusters_per_class=1, random_state=0) 21 | 22 | # Create the RFE object and compute a cross-validated score. 23 | svc = SVC(kernel="linear") 24 | # The "accuracy" scoring is proportional to the number of correct 25 | # classifications 26 | rfecv = RFECV(estimator=svc, step=1, cv=StratifiedKFold(2), 27 | scoring='accuracy') 28 | rfecv.fit(X, y) 29 | 30 | print("Optimal number of features : %d" % rfecv.n_features_) 31 | 32 | # Plot number of features vs. 
cross-validation scores 33 | plt.figure() 34 | plt.xlabel("Number of features selected") 35 | plt.ylabel("Cross validation score (nb of correct classifications)") 36 | plt.plot(range(1, len(rfecv.grid_scores_) + 1), rfecv.grid_scores_) 37 | plt.show() 38 | -------------------------------------------------------------------------------- /sklearn/tree/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy 4 | from numpy.distutils.misc_util import Configuration 5 | 6 | 7 | def configuration(parent_package="", top_path=None): 8 | config = Configuration("tree", parent_package, top_path) 9 | libraries = [] 10 | if os.name == 'posix': 11 | libraries.append('m') 12 | config.add_extension("_tree", 13 | sources=["_tree.pyx"], 14 | include_dirs=[numpy.get_include()], 15 | libraries=libraries, 16 | extra_compile_args=["-O3"]) 17 | config.add_extension("_splitter", 18 | sources=["_splitter.pyx"], 19 | include_dirs=[numpy.get_include()], 20 | libraries=libraries, 21 | extra_compile_args=["-O3"]) 22 | config.add_extension("_criterion", 23 | sources=["_criterion.pyx"], 24 | include_dirs=[numpy.get_include()], 25 | libraries=libraries, 26 | extra_compile_args=["-O3"]) 27 | config.add_extension("_utils", 28 | sources=["_utils.pyx"], 29 | include_dirs=[numpy.get_include()], 30 | libraries=libraries, 31 | extra_compile_args=["-O3"]) 32 | 33 | config.add_subpackage("tests") 34 | 35 | return config 36 | 37 | if __name__ == "__main__": 38 | from numpy.distutils.core import setup 39 | setup(**configuration().todict()) 40 | -------------------------------------------------------------------------------- /examples/linear_model/plot_sgd_penalties.py: -------------------------------------------------------------------------------- 1 | """ 2 | ============== 3 | SGD: Penalties 4 | ============== 5 | 6 | Contours of where the penalty is equal to 1 7 | for the three penalties L1, L2 and elastic-net. 8 | 9 | All of the above are supported by 10 | :class:`sklearn.linear_model.stochastic_gradient`. 
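Concretely, for a weight vector ``w = (w1, w2)`` the plotted unit balls are L1(w) = |w1| + |w2|, L2(w) = w1 ** 2 + w2 ** 2 and elastic-net(w) = rho * L1(w) + (1 - rho) * L2(w); the code below uses rho = 0.5.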
11 | 12 | """ 13 | print(__doc__) 14 | 15 | import numpy as np 16 | import matplotlib.pyplot as plt 17 | 18 | l1_color = "navy" 19 | l2_color = "c" 20 | elastic_net_color = "darkorange" 21 | 22 | line = np.linspace(-1.5, 1.5, 1001) 23 | xx, yy = np.meshgrid(line, line) 24 | 25 | l2 = xx ** 2 + yy ** 2 26 | l1 = np.abs(xx) + np.abs(yy) 27 | rho = 0.5 28 | elastic_net = rho * l1 + (1 - rho) * l2 29 | 30 | plt.figure(figsize=(10, 10), dpi=100) 31 | ax = plt.gca() 32 | 33 | elastic_net_contour = plt.contour(xx, yy, elastic_net, levels=[1], 34 | colors=elastic_net_color) 35 | l2_contour = plt.contour(xx, yy, l2, levels=[1], colors=l2_color) 36 | l1_contour = plt.contour(xx, yy, l1, levels=[1], colors=l1_color) 37 | ax.set_aspect("equal") 38 | ax.spines['left'].set_position('center') 39 | ax.spines['right'].set_color('none') 40 | ax.spines['bottom'].set_position('center') 41 | ax.spines['top'].set_color('none') 42 | 43 | plt.clabel(elastic_net_contour, inline=1, fontsize=18, 44 | fmt={1.0: 'elastic-net'}, manual=[(-1, -1)]) 45 | plt.clabel(l2_contour, inline=1, fontsize=18, 46 | fmt={1.0: 'L2'}, manual=[(-1, -1)]) 47 | plt.clabel(l1_contour, inline=1, fontsize=18, 48 | fmt={1.0: 'L1'}, manual=[(-1, -1)]) 49 | 50 | plt.tight_layout() 51 | plt.show() 52 | -------------------------------------------------------------------------------- /sklearn/svm/src/liblinear/COPYRIGHT: -------------------------------------------------------------------------------- 1 | 2 | Copyright (c) 2007-2014 The LIBLINEAR Project. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions 7 | are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | 3. Neither name of copyright holders nor the names of its contributors 17 | may be used to endorse or promote products derived from this software 18 | without specific prior written permission. 19 | 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR 25 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
32 | -------------------------------------------------------------------------------- /sklearn/neighbors/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def configuration(parent_package='', top_path=None): 5 | import numpy 6 | from numpy.distutils.misc_util import Configuration 7 | 8 | config = Configuration('neighbors', parent_package, top_path) 9 | libraries = [] 10 | if os.name == 'posix': 11 | libraries.append('m') 12 | 13 | config.add_extension('ball_tree', 14 | sources=['ball_tree.pyx'], 15 | include_dirs=[numpy.get_include()], 16 | libraries=libraries) 17 | 18 | config.add_extension('kd_tree', 19 | sources=['kd_tree.pyx'], 20 | include_dirs=[numpy.get_include()], 21 | libraries=libraries) 22 | 23 | config.add_extension('dist_metrics', 24 | sources=['dist_metrics.pyx'], 25 | include_dirs=[numpy.get_include(), 26 | os.path.join(numpy.get_include(), 27 | 'numpy')], 28 | libraries=libraries) 29 | 30 | config.add_extension('typedefs', 31 | sources=['typedefs.pyx'], 32 | include_dirs=[numpy.get_include()], 33 | libraries=libraries) 34 | config.add_extension("quad_tree", 35 | sources=["quad_tree.pyx"], 36 | include_dirs=[numpy.get_include()], 37 | libraries=libraries) 38 | 39 | config.add_subpackage('tests') 40 | 41 | return config 42 | -------------------------------------------------------------------------------- /sklearn/utils/tests/test_linear_assignment.py: -------------------------------------------------------------------------------- 1 | # Author: Brian M. Clapper, G Varoquaux 2 | # License: BSD 3 | 4 | import numpy as np 5 | 6 | # XXX we should be testing the public API here 7 | from sklearn.utils.linear_assignment_ import _hungarian 8 | 9 | 10 | def test_hungarian(): 11 | matrices = [ 12 | # Square 13 | ([[400, 150, 400], 14 | [400, 450, 600], 15 | [300, 225, 300]], 16 | 850 # expected cost 17 | ), 18 | 19 | # Rectangular variant 20 | ([[400, 150, 400, 1], 21 | [400, 450, 600, 2], 22 | [300, 225, 300, 3]], 23 | 452 # expected cost 24 | ), 25 | 26 | # Square 27 | ([[10, 10, 8], 28 | [9, 8, 1], 29 | [9, 7, 4]], 30 | 18 31 | ), 32 | 33 | # Rectangular variant 34 | ([[10, 10, 8, 11], 35 | [9, 8, 1, 1], 36 | [9, 7, 4, 10]], 37 | 15 38 | ), 39 | 40 | # n == 2, m == 0 matrix 41 | ([[], []], 42 | 0 43 | ), 44 | ] 45 | 46 | for cost_matrix, expected_total in matrices: 47 | cost_matrix = np.array(cost_matrix) 48 | indexes = _hungarian(cost_matrix) 49 | total_cost = 0 50 | for r, c in indexes: 51 | x = cost_matrix[r, c] 52 | total_cost += x 53 | assert expected_total == total_cost 54 | 55 | indexes = _hungarian(cost_matrix.T) 56 | total_cost = 0 57 | for c, r in indexes: 58 | x = cost_matrix[r, c] 59 | total_cost += x 60 | assert expected_total == total_cost 61 | -------------------------------------------------------------------------------- /examples/neighbors/plot_regression.py: -------------------------------------------------------------------------------- 1 | """ 2 | ============================ 3 | Nearest Neighbors regression 4 | ============================ 5 | 6 | Demonstrate the resolution of a regression problem 7 | using a k-Nearest Neighbor and the interpolation of the 8 | target using both barycenter and constant weights. 
9 | 10 | """ 11 | print(__doc__) 12 | 13 | # Author: Alexandre Gramfort 14 | # Fabian Pedregosa 15 | # 16 | # License: BSD 3 clause (C) INRIA 17 | 18 | 19 | # ############################################################################# 20 | # Generate sample data 21 | import numpy as np 22 | import matplotlib.pyplot as plt 23 | from sklearn import neighbors 24 | 25 | np.random.seed(0) 26 | X = np.sort(5 * np.random.rand(40, 1), axis=0) 27 | T = np.linspace(0, 5, 500)[:, np.newaxis] 28 | y = np.sin(X).ravel() 29 | 30 | # Add noise to targets 31 | y[::5] += 1 * (0.5 - np.random.rand(8)) 32 | 33 | # ############################################################################# 34 | # Fit regression model 35 | n_neighbors = 5 36 | 37 | for i, weights in enumerate(['uniform', 'distance']): 38 | knn = neighbors.KNeighborsRegressor(n_neighbors, weights=weights) 39 | y_ = knn.fit(X, y).predict(T) 40 | 41 | plt.subplot(2, 1, i + 1) 42 | plt.scatter(X, y, c='k', label='data') 43 | plt.plot(T, y_, c='g', label='prediction') 44 | plt.axis('tight') 45 | plt.legend() 46 | plt.title("KNeighborsRegressor (k = %i, weights = '%s')" % (n_neighbors, 47 | weights)) 48 | 49 | plt.tight_layout() 50 | plt.show() 51 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # simple makefile to simplify repetitive build env management tasks under posix 2 | 3 | # caution: testing won't work on windows, see README 4 | 5 | PYTHON ?= python 6 | CYTHON ?= cython 7 | PYTEST ?= pytest 8 | CTAGS ?= ctags 9 | 10 | # skip doctests on 32bit python 11 | BITS := $(shell python -c 'import struct; print(8 * struct.calcsize("P"))') 12 | 13 | all: clean inplace test 14 | 15 | clean-ctags: 16 | rm -f tags 17 | 18 | clean: clean-ctags 19 | $(PYTHON) setup.py clean 20 | rm -rf dist 21 | 22 | in: inplace # just a shortcut 23 | inplace: 24 | $(PYTHON) setup.py build_ext -i 25 | 26 | test-code: in 27 | $(PYTEST) --showlocals -v sklearn --durations=20 28 | test-sphinxext: 29 | $(PYTEST) --showlocals -v doc/sphinxext/ 30 | test-doc: 31 | ifeq ($(BITS),64) 32 | $(PYTEST) $(shell find doc -name '*.rst' | sort) 33 | endif 34 | 35 | test-coverage: 36 | rm -rf coverage .coverage 37 | $(PYTEST) sklearn --showlocals -v --cov=sklearn --cov-report=html:coverage 38 | 39 | test: test-code test-sphinxext test-doc 40 | 41 | trailing-spaces: 42 | find sklearn -name "*.py" -exec perl -pi -e 's/[ \t]*$$//' {} \; 43 | 44 | cython: 45 | python setup.py build_src 46 | 47 | ctags: 48 | # make tags for symbol based navigation in emacs and vim 49 | # Install with: sudo apt-get install exuberant-ctags 50 | $(CTAGS) --python-kinds=-i -R sklearn 51 | 52 | doc: inplace 53 | $(MAKE) -C doc html 54 | 55 | doc-noplot: inplace 56 | $(MAKE) -C doc html-noplot 57 | 58 | code-analysis: 59 | flake8 sklearn | grep -v __init__ | grep -v external 60 | pylint -E -i y sklearn/ -d E1103,E0611,E1101 61 | 62 | flake8-diff: 63 | ./build_tools/travis/flake8_diff.sh 64 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | New BSD License 2 | 3 | Copyright (c) 2007–2018 The scikit-learn developers. 4 | All rights reserved. 5 | 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are met: 9 | 10 | a. 
Redistributions of source code must retain the above copyright notice, 11 | this list of conditions and the following disclaimer. 12 | b. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | c. Neither the name of the Scikit-learn Developers nor the names of 16 | its contributors may be used to endorse or promote products 17 | derived from this software without specific prior written 18 | permission. 19 | 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 | ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR 25 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 31 | DAMAGE. 32 | 33 | -------------------------------------------------------------------------------- /examples/svm/plot_svm_regression.py: -------------------------------------------------------------------------------- 1 | """ 2 | =================================================================== 3 | Support Vector Regression (SVR) using linear and non-linear kernels 4 | =================================================================== 5 | 6 | Toy example of 1D regression using linear, polynomial and RBF kernels. 
7 | 8 | """ 9 | print(__doc__) 10 | 11 | import numpy as np 12 | from sklearn.svm import SVR 13 | import matplotlib.pyplot as plt 14 | 15 | # ############################################################################# 16 | # Generate sample data 17 | X = np.sort(5 * np.random.rand(40, 1), axis=0) 18 | y = np.sin(X).ravel() 19 | 20 | # ############################################################################# 21 | # Add noise to targets 22 | y[::5] += 3 * (0.5 - np.random.rand(8)) 23 | 24 | # ############################################################################# 25 | # Fit regression model 26 | svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1) 27 | svr_lin = SVR(kernel='linear', C=1e3) 28 | svr_poly = SVR(kernel='poly', C=1e3, degree=2) 29 | y_rbf = svr_rbf.fit(X, y).predict(X) 30 | y_lin = svr_lin.fit(X, y).predict(X) 31 | y_poly = svr_poly.fit(X, y).predict(X) 32 | 33 | # ############################################################################# 34 | # Look at the results 35 | lw = 2 36 | plt.scatter(X, y, color='darkorange', label='data') 37 | plt.plot(X, y_rbf, color='navy', lw=lw, label='RBF model') 38 | plt.plot(X, y_lin, color='c', lw=lw, label='Linear model') 39 | plt.plot(X, y_poly, color='cornflowerblue', lw=lw, label='Polynomial model') 40 | plt.xlabel('data') 41 | plt.ylabel('target') 42 | plt.title('Support Vector Regression') 43 | plt.legend() 44 | plt.show() 45 | -------------------------------------------------------------------------------- /examples/linear_model/plot_sgd_weighted_samples.py: -------------------------------------------------------------------------------- 1 | """ 2 | ===================== 3 | SGD: Weighted samples 4 | ===================== 5 | 6 | Plot decision function of a weighted dataset, where the size of points 7 | is proportional to its weight. 
8 | """ 9 | print(__doc__) 10 | 11 | import numpy as np 12 | import matplotlib.pyplot as plt 13 | from sklearn import linear_model 14 | 15 | # we create 20 points 16 | np.random.seed(0) 17 | X = np.r_[np.random.randn(10, 2) + [1, 1], np.random.randn(10, 2)] 18 | y = [1] * 10 + [-1] * 10 19 | sample_weight = 100 * np.abs(np.random.randn(20)) 20 | # and assign a bigger weight to the last 10 samples 21 | sample_weight[:10] *= 10 22 | 23 | # plot the weighted data points 24 | xx, yy = np.meshgrid(np.linspace(-4, 5, 500), np.linspace(-4, 5, 500)) 25 | plt.figure() 26 | plt.scatter(X[:, 0], X[:, 1], c=y, s=sample_weight, alpha=0.9, 27 | cmap=plt.cm.bone, edgecolor='black') 28 | 29 | # fit the unweighted model 30 | clf = linear_model.SGDClassifier(alpha=0.01, max_iter=100) 31 | clf.fit(X, y) 32 | Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()]) 33 | Z = Z.reshape(xx.shape) 34 | no_weights = plt.contour(xx, yy, Z, levels=[0], linestyles=['solid']) 35 | 36 | # fit the weighted model 37 | clf = linear_model.SGDClassifier(alpha=0.01, max_iter=100) 38 | clf.fit(X, y, sample_weight=sample_weight) 39 | Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()]) 40 | Z = Z.reshape(xx.shape) 41 | samples_weights = plt.contour(xx, yy, Z, levels=[0], linestyles=['dashed']) 42 | 43 | plt.legend([no_weights.collections[0], samples_weights.collections[0]], 44 | ["no weights", "with weights"], loc="lower left") 45 | 46 | plt.xticks(()) 47 | plt.yticks(()) 48 | plt.show() 49 | -------------------------------------------------------------------------------- /examples/mixture/plot_gmm_pdf.py: -------------------------------------------------------------------------------- 1 | """ 2 | ========================================= 3 | Density Estimation for a Gaussian mixture 4 | ========================================= 5 | 6 | Plot the density estimation of a mixture of two Gaussians. Data is 7 | generated from two Gaussians with different centers and covariance 8 | matrices. 9 | """ 10 | 11 | import numpy as np 12 | import matplotlib.pyplot as plt 13 | from matplotlib.colors import LogNorm 14 | from sklearn import mixture 15 | 16 | n_samples = 300 17 | 18 | # generate random sample, two components 19 | np.random.seed(0) 20 | 21 | # generate spherical data centered on (20, 20) 22 | shifted_gaussian = np.random.randn(n_samples, 2) + np.array([20, 20]) 23 | 24 | # generate zero centered stretched Gaussian data 25 | C = np.array([[0., -0.7], [3.5, .7]]) 26 | stretched_gaussian = np.dot(np.random.randn(n_samples, 2), C) 27 | 28 | # concatenate the two datasets into the final training set 29 | X_train = np.vstack([shifted_gaussian, stretched_gaussian]) 30 | 31 | # fit a Gaussian Mixture Model with two components 32 | clf = mixture.GaussianMixture(n_components=2, covariance_type='full') 33 | clf.fit(X_train) 34 | 35 | # display predicted scores by the model as a contour plot 36 | x = np.linspace(-20., 30.) 37 | y = np.linspace(-20., 40.) 
38 | X, Y = np.meshgrid(x, y) 39 | XX = np.array([X.ravel(), Y.ravel()]).T 40 | Z = -clf.score_samples(XX) 41 | Z = Z.reshape(X.shape) 42 | 43 | CS = plt.contour(X, Y, Z, norm=LogNorm(vmin=1.0, vmax=1000.0), 44 | levels=np.logspace(0, 3, 10)) 45 | CB = plt.colorbar(CS, shrink=0.8, extend='both') 46 | plt.scatter(X_train[:, 0], X_train[:, 1], .8) 47 | 48 | plt.title('Negative log-likelihood predicted by a GMM') 49 | plt.axis('tight') 50 | plt.show() 51 | -------------------------------------------------------------------------------- /examples/ensemble/plot_forest_importances_faces.py: -------------------------------------------------------------------------------- 1 | """ 2 | ================================================= 3 | Pixel importances with a parallel forest of trees 4 | ================================================= 5 | 6 | This example shows the use of forests of trees to evaluate the importance 7 | of the pixels in an image classification task (faces). The hotter the pixel, 8 | the more important. 9 | 10 | The code below also illustrates how the construction and the computation 11 | of the predictions can be parallelized within multiple jobs. 12 | """ 13 | print(__doc__) 14 | 15 | from time import time 16 | import matplotlib.pyplot as plt 17 | 18 | from sklearn.datasets import fetch_olivetti_faces 19 | from sklearn.ensemble import ExtraTreesClassifier 20 | 21 | # Number of cores to use to perform parallel fitting of the forest model 22 | n_jobs = 1 23 | 24 | # Load the faces dataset 25 | data = fetch_olivetti_faces() 26 | X = data.images.reshape((len(data.images), -1)) 27 | y = data.target 28 | 29 | mask = y < 5 # Limit to 5 classes 30 | X = X[mask] 31 | y = y[mask] 32 | 33 | # Build a forest and compute the pixel importances 34 | print("Fitting ExtraTreesClassifier on faces data with %d cores..." % n_jobs) 35 | t0 = time() 36 | forest = ExtraTreesClassifier(n_estimators=1000, 37 | max_features=128, 38 | n_jobs=n_jobs, 39 | random_state=0) 40 | 41 | forest.fit(X, y) 42 | print("done in %0.3fs" % (time() - t0)) 43 | importances = forest.feature_importances_ 44 | importances = importances.reshape(data.images[0].shape) 45 | 46 | # Plot pixel importances 47 | plt.matshow(importances, cmap=plt.cm.hot) 48 | plt.title("Pixel importances with forests of trees") 49 | plt.show() 50 | -------------------------------------------------------------------------------- /benchmarks/bench_glm.py: -------------------------------------------------------------------------------- 1 | """ 2 | A comparison of different methods in GLM 3 | 4 | Data comes from a random square matrix. 5 | 6 | """ 7 | from datetime import datetime 8 | import numpy as np 9 | from sklearn import linear_model 10 | from sklearn.utils.bench import total_seconds 11 | 12 | 13 | if __name__ == '__main__': 14 | 15 | import matplotlib.pyplot as plt 16 | 17 | n_iter = 40 18 | 19 | time_ridge = np.empty(n_iter) 20 | time_ols = np.empty(n_iter) 21 | time_lasso = np.empty(n_iter) 22 | 23 | dimensions = 500 * np.arange(1, n_iter + 1) 24 | 25 | for i in range(n_iter): 26 | 27 | print('Iteration %s of %s' % (i, n_iter)) 28 | 29 | n_samples, n_features = 10 * i + 3, 10 * i + 3 30 | 31 | X = np.random.randn(n_samples, n_features) 32 | Y = np.random.randn(n_samples) 33 | 34 | start = datetime.now() 35 | ridge = linear_model.Ridge(alpha=1.) 
36 | ridge.fit(X, Y) 37 | time_ridge[i] = total_seconds(datetime.now() - start) 38 | 39 | start = datetime.now() 40 | ols = linear_model.LinearRegression() 41 | ols.fit(X, Y) 42 | time_ols[i] = total_seconds(datetime.now() - start) 43 | 44 | start = datetime.now() 45 | lasso = linear_model.LassoLars() 46 | lasso.fit(X, Y) 47 | time_lasso[i] = total_seconds(datetime.now() - start) 48 | 49 | plt.figure('scikit-learn GLM benchmark results') 50 | plt.xlabel('Dimensions') 51 | plt.ylabel('Time (s)') 52 | plt.plot(dimensions, time_ridge, color='r') 53 | plt.plot(dimensions, time_ols, color='g') 54 | plt.plot(dimensions, time_lasso, color='b') 55 | 56 | plt.legend(['Ridge', 'OLS', 'LassoLars'], loc='upper left') 57 | plt.axis('tight') 58 | plt.show() 59 | -------------------------------------------------------------------------------- /examples/ensemble/plot_adaboost_regression.py: -------------------------------------------------------------------------------- 1 | """ 2 | ====================================== 3 | Decision Tree Regression with AdaBoost 4 | ====================================== 5 | 6 | A decision tree is boosted using the AdaBoost.R2 [1]_ algorithm on a 1D 7 | sinusoidal dataset with a small amount of Gaussian noise. 8 | 299 boosts (300 decision trees) is compared with a single decision tree 9 | regressor. As the number of boosts is increased the regressor can fit more 10 | detail. 11 | 12 | .. [1] H. Drucker, "Improving Regressors using Boosting Techniques", 1997. 13 | 14 | """ 15 | print(__doc__) 16 | 17 | # Author: Noel Dawe 18 | # 19 | # License: BSD 3 clause 20 | 21 | # importing necessary libraries 22 | import numpy as np 23 | import matplotlib.pyplot as plt 24 | from sklearn.tree import DecisionTreeRegressor 25 | from sklearn.ensemble import AdaBoostRegressor 26 | 27 | # Create the dataset 28 | rng = np.random.RandomState(1) 29 | X = np.linspace(0, 6, 100)[:, np.newaxis] 30 | y = np.sin(X).ravel() + np.sin(6 * X).ravel() + rng.normal(0, 0.1, X.shape[0]) 31 | 32 | # Fit regression model 33 | regr_1 = DecisionTreeRegressor(max_depth=4) 34 | 35 | regr_2 = AdaBoostRegressor(DecisionTreeRegressor(max_depth=4), 36 | n_estimators=300, random_state=rng) 37 | 38 | regr_1.fit(X, y) 39 | regr_2.fit(X, y) 40 | 41 | # Predict 42 | y_1 = regr_1.predict(X) 43 | y_2 = regr_2.predict(X) 44 | 45 | # Plot the results 46 | plt.figure() 47 | plt.scatter(X, y, c="k", label="training samples") 48 | plt.plot(X, y_1, c="g", label="n_estimators=1", linewidth=2) 49 | plt.plot(X, y_2, c="r", label="n_estimators=300", linewidth=2) 50 | plt.xlabel("data") 51 | plt.ylabel("target") 52 | plt.title("Boosted Decision Tree Regression") 53 | plt.legend() 54 | plt.show() 55 | -------------------------------------------------------------------------------- /examples/feature_selection/plot_select_from_model_boston.py: -------------------------------------------------------------------------------- 1 | """ 2 | =================================================== 3 | Feature selection using SelectFromModel and LassoCV 4 | =================================================== 5 | 6 | Use SelectFromModel meta-transformer along with Lasso to select the best 7 | couple of features from the Boston dataset. 
8 | """ 9 | # Author: Manoj Kumar 10 | # License: BSD 3 clause 11 | 12 | print(__doc__) 13 | 14 | import matplotlib.pyplot as plt 15 | import numpy as np 16 | 17 | from sklearn.datasets import load_boston 18 | from sklearn.feature_selection import SelectFromModel 19 | from sklearn.linear_model import LassoCV 20 | 21 | # Load the boston dataset. 22 | boston = load_boston() 23 | X, y = boston['data'], boston['target'] 24 | 25 | # We use the base estimator LassoCV since the L1 norm promotes sparsity of features. 26 | clf = LassoCV(cv=5) 27 | 28 | # Set a minimum threshold of 0.25 29 | sfm = SelectFromModel(clf, threshold=0.25) 30 | sfm.fit(X, y) 31 | n_features = sfm.transform(X).shape[1] 32 | 33 | # Reset the threshold till the number of features equals two. 34 | # Note that the attribute can be set directly instead of repeatedly 35 | # fitting the metatransformer. 36 | while n_features > 2: 37 | sfm.threshold += 0.1 38 | X_transform = sfm.transform(X) 39 | n_features = X_transform.shape[1] 40 | 41 | # Plot the selected two features from X. 42 | plt.title( 43 | "Features selected from Boston using SelectFromModel with " 44 | "threshold %0.3f." % sfm.threshold) 45 | feature1 = X_transform[:, 0] 46 | feature2 = X_transform[:, 1] 47 | plt.plot(feature1, feature2, 'r.') 48 | plt.xlabel("Feature number 1") 49 | plt.ylabel("Feature number 2") 50 | plt.ylim([np.min(feature2), np.max(feature2)]) 51 | plt.show() 52 | -------------------------------------------------------------------------------- /sklearn/metrics/cluster/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`sklearn.metrics.cluster` submodule contains evaluation metrics for 3 | cluster analysis results. There are two forms of evaluation: 4 | 5 | - supervised, which uses a ground truth class values for each sample. 6 | - unsupervised, which does not and measures the 'quality' of the model itself. 
7 | """ 8 | from .supervised import adjusted_mutual_info_score 9 | from .supervised import normalized_mutual_info_score 10 | from .supervised import adjusted_rand_score 11 | from .supervised import completeness_score 12 | from .supervised import contingency_matrix 13 | from .supervised import expected_mutual_information 14 | from .supervised import homogeneity_completeness_v_measure 15 | from .supervised import homogeneity_score 16 | from .supervised import mutual_info_score 17 | from .supervised import v_measure_score 18 | from .supervised import fowlkes_mallows_score 19 | from .supervised import entropy 20 | from .unsupervised import silhouette_samples 21 | from .unsupervised import silhouette_score 22 | from .unsupervised import calinski_harabasz_score 23 | from .unsupervised import calinski_harabaz_score 24 | from .unsupervised import davies_bouldin_score 25 | from .bicluster import consensus_score 26 | 27 | __all__ = ["adjusted_mutual_info_score", "normalized_mutual_info_score", 28 | "adjusted_rand_score", "completeness_score", "contingency_matrix", 29 | "expected_mutual_information", "homogeneity_completeness_v_measure", 30 | "homogeneity_score", "mutual_info_score", "v_measure_score", 31 | "fowlkes_mallows_score", "entropy", "silhouette_samples", 32 | "silhouette_score", "calinski_harabaz_score", 33 | "calinski_harabasz_score", "davies_bouldin_score", 34 | "consensus_score"] 35 | -------------------------------------------------------------------------------- /examples/decomposition/plot_pca_iris.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | ========================================================= 6 | PCA example with Iris Data-set 7 | ========================================================= 8 | 9 | Principal Component Analysis applied to the Iris dataset. 10 | 11 | See `here `_ for more 12 | information on this dataset. 
13 | 14 | """ 15 | print(__doc__) 16 | 17 | 18 | # Code source: Gaël Varoquaux 19 | # License: BSD 3 clause 20 | 21 | import numpy as np 22 | import matplotlib.pyplot as plt 23 | from mpl_toolkits.mplot3d import Axes3D 24 | 25 | 26 | from sklearn import decomposition 27 | from sklearn import datasets 28 | 29 | np.random.seed(5) 30 | 31 | centers = [[1, 1], [-1, -1], [1, -1]] 32 | iris = datasets.load_iris() 33 | X = iris.data 34 | y = iris.target 35 | 36 | fig = plt.figure(1, figsize=(4, 3)) 37 | plt.clf() 38 | ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134) 39 | 40 | plt.cla() 41 | pca = decomposition.PCA(n_components=3) 42 | pca.fit(X) 43 | X = pca.transform(X) 44 | 45 | for name, label in [('Setosa', 0), ('Versicolour', 1), ('Virginica', 2)]: 46 | ax.text3D(X[y == label, 0].mean(), 47 | X[y == label, 1].mean() + 1.5, 48 | X[y == label, 2].mean(), name, 49 | horizontalalignment='center', 50 | bbox=dict(alpha=.5, edgecolor='w', facecolor='w')) 51 | # Reorder the labels to have colors matching the cluster results 52 | y = np.choose(y, [1, 2, 0]).astype(np.float) 53 | ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=y, cmap=plt.cm.nipy_spectral, 54 | edgecolor='k') 55 | 56 | ax.w_xaxis.set_ticklabels([]) 57 | ax.w_yaxis.set_ticklabels([]) 58 | ax.w_zaxis.set_ticklabels([]) 59 | 60 | plt.show() 61 | -------------------------------------------------------------------------------- /examples/tree/plot_tree_regression.py: -------------------------------------------------------------------------------- 1 | """ 2 | =================================================================== 3 | Decision Tree Regression 4 | =================================================================== 5 | 6 | A 1D regression with a decision tree. 7 | 8 | A :ref:`decision tree ` is 9 | used to fit a sine curve with additional noisy observations. As a result, it 10 | learns local linear regressions approximating the sine curve. 11 | 12 | We can see that if the maximum depth of the tree (controlled by the 13 | `max_depth` parameter) is set too high, the decision trees learn overly fine 14 | details of the training data and learn from the noise, i.e. they overfit. 
15 | """ 16 | print(__doc__) 17 | 18 | # Import the necessary modules and libraries 19 | import numpy as np 20 | from sklearn.tree import DecisionTreeRegressor 21 | import matplotlib.pyplot as plt 22 | 23 | # Create a random dataset 24 | rng = np.random.RandomState(1) 25 | X = np.sort(5 * rng.rand(80, 1), axis=0) 26 | y = np.sin(X).ravel() 27 | y[::5] += 3 * (0.5 - rng.rand(16)) 28 | 29 | # Fit regression model 30 | regr_1 = DecisionTreeRegressor(max_depth=2) 31 | regr_2 = DecisionTreeRegressor(max_depth=5) 32 | regr_1.fit(X, y) 33 | regr_2.fit(X, y) 34 | 35 | # Predict 36 | X_test = np.arange(0.0, 5.0, 0.01)[:, np.newaxis] 37 | y_1 = regr_1.predict(X_test) 38 | y_2 = regr_2.predict(X_test) 39 | 40 | # Plot the results 41 | plt.figure() 42 | plt.scatter(X, y, s=20, edgecolor="black", 43 | c="darkorange", label="data") 44 | plt.plot(X_test, y_1, color="cornflowerblue", 45 | label="max_depth=2", linewidth=2) 46 | plt.plot(X_test, y_2, color="yellowgreen", label="max_depth=5", linewidth=2) 47 | plt.xlabel("data") 48 | plt.ylabel("target") 49 | plt.title("Decision Tree Regression") 50 | plt.legend() 51 | plt.show() 52 | -------------------------------------------------------------------------------- /examples/svm/plot_custom_kernel.py: -------------------------------------------------------------------------------- 1 | """ 2 | ====================== 3 | SVM with custom kernel 4 | ====================== 5 | 6 | Simple usage of Support Vector Machines to classify a sample. It will 7 | plot the decision surface and the support vectors. 8 | 9 | """ 10 | print(__doc__) 11 | 12 | import numpy as np 13 | import matplotlib.pyplot as plt 14 | from sklearn import svm, datasets 15 | 16 | # import some data to play with 17 | iris = datasets.load_iris() 18 | X = iris.data[:, :2] # we only take the first two features. We could 19 | # avoid this ugly slicing by using a two-dim dataset 20 | Y = iris.target 21 | 22 | 23 | def my_kernel(X, Y): 24 | """ 25 | We create a custom kernel: 26 | 27 | (2 0) 28 | k(X, Y) = X ( ) Y.T 29 | (0 1) 30 | """ 31 | M = np.array([[2, 0], [0, 1.0]]) 32 | return np.dot(np.dot(X, M), Y.T) 33 | 34 | 35 | h = .02 # step size in the mesh 36 | 37 | # we create an instance of SVM and fit out data. 38 | clf = svm.SVC(kernel=my_kernel) 39 | clf.fit(X, Y) 40 | 41 | # Plot the decision boundary. For that, we will assign a color to each 42 | # point in the mesh [x_min, x_max]x[y_min, y_max]. 
43 | x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1 44 | y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1 45 | xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h)) 46 | Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]) 47 | 48 | # Put the result into a color plot 49 | Z = Z.reshape(xx.shape) 50 | plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired) 51 | 52 | # Plot also the training points 53 | plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired, edgecolors='k') 54 | plt.title('3-Class classification using Support Vector Machine with custom' 55 | ' kernel') 56 | plt.axis('tight') 57 | plt.show() 58 | -------------------------------------------------------------------------------- /sklearn/utils/tests/test_deprecation.py: -------------------------------------------------------------------------------- 1 | # Authors: Raghav RV 2 | # License: BSD 3 clause 3 | 4 | 5 | import sys 6 | import pickle 7 | 8 | from sklearn.utils.deprecation import _is_deprecated 9 | from sklearn.utils.deprecation import deprecated 10 | from sklearn.utils.testing import assert_warns_message 11 | from sklearn.utils.testing import SkipTest 12 | 13 | 14 | @deprecated('qwerty') 15 | class MockClass1: 16 | pass 17 | 18 | 19 | class MockClass2: 20 | @deprecated('mockclass2_method') 21 | def method(self): 22 | pass 23 | 24 | 25 | class MockClass3: 26 | @deprecated() 27 | def __init__(self): 28 | pass 29 | 30 | 31 | class MockClass4: 32 | pass 33 | 34 | 35 | @deprecated() 36 | def mock_function(): 37 | return 10 38 | 39 | 40 | def test_deprecated(): 41 | assert_warns_message(DeprecationWarning, 'qwerty', MockClass1) 42 | assert_warns_message(DeprecationWarning, 'mockclass2_method', 43 | MockClass2().method) 44 | assert_warns_message(DeprecationWarning, 'deprecated', MockClass3) 45 | val = assert_warns_message(DeprecationWarning, 'deprecated', mock_function) 46 | assert val == 10 47 | 48 | 49 | def test_is_deprecated(): 50 | if sys.version_info < (3, 5): 51 | raise SkipTest("This test will run only on python3.5 and above") 52 | # Test if _is_deprecated helper identifies wrapping via deprecated 53 | # NOTE it works only for class methods and functions 54 | assert _is_deprecated(MockClass1.__init__) 55 | assert _is_deprecated(MockClass2().method) 56 | assert _is_deprecated(MockClass3.__init__) 57 | assert not _is_deprecated(MockClass4.__init__) 58 | assert _is_deprecated(mock_function) 59 | 60 | 61 | def test_pickle(): 62 | pickle.loads(pickle.dumps(mock_function)) 63 | -------------------------------------------------------------------------------- /sklearn/externals/joblib/_multiprocessing_helpers.py: -------------------------------------------------------------------------------- 1 | """Helper module to factorize the conditional multiprocessing import logic 2 | 3 | We use a distinct module to simplify import statements and avoid introducing 4 | circular dependencies (for instance for the assert_spawning name). 5 | """ 6 | import os 7 | import sys 8 | import warnings 9 | 10 | 11 | # Obtain the configuration from the environment, assuming it is 1 (on) 12 | # by default; a value of 0 is turned into None (off). This should fail 13 | # loudly if some non-0/1 value is set. 
--------------------------------------------------------------------------------
/sklearn/externals/joblib/_multiprocessing_helpers.py:
--------------------------------------------------------------------------------
"""Helper module to factorize the conditional multiprocessing import logic.

We use a distinct module to simplify import statements and avoid introducing
circular dependencies (for instance for the assert_spawning name).
"""
import os
import sys
import warnings


# Obtain the multiprocessing setting from the environment: enabled (1) by
# default, disabled when set to 0 (mp becomes None). Any other non-integer
# value makes the int() conversion fail loudly, which is intentional.
mp = int(os.environ.get('JOBLIB_MULTIPROCESSING', 1)) or None
if mp:
    try:
        import multiprocessing as mp
    except ImportError:
        mp = None

# 2nd stage: validate that locking is available on the system and
# issue a warning if not
if mp is not None:
    try:
        if sys.version_info < (3, 3):
            Semaphore = mp.Semaphore
        else:
            # Using mp.Semaphore has the side effect of setting the default
            # backend for multiprocessing. To avoid that, we use the 'spawn'
            # context, which is available on all supported platforms.
            ctx = mp.get_context('spawn')
            Semaphore = ctx.Semaphore
        _sem = Semaphore()
        del _sem  # cleanup
    except (ImportError, OSError) as e:
        mp = None
        warnings.warn('%s. joblib will operate in serial mode' % (e,))


# 3rd stage: backward compat for the assert_spawning helper
if mp is not None:
    try:
        # Python 3.4+
        from multiprocessing.context import assert_spawning
    except ImportError:
        from multiprocessing.forking import assert_spawning
else:
    assert_spawning = None
--------------------------------------------------------------------------------
/doc/modules/cross_decomposition.rst:
--------------------------------------------------------------------------------
.. _cross_decomposition:

===================
Cross decomposition
===================

.. currentmodule:: sklearn.cross_decomposition

The cross decomposition module contains two main families of algorithms:
partial least squares (PLS) and canonical correlation analysis (CCA).

These families of algorithms are useful for finding linear relations between
two multivariate datasets: the ``X`` and ``Y`` arguments of the ``fit``
method are 2D arrays.

.. figure:: ../auto_examples/cross_decomposition/images/sphx_glr_plot_compare_cross_decomposition_001.png
   :target: ../auto_examples/cross_decomposition/plot_compare_cross_decomposition.html
   :scale: 75%
   :align: center


Cross decomposition algorithms find the fundamental relations between two
matrices (X and Y). They are latent variable approaches to modeling the
covariance structures in these two spaces. They try to find the
multidimensional direction in the X space that explains the direction of
maximum multidimensional variance in the Y space. PLS regression is
particularly suited when the matrix of predictors has more variables than
observations, and when there is multicollinearity among the X values. By
contrast, standard regression will fail in these cases.

Classes included in this module are :class:`PLSRegression`,
:class:`PLSCanonical`, :class:`CCA` and :class:`PLSSVD`.


.. topic:: Reference:

   * JA Wegelin
     `A survey of Partial Least Squares (PLS) methods, with emphasis on the two-block case `_

.. topic:: Examples:

   * :ref:`sphx_glr_auto_examples_cross_decomposition_plot_compare_cross_decomposition.py`
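
A minimal usage sketch of the ``fit``/``predict`` interface described above
(the random data below is made up for illustration)::

    import numpy as np
    from sklearn.cross_decomposition import PLSRegression

    rng = np.random.RandomState(0)
    X = rng.randn(20, 10)                  # 20 samples, 10 predictors
    # Y depends (noisily) on the first two columns of X
    Y = X[:, :2].dot(rng.randn(2, 3)) + 0.1 * rng.randn(20, 3)

    pls = PLSRegression(n_components=2)
    pls.fit(X, Y)                          # both X and Y are 2D arrays
    Y_pred = pls.predict(X)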
--------------------------------------------------------------------------------
/sklearn/__check_build/__init__.py:
--------------------------------------------------------------------------------
"""Module to give helpful messages to users who did not compile
scikit-learn properly.
"""
import os

INPLACE_MSG = """
It appears that you are importing a local scikit-learn source tree. For
this, you need an inplace install. Maybe you are in the source
directory and need to try from another location."""

STANDARD_MSG = """
If you have used an installer, please check that it is suited for your
Python version, your operating system and your platform."""


def raise_build_error(e):
    # Raise a comprehensible error and list the contents of the
    # directory to help debugging on the mailing list.
    local_dir = os.path.split(__file__)[0]
    msg = STANDARD_MSG
    if local_dir == "sklearn/__check_build":
        # Picking up the local install: this will work only if the
        # install is an 'inplace build'
        msg = INPLACE_MSG
    # Lay the directory listing out in three columns.
    dir_content = list()
    for i, filename in enumerate(os.listdir(local_dir)):
        if ((i + 1) % 3):
            dir_content.append(filename.ljust(26))
        else:
            dir_content.append(filename + '\n')
    raise ImportError("""%s
___________________________________________________________________________
Contents of %s:
%s
___________________________________________________________________________
It seems that scikit-learn has not been built correctly.

If you have installed scikit-learn from source, please do not forget
to build the package before using it: run `python setup.py install` or
`make` in the source directory.
%s""" % (e, local_dir, ''.join(dir_content).strip(), msg))

try:
    from ._check_build import check_build  # noqa
except ImportError as e:
    raise_build_error(e)
--------------------------------------------------------------------------------
/sklearn/cluster/_dbscan_inner.pyx:
--------------------------------------------------------------------------------
# Fast inner loop for DBSCAN.
# Author: Lars Buitinck
# License: 3-clause BSD

cimport cython
from libcpp.vector cimport vector
cimport numpy as np
import numpy as np


# Work around a Cython bug: C++ exceptions are not caught unless thrown
# within a cdef function with an "except +" declaration.
cdef inline void push(vector[np.npy_intp] &stack, np.npy_intp i) except +:
    stack.push_back(i)


@cython.boundscheck(False)
@cython.wraparound(False)
def dbscan_inner(np.ndarray[np.uint8_t, ndim=1, mode='c'] is_core,
                 np.ndarray[object, ndim=1] neighborhoods,
                 np.ndarray[np.npy_intp, ndim=1, mode='c'] labels):
    cdef np.npy_intp i, label_num = 0, v
    cdef np.ndarray[np.npy_intp, ndim=1] neighb
    cdef vector[np.npy_intp] stack

    for i in range(labels.shape[0]):
        if labels[i] != -1 or not is_core[i]:
            continue

        # Depth-first search starting from i, ending at the non-core points.
        # This is very similar to the classic algorithm for computing
        # connected components, the difference being that we label non-core
        # points as part of a cluster (component), but don't expand their
        # neighborhoods.
        while True:
            if labels[i] == -1:
                labels[i] = label_num
                if is_core[i]:
                    neighb = neighborhoods[i]
                    # NB: the loop below deliberately reuses i; it is
                    # reassigned from the stack (or the loop exits) before
                    # the next iteration of the while loop.
                    for i in range(neighb.shape[0]):
                        v = neighb[i]
                        if labels[v] == -1:
                            push(stack, v)

            if stack.size() == 0:
                break
            i = stack.back()
            stack.pop_back()

        label_num += 1
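
# For exposition only (not part of the original file): a pure-Python mirror
# of the loop above, useful for following the control flow without the
# Cython types. As in dbscan_inner, 'labels' must arrive filled with -1.
def _dbscan_inner_py(is_core, neighborhoods, labels):
    label_num = 0
    stack = []
    for i in range(len(labels)):
        if labels[i] != -1 or not is_core[i]:
            continue
        while True:
            if labels[i] == -1:
                labels[i] = label_num          # claim point for this cluster
                if is_core[i]:
                    for v in neighborhoods[i]:
                        if labels[v] == -1:
                            stack.append(v)    # expand only core points
            if not stack:
                break
            i = stack.pop()
        label_num += 1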
--------------------------------------------------------------------------------
/sklearn/utils/seq_dataset.pxd:
--------------------------------------------------------------------------------
"""Dataset abstractions for sequential data access."""

cimport numpy as np

# SequentialDataset and its two concrete subclasses are (optionally
# randomized) iterators over the rows of a matrix X and the corresponding
# target values y.

cdef class SequentialDataset:
    cdef int current_index
    cdef np.ndarray index
    cdef int *index_data_ptr
    cdef Py_ssize_t n_samples
    cdef np.uint32_t seed

    cdef void shuffle(self, np.uint32_t seed) nogil
    cdef int _get_next_index(self) nogil
    cdef int _get_random_index(self) nogil

    cdef void _sample(self, double **x_data_ptr, int **x_ind_ptr,
                      int *nnz, double *y, double *sample_weight,
                      int current_index) nogil
    cdef void next(self, double **x_data_ptr, int **x_ind_ptr,
                   int *nnz, double *y, double *sample_weight) nogil
    cdef int random(self, double **x_data_ptr, int **x_ind_ptr,
                    int *nnz, double *y, double *sample_weight) nogil


cdef class ArrayDataset(SequentialDataset):
    cdef np.ndarray X
    cdef np.ndarray Y
    cdef np.ndarray sample_weights
    cdef Py_ssize_t n_features
    cdef np.npy_intp X_stride
    cdef double *X_data_ptr
    cdef double *Y_data_ptr
    cdef np.ndarray feature_indices
    cdef int *feature_indices_ptr
    cdef double *sample_weight_data


cdef class CSRDataset(SequentialDataset):
    cdef np.ndarray X_data
    cdef np.ndarray X_indptr
    cdef np.ndarray X_indices
    cdef np.ndarray Y
    cdef np.ndarray sample_weights
    cdef double *X_data_ptr
    cdef int *X_indptr_ptr
    cdef int *X_indices_ptr
    cdef double *Y_data_ptr
    cdef double *sample_weight_data
--------------------------------------------------------------------------------
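The pxd above only declares the interface; as a rough mental model (a plain
Python sketch under the stated assumptions, not the actual implementation),
the access pattern it promises looks like this:

    import numpy as np

    class ArrayDatasetSketch:
        """Rough Python model of the ArrayDataset access pattern."""

        def __init__(self, X, y, sample_weight, seed=1):
            self.X, self.y, self.w = X, y, sample_weight
            self.index = np.arange(X.shape[0])   # the (shufflable) sample order
            self.current_index = -1
            self.rng = np.random.RandomState(seed)

        def shuffle(self, seed):
            # mirrors SequentialDataset.shuffle
            np.random.RandomState(seed).shuffle(self.index)

        def next(self):
            # sequential access, wrapping around like _get_next_index
            self.current_index = (self.current_index + 1) % len(self.index)
            i = self.index[self.current_index]
            return self.X[i], self.y[i], self.w[i]

        def random(self):
            # randomized access, mirroring _get_random_index
            i = self.index[self.rng.randint(len(self.index))]
            return self.X[i], self.y[i], self.w[i]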