├── .gitignore ├── .mailmap ├── .travis.yml ├── CHANGELOG.md ├── CONTRIBUTING.md ├── Cargo.toml ├── DEVELOPMENT.md ├── LICENSE.md ├── README.md ├── benches ├── examples │ ├── cross_validation.rs │ ├── k_means.rs │ ├── nnet.rs │ └── svm.rs └── lib.rs ├── examples ├── README.md ├── k-means_generating_cluster.rs ├── naive_bayes_dogs.rs ├── nnet-and_gate.rs └── svm-sign_learner.rs ├── src ├── analysis │ ├── confusion_matrix.rs │ ├── cross_validation.rs │ └── score.rs ├── data │ └── transforms │ │ ├── minmax.rs │ │ ├── mod.rs │ │ ├── normalize.rs │ │ ├── shuffle.rs │ │ └── standardize.rs ├── datasets │ ├── iris.rs │ ├── mod.rs │ └── trees.rs ├── learning │ ├── dbscan.rs │ ├── error.rs │ ├── glm.rs │ ├── gmm.rs │ ├── gp.rs │ ├── k_means.rs │ ├── knn │ │ ├── binary_tree.rs │ │ ├── brute_force.rs │ │ └── mod.rs │ ├── lin_reg.rs │ ├── logistic_reg.rs │ ├── naive_bayes.rs │ ├── nnet │ │ ├── mod.rs │ │ └── net_layer.rs │ ├── optim │ │ ├── fmincg.rs │ │ └── grad_desc.rs │ ├── pca.rs │ ├── svm.rs │ └── toolkit │ │ ├── activ_fn.rs │ │ ├── cost_fn.rs │ │ ├── kernel.rs │ │ ├── rand_utils.rs │ │ └── regularization.rs ├── lib.rs ├── prelude.rs └── stats │ └── dist │ ├── exponential.rs │ ├── gaussian.rs │ └── mod.rs └── tests ├── datasets.rs ├── learning ├── dbscan.rs ├── gp.rs ├── k_means.rs ├── knn.rs ├── lin_reg.rs ├── optim │ └── grad_desc.rs └── pca.rs └── lib.rs /.gitignore: -------------------------------------------------------------------------------- 1 | # compiled code 2 | target/ 3 | Cargo.lock 4 | *~ 5 | *.swp 6 | *.swo 7 | .vscode 8 | -------------------------------------------------------------------------------- /.mailmap: -------------------------------------------------------------------------------- 1 | 2 | 3 | James Lucas 4 | 5 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | rust: 3 | - stable 4 | - beta 5 | - nightly 6 | matrix: 7 | allow_failures: 8 | - rust: nightly 9 | script: 10 | - cargo build --verbose 11 | - cargo test --verbose 12 | - cargo build --features stats 13 | - cargo test --features stats 14 | - cargo build --features datasets 15 | - cargo test --features datasets 16 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to rusty-machine 2 | 3 | First of all, thank you for your interest! I'm very keen to get more contributors onboard and am excited to help out in whichever 4 | ways I can. This is an early stage, developed-too-fast, library which could really benefit from more contributors. 5 | 6 | Contributing can take place in many forms, including but not limited to: 7 | 8 | - [Bug Reports](#bug-reports) 9 | - [Feature Requests](#feature-requests) 10 | - [Pull Requests](#pull-requests) 11 | - [Interested (but confused)?](#interested-but-confused) 12 | - [How can I test this?](#how-can-i-test-out-this-project) 13 | 14 | Bug Reports and Feature Requests are easy and the project is happily accepting them now. Please fire away! 15 | 16 | As for Pull Requests I am excited to take on new contributors who can help with the code. Please see the section below about getting started. 17 | 18 | --- 19 | 20 | ## Bug Reports 21 | 22 | If you're using rusty-machine and run into what you believe to be a bug. 
Then please create an [issue](https://guides.github.com/features/issues/) 23 | to let me know. Even if you're not confident this is a bug I'd prefer to hear about it! 24 | 25 | In the [issue](https://guides.github.com/features/issues/) please include a description of the bug, and the conditions needed to replicate it. 26 | Minimal conditions would be preferred but I understand this can often be a lot of work. If you can provide an example of the code 27 | producing the bug this would be really handy too! 28 | 29 | ## Feature Requests 30 | 31 | I strongly encourage feature requests! I'd love to get feedback and learn what the community wants to see next from this project. 32 | I have my own goals and planned features which can be seen in the [Development](DEVELOPMENT.md) document. Even if a feature is 33 | listed here please feel free to request it regardless - it may affect the order in which I implement things. 34 | 35 | To request a feature please open an [issue](https://guides.github.com/features/issues/) with a description of the feature requested. 36 | If you can include some technical details and requirements this would be a big help. 37 | 38 | ## Pull Requests 39 | 40 | This section will cover the process for making code contributions to rusty-machine. Please feel free to make 41 | suggestions on how to improve this process (an issue on the repository will be fine). 42 | 43 | ### Getting Started 44 | 45 | We currently use a [fork](https://help.github.com/articles/fork-a-repo/) and 46 | [pull request](https://help.github.com/articles/using-pull-requests/) model to allow contributions to rusty-machine. 47 | 48 | Please take a look through the code and [API documentation](https://AtheMathmo.github.io/rusty-machine/) 49 | to identify the areas you'd like to help out with. Take a look through the current issues and see if there's anything you'd like to tackle. 50 | Simple issues will (_should_) be tagged with the label `easy`. 51 | 52 | If you decide you want to tackle an issue please comment on that issue stating that you would like to work on it. 53 | This will help us keep track of who is working on what. 54 | 55 | ### Making Code Changes 56 | 57 | So by now you should have the project forked and are ready to start working on the code. 58 | There are no hard conventions in place at the moment but please follow these general guidelines: 59 | 60 | - Add comments to all private functions detailing what they do. 61 | - Small pull requests are preferred! If the required change is large consider breaking it up or seek guidance 62 | on how best to proceed. 63 | - Add new tests for any new functionality you add. This means examples within the documentation, unit tests and when 64 | relevant within the tests directory. 65 | - There is (currently) no strict format for commit messages. But please be descriptive about the functionality you have 66 | added. 67 | 68 | ### Creating the PR 69 | 70 | Once the issue has been resolved please create the PR from your fork into the `master` branch. 71 | In the comments please reference the issue that the PR addresses, something like: "This resolves #XXX". 72 | 73 | Other contributors will then review and give feedback. Once accepted the PR will be merged. 74 | 75 | --- 76 | 77 | ### Interested but confused? 78 | 79 | Even now the project is fairly large and a bit overwhelming to join. **I'm happy to help people onboard and will do my best to make the process smooth.** 80 | 81 | For now I have no special measures in place to assist with this. 
Due to this I'm happy for potential contributors to create new 82 | issues detailing their interests and we can open a conversation about how you can help out. Additionally please feel free to 83 | comment on existing tickets detailing your interest and ask any questions you need to about how to proceed. 84 | 85 | ### How can I test out this project? 86 | 87 | **There are now some examples in the repository!** 88 | 89 | The [examples](./examples) are designed to illustrate how you can use rusty-machine and will work out-of-the-box. 90 | If these are unclear then please feel free to open up an issue (it will hopefully motivate improvements!). 91 | 92 | Otherwise I'd encourage you to write your own small projects to test out the algorithms. There are some great datasets [here](https://archive.ics.uci.edu/ml/datasets.html) from UCI. I've done some testing with the [Wisconsin breast cancer data](https://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+(Diagnostic)). Please provide feedback based on your experience! 93 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rusty-machine" 3 | version = "0.5.4" 4 | authors = ["AtheMathmo"] 5 | 6 | description = "A machine learning library." 7 | repository = "https://github.com/AtheMathmo/rusty-machine" 8 | documentation = "https://AtheMathmo.github.io/rusty-machine/" 9 | keywords = ["machine","learning","stats","data","machine-learning"] 10 | categories = ["science"] 11 | readme = "README.md" 12 | license = "MIT" 13 | 14 | [features] 15 | stats = [] 16 | datasets = [] 17 | 18 | [dependencies] 19 | num = { version = "0.1.41", default-features = false } 20 | rand = "0.4.1" 21 | rulinalg = { git = "https://github.com/AtheMathmo/rulinalg", rev = "1ed8b937" } 22 | -------------------------------------------------------------------------------- /DEVELOPMENT.md: -------------------------------------------------------------------------------- 1 | # Development 2 | 3 | This document will (loosely) keep track of development goals for this project. 4 | 5 | --- 6 | 7 | ## Current Progress 8 | 9 | The linear algebra library previously in rusty-machine is now a new crate - [Rulinalg](https://github.com/AtheMathmo/rulinalg). 10 | 11 | For full information on what is currently available look at the [crate documentation](https://athemathmo.github.io/rusty-machine/rusty-machine/doc/rusty_machine/index.html). 12 | 13 | --- 14 | 15 | ## Goals 16 | 17 | The table below details some planned features and the release version we are aiming for. 18 | We are actively developing and so expect to move through these at a good pace! 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 |
| Version | Feature                 | Dependencies |
|---------|-------------------------|--------------|
| 0.5.X   | Nearest Neighbours      | None         |
| 0.6.0   | Model API Improvements  | None         |
| 0.6.0   | Neural Net Improvements | None         |
34 | 35 | Whilst working on the above there will of course be ongoing development on the existing and new machine learning algorithms. 36 | 37 | ### Unplanned: 38 | 39 | - Convolutional and Recurrent neural nets. 40 | - SVM coordinate descent as per [this paper](http://www.loshchilov.com/publications/GECCO2011_AdaptiveCoordinateDescent.pdf). 41 | 42 | ### Why aren't you working on X? 43 | 44 | If you think there is an obvious feature missing from the library please open an issue about it. 45 | If you want to work on said feature then even better! 46 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 James Lucas 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rusty-machine 2 | 3 | **This library is no longer actively maintained.** 4 | 5 | [![Join the chat at https://gitter.im/AtheMathmo/rusty-machine](https://badges.gitter.im/AtheMathmo/rusty-machine.svg)](https://gitter.im/AtheMathmo/rusty-machine?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) [![Build Status](https://travis-ci.org/AtheMathmo/rusty-machine.svg?branch=master)](https://travis-ci.org/AtheMathmo/rusty-machine) 6 | 7 | The crate is currently on version [0.5.4](https://crates.io/crates/rusty-machine/). 8 | 9 | Read the [API Documentation](https://AtheMathmo.github.io/rusty-machine/) to learn more. 10 | 11 | And here is a document detailing development efforts. Including a projected timeline for immediate features. 12 | Please feel free to give feedback and let me know if there any features you believe should take precedence. 13 | 14 | - [Development](DEVELOPMENT.md) 15 | 16 | --- 17 | 18 | ## Summary 19 | 20 | Rusty-machine is a general purpose machine learning library implemented entirely in Rust. 21 | It aims to combine speed and ease of use - without requiring a huge number of external dependencies. 22 | 23 | This project began as a way for me to learn Rust and brush up on some less familiar machine learning algorithms and techniques. 24 | Now the project aims to provide a complete, easy to use, machine learning library for Rust. 25 | 26 | This library is still very much in early stages of development. 
Although there are a good number of algorithms many other 27 | things are missing. Rusty-machine is probably not the best choice for any serious projects - but hopefully that can change in the near future! 28 | 29 | #### Contributing 30 | 31 | This project is currently looking for contributors of all capacities! 32 | 33 | I have now created a dedicated page for [contributing](CONTRIBUTING.md). If you're interested please take a look. 34 | 35 | --- 36 | 37 | ## Implementation 38 | 39 | This project is implemented using [Rust](https://www.rust-lang.org/). Currently there are no other dependencies! 40 | Though, we are planning on introducing optional BLAS/LAPACK dependencies soon. 41 | 42 | --- 43 | 44 | ## Current Progress 45 | 46 | Rusty-machine uses [rulinalg](https://github.com/AtheMathmo/rulinalg) for its linear algebra back end. 47 | This is fairly complete but there is still lots of room for optimization and we should provide BLAS/LAPACK support. 48 | 49 | ### Machine Learning 50 | 51 | - Linear Regression 52 | - Logistic Regression 53 | - Generalized Linear Models 54 | - K-Means Clustering 55 | - Neural Networks 56 | - Gaussian Process Regression 57 | - Support Vector Machines 58 | - Gaussian Mixture Models 59 | - Naive Bayes Classifiers 60 | - DBSCAN 61 | - k-Nearest Neighbor Classifiers 62 | - Principal Component Analysis 63 | 64 | There is also a basic `stats` module behind a feature flag. 65 | 66 | --- 67 | 68 | ## Usage 69 | 70 | The library usage is described well in the [API documentation](https://AtheMathmo.github.io/rusty-machine/) - including example code. 71 | I will provide a brief overview of the library in it's current state and intended usage. 72 | 73 | ### Installation 74 | 75 | The library is most easily used with [cargo](http://doc.crates.io/guide.html). Simply include the following in your Cargo.toml file: 76 | 77 | ```toml 78 | [dependencies] 79 | rusty-machine="0.5.4" 80 | ``` 81 | 82 | And then import the library using: 83 | 84 | ```rust 85 | extern crate rusty_machine as rm; 86 | ``` 87 | 88 | The library consists of two core components. The linear algebra module and the learning module. 89 | 90 | #### Linalg 91 | 92 | The linear algebra module contains reexports from the [rulinalg](https://github.com/AtheMathmo/rulinalg) crate. This is to 93 | provide easy access to components which are used frequently within rusty-machine. 94 | 95 | More detailed coverage can be found in the [API documentation](https://AtheMathmo.github.io/rusty-machine/). 96 | 97 | #### Learning 98 | 99 | The learning module contains machine learning models. The machine learning implementations are designed with 100 | simpicity and customization in mind. This means you can control the optimization algorithms but still retain 101 | the ease of using default values. This is an area I am actively trying to improve on! 102 | 103 | The models all provide `predict` and `train` methods enforced by the `SupModel` and `UnSupModel` traits. 104 | 105 | There are some examples within this repository that can help you familiarize yourself with the library. 
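For a first taste before diving into those examples, here is a minimal, hypothetical sketch of the shared `train`/`predict` workflow; the model choice and data values below are purely illustrative and not taken from the examples directory:

```rust
extern crate rusty_machine;

use rusty_machine::linalg::{Matrix, Vector};
use rusty_machine::learning::lin_reg::LinRegressor;
use rusty_machine::learning::SupModel;

fn main() {
    // A tiny, made-up training set where each target is roughly 2 * input.
    let inputs = Matrix::new(4, 1, vec![1.0, 2.0, 3.0, 4.0]);
    let targets = Vector::new(vec![2.0, 4.0, 6.0, 8.0]);

    // Supervised models expose `train` and `predict` through the `SupModel` trait.
    let mut model = LinRegressor::default();
    model.train(&inputs, &targets).unwrap();

    // Predict on new, unseen inputs.
    let new_points = Matrix::new(2, 1, vec![5.0, 6.0]);
    let predictions = model.predict(&new_points).unwrap();
    println!("Predictions: {}", predictions);
}
```

Unsupervised models (for example `KMeansClassifier`) follow the same pattern through the `UnSupModel` trait, with `train` taking only the inputs.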
106 | -------------------------------------------------------------------------------- /benches/examples/cross_validation.rs: -------------------------------------------------------------------------------- 1 | use rusty_machine::linalg::{Matrix, BaseMatrix}; 2 | use rusty_machine::learning::{LearningResult, SupModel}; 3 | use rusty_machine::analysis::score::row_accuracy; 4 | use rusty_machine::analysis::cross_validation::k_fold_validate; 5 | use rand::{thread_rng, Rng}; 6 | use test::{Bencher, black_box}; 7 | 8 | fn generate_data(rows: usize, cols: usize) -> Matrix { 9 | let mut rng = thread_rng(); 10 | let mut data = Vec::with_capacity(rows * cols); 11 | 12 | for _ in 0..data.capacity() { 13 | data.push(rng.gen_range(0f64, 1f64)); 14 | } 15 | 16 | Matrix::new(rows, cols, data) 17 | } 18 | 19 | /// A very simple model that looks at all the data it's 20 | /// given but doesn't do anything useful. 21 | /// Stores the sum of all elements in the inputs and targets 22 | /// matrices when trained. Its prediction for each row is the 23 | /// sum of the row's elements plus the precalculated training sum. 24 | struct DummyModel { 25 | sum: f64 26 | } 27 | 28 | impl SupModel, Matrix> for DummyModel { 29 | fn predict(&self, inputs: &Matrix) -> LearningResult> { 30 | let predictions: Vec = inputs 31 | .row_iter() 32 | .map(|row| { self.sum + sum(row.iter()) }) 33 | .collect(); 34 | Ok(Matrix::new(inputs.rows(), 1, predictions)) 35 | } 36 | 37 | fn train(&mut self, inputs: &Matrix, targets: &Matrix) -> LearningResult<()> { 38 | self.sum = sum(inputs.iter()) + sum(targets.iter()); 39 | Ok(()) 40 | } 41 | } 42 | 43 | fn sum<'a, I: Iterator>(x: I) -> f64 { 44 | x.fold(0f64, |acc, x| acc + x) 45 | } 46 | 47 | macro_rules! bench { 48 | ($name:ident: $params:expr) => { 49 | #[bench] 50 | fn $name(b: &mut Bencher) { 51 | let (rows, cols, k) = $params; 52 | let inputs = generate_data(rows, cols); 53 | let targets = generate_data(rows, 1); 54 | 55 | b.iter(|| { 56 | let mut model = DummyModel { sum: 0f64 }; 57 | let _ = black_box( 58 | k_fold_validate(&mut model, &inputs, &targets, k, row_accuracy) 59 | ); 60 | }); 61 | } 62 | } 63 | } 64 | 65 | bench!(bench_10_10_3: (10, 10, 3)); 66 | bench!(bench_1000_10_3: (1000, 10, 3)); 67 | bench!(bench_1000_10_10: (1000, 10, 10)); 68 | bench!(bench_1000_10_100: (1000, 10, 100)); 69 | -------------------------------------------------------------------------------- /benches/examples/k_means.rs: -------------------------------------------------------------------------------- 1 | use rusty_machine::linalg::{Matrix, BaseMatrix}; 2 | use rusty_machine::learning::k_means::KMeansClassifier; 3 | use rusty_machine::learning::UnSupModel; 4 | 5 | use rand::thread_rng; 6 | use rand::distributions::IndependentSample; 7 | use rand::distributions::normal::Normal; 8 | 9 | use test::{Bencher, black_box}; 10 | 11 | fn generate_data(centroids: &Matrix, points_per_centroid: usize, noise: f64) -> Matrix { 12 | assert!(centroids.cols() > 0, "Centroids cannot be empty."); 13 | assert!(centroids.rows() > 0, "Centroids cannot be empty."); 14 | assert!(noise >= 0f64, "Noise must be non-negative."); 15 | let mut raw_cluster_data = Vec::with_capacity(centroids.rows() * points_per_centroid * 16 | centroids.cols()); 17 | 18 | let mut rng = thread_rng(); 19 | let normal_rv = Normal::new(0f64, noise); 20 | 21 | for _ in 0..points_per_centroid { 22 | // Generate points from each centroid 23 | for centroid in centroids.row_iter() { 24 | // Generate a point randomly around the centroid 25 | let mut point 
= Vec::with_capacity(centroids.cols()); 26 | for feature in centroid.iter() { 27 | point.push(feature + normal_rv.ind_sample(&mut rng)); 28 | } 29 | 30 | // Push point to raw_cluster_data 31 | raw_cluster_data.extend(point); 32 | } 33 | } 34 | 35 | Matrix::new(centroids.rows() * points_per_centroid, 36 | centroids.cols(), 37 | raw_cluster_data) 38 | } 39 | 40 | #[bench] 41 | fn k_means_train(b: &mut Bencher) { 42 | 43 | const SAMPLES_PER_CENTROID: usize = 2000; 44 | // Choose two cluster centers, at (-0.5, -0.5) and (0, 0.5). 45 | let centroids = Matrix::new(2, 2, vec![-0.5, -0.5, 0.0, 0.5]); 46 | 47 | // Generate some data randomly around the centroids 48 | let samples = generate_data(¢roids, SAMPLES_PER_CENTROID, 0.4); 49 | 50 | b.iter(|| { 51 | let mut model = black_box(KMeansClassifier::new(2)); 52 | let _ = black_box(model.train(&samples).unwrap()); 53 | }); 54 | } 55 | 56 | #[bench] 57 | fn k_means_predict(b: &mut Bencher) { 58 | 59 | const SAMPLES_PER_CENTROID: usize = 2000; 60 | // Choose two cluster centers, at (-0.5, -0.5) and (0, 0.5). 61 | let centroids = Matrix::new(2, 2, vec![-0.5, -0.5, 0.0, 0.5]); 62 | 63 | // Generate some data randomly around the centroids 64 | let samples = generate_data(¢roids, SAMPLES_PER_CENTROID, 0.4); 65 | 66 | let mut model = KMeansClassifier::new(2); 67 | let _ = model.train(&samples).unwrap(); 68 | b.iter(|| { 69 | let _ = black_box(model.centroids().as_ref().unwrap()); 70 | let _ = black_box(model.predict(&samples).unwrap()); 71 | }); 72 | } 73 | -------------------------------------------------------------------------------- /benches/examples/nnet.rs: -------------------------------------------------------------------------------- 1 | use test::{Bencher, black_box}; 2 | 3 | use rand::{random, Closed01}; 4 | use std::vec::Vec; 5 | 6 | use rusty_machine::learning::nnet::{NeuralNet, BCECriterion}; 7 | use rusty_machine::learning::toolkit::regularization::Regularization; 8 | use rusty_machine::learning::toolkit::activ_fn::Sigmoid; 9 | use rusty_machine::learning::optim::grad_desc::StochasticGD; 10 | 11 | use rusty_machine::linalg::Matrix; 12 | use rusty_machine::learning::SupModel; 13 | 14 | fn generate_data() -> (Matrix, Matrix, Matrix) { 15 | const THRESHOLD: f64 = 0.7; 16 | const SAMPLES: usize = 1000; 17 | 18 | let mut input_data = Vec::with_capacity(SAMPLES * 2); 19 | let mut label_data = Vec::with_capacity(SAMPLES); 20 | 21 | for _ in 0..SAMPLES { 22 | // The two inputs are "signals" between 0 and 1 23 | let Closed01(left) = random::>(); 24 | let Closed01(right) = random::>(); 25 | input_data.push(left); 26 | input_data.push(right); 27 | if left > THRESHOLD && right > THRESHOLD { 28 | label_data.push(1.0); 29 | } else { 30 | label_data.push(0.0) 31 | } 32 | } 33 | 34 | let inputs = Matrix::new(SAMPLES, 2, input_data); 35 | let targets = Matrix::new(SAMPLES, 1, label_data); 36 | 37 | let test_cases = vec![ 38 | 0.0, 0.0, 39 | 0.0, 1.0, 40 | 1.0, 1.0, 41 | 1.0, 0.0, 42 | ]; 43 | let test_inputs = Matrix::new(test_cases.len() / 2, 2, test_cases); 44 | 45 | (inputs, targets, test_inputs) 46 | } 47 | 48 | #[bench] 49 | fn nnet_and_gate_train(b: &mut Bencher) { 50 | let (inputs, targets, _) = generate_data(); 51 | let layers = &[2, 1]; 52 | let criterion = BCECriterion::new(Regularization::L2(0.)); 53 | 54 | b.iter(|| { 55 | let mut model = black_box(NeuralNet::mlp(layers, criterion, StochasticGD::default(), Sigmoid)); 56 | let _ = black_box(model.train(&inputs, &targets).unwrap()); 57 | }) 58 | } 59 | 60 | #[bench] 61 | fn nnet_and_gate_predict(b: 
&mut Bencher) { 62 | let (inputs, targets, test_inputs) = generate_data(); 63 | let layers = &[2, 1]; 64 | let criterion = BCECriterion::new(Regularization::L2(0.)); 65 | 66 | let mut model = NeuralNet::mlp(layers, criterion, StochasticGD::default(), Sigmoid); 67 | let _ = model.train(&inputs, &targets); 68 | 69 | b.iter(|| { 70 | let _ = black_box(model.predict(&test_inputs)); 71 | }) 72 | } 73 | -------------------------------------------------------------------------------- /benches/examples/svm.rs: -------------------------------------------------------------------------------- 1 | use rusty_machine::learning::svm::SVM; 2 | // Necessary for the training trait. 3 | use rusty_machine::learning::SupModel; 4 | use rusty_machine::learning::toolkit::kernel::HyperTan; 5 | 6 | use rusty_machine::linalg::Matrix; 7 | use rusty_machine::linalg::Vector; 8 | 9 | use test::{Bencher, black_box}; 10 | 11 | fn generate_data() -> (Matrix, Vector) { 12 | // Training data 13 | let inputs = Matrix::new(11, 1, vec![ 14 | -0.1, -2., -9., -101., -666.7, 15 | 0., 0.1, 1., 11., 99., 456.7 16 | ]); 17 | let targets = Vector::new(vec![ 18 | -1., -1., -1., -1., -1., 19 | 1., 1., 1., 1., 1., 1. 20 | ]); 21 | 22 | (inputs, targets) 23 | } 24 | 25 | // Sign learner: 26 | // * Model input a float number 27 | // * Model output: A float representing the input sign. 28 | // If the input is positive, the output is close to 1.0. 29 | // If the input is negative, the output is close to -1.0. 30 | // * Model generated with the SVM API. 31 | #[bench] 32 | fn svm_sign_learner_train(b: &mut Bencher) { 33 | let (inputs, targets) = generate_data(); 34 | 35 | // Trainee 36 | b.iter(|| { 37 | let mut svm_mod = black_box(SVM::new(HyperTan::new(100., 0.), 0.3)); 38 | let _ = black_box(svm_mod.train(&inputs, &targets).unwrap()); 39 | }); 40 | } 41 | 42 | #[bench] 43 | fn svm_sign_learner_predict(b: &mut Bencher) { 44 | let (inputs, targets) = generate_data(); 45 | 46 | let test_data = (-1000..1000).filter(|&x| x % 100 == 0).map(|x| x as f64).collect::>(); 47 | let test_inputs = Matrix::new(test_data.len(), 1, test_data); 48 | let mut svm_mod = SVM::new(HyperTan::new(100., 0.), 0.3); 49 | let _ = svm_mod.train(&inputs, &targets); 50 | b.iter(|| { 51 | let _ = black_box(svm_mod.predict(&test_inputs).unwrap()); 52 | }); 53 | } 54 | -------------------------------------------------------------------------------- /benches/lib.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | 3 | extern crate rusty_machine; 4 | extern crate test; 5 | extern crate rand; 6 | 7 | mod examples { 8 | mod cross_validation; 9 | mod k_means; 10 | mod nnet; 11 | mod svm; 12 | } 13 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | Examples with rusty-machine 2 | 3 | This directory gathers fully-fledged programs, each using a piece of 4 | `rusty-machine`'s API. 5 | 6 | ## Overview 7 | 8 | * [K-Means](#k-means) 9 | * [SVM](#svm) 10 | * [Neural Networks](#neural-networks) 11 | * [Naïve Bayes](#naïve-bayes) 12 | 13 | ## The Examples 14 | 15 | ### K Means 16 | 17 | #### Generating Clusters 18 | 19 | [Generating Clusters](k-means_generating_clusters.rs) randomly generates data around a pair of clusters. 20 | It then trains a K-Means model to learn new centroids from this sample. 21 | 22 | The example shows a basic usage of the K-Means API - an Unsupervised model. 
We also show some basic usage 23 | of [rulinalg](https://github.com/AtheMathmo/rulinalg) to generate the data. 24 | 25 | Sample run: 26 | 27 | ``` 28 | cargo run --example k-means_generating_cluster 29 | Compiling rusty-machine v0.4.0 (file:///rusty-machine/rusty-machine) 30 | Running `target/debug/examples/k-means_generating_cluster` 31 | K-Means clustering example: 32 | Generating 2000 samples from each centroids: 33 | ⎡-0.5 -0.5⎤ 34 | ⎣ 0 0.5⎦ 35 | Training the model... 36 | Model Centroids: 37 | ⎡-0.812 -0.888⎤ 38 | ⎣-0.525 0.877⎦ 39 | Classifying the samples... 40 | Samples closest to first centroid: 1878 41 | Samples closest to second centroid: 2122 42 | ``` 43 | 44 | ### SVM 45 | 46 | #### Sign Learner 47 | 48 | [Sign learner](svm-sign_learner.rs) constructs and evaluates a model that learns to recognize the sign of an input number. 49 | 50 | The sample shows a basic usage of the SVM API. It also configures the SVM algorithm with a specific kernel (`HyperTan`). 51 | Evaluations are run in a loop to log individual predictions and do some book keeping for reporting the performance at the end. 52 | The salient part from `rusty-machine` is to use the `train` and `predict` methods of the SVM model. 53 | 54 | The accuracy evaluation is simplistic, so the model manages 100% accuracy (which is *really* too simple an example). 55 | 56 | Sample run: 57 | 58 | ``` 59 | cargo run --example svm-sign_learner 60 | Compiling rusty-machine v0.3.0 (file:///rusty-machine/rusty-machine) 61 | Running `target/debug/examples/svm-sign_learner` 62 | Sign learner sample: 63 | Training... 64 | Evaluation... 65 | -1000 -> -1: true 66 | -900 -> -1: true 67 | -800 -> -1: true 68 | -700 -> -1: true 69 | -600 -> -1: true 70 | -500 -> -1: true 71 | -400 -> -1: true 72 | -300 -> -1: true 73 | -200 -> -1: true 74 | -100 -> -1: true 75 | 0 -> -1: true 76 | 100 -> 1: true 77 | 200 -> 1: true 78 | 300 -> 1: true 79 | 400 -> 1: true 80 | 500 -> 1: true 81 | 600 -> 1: true 82 | 700 -> 1: true 83 | 800 -> 1: true 84 | 900 -> 1: true 85 | Performance report: 86 | Hits: 20, Misses: 0 87 | Accuracy: 100 88 | ``` 89 | 90 | ### Neural Networks 91 | 92 | #### AND Gate 93 | 94 | [AND gate](nnet-and_gate.rs) makes an AND gate out of a perceptron. 95 | 96 | The sample code generates random data to learn from. 97 | The input data is like an electric signal between 0 and 1, with some jitter that makes it not quite 0 or 1. 98 | By default, the code decides that any pair input "above" 99 | (0.7, 0.7) is labeled as 1.0 (AND gate passing), otherwise labeled as 0.0 (AND gate blocking). 100 | This means that the training set is biased toward learning the passing scenario: An AND gate passes 101 | 25% of the time on average, and we'd like it to learn it. 102 | 103 | The test data uses only the 4 "perfect" inputs for a gate: (0.0, 0.0), (1.0, 0.0), etc. 104 | 105 | The code generates 10,000 training data points by default. Please give it a try, and then change `SAMPLE`, 106 | the number of training data points, and `THRESHOLD`, the value for "deciding" for a passing gate. 107 | 108 | Sample run: 109 | 110 | ``` 111 | > cargo run --example nnet-and_gate 112 | Compiling rusty-machine v0.3.0 (file:///rusty-machine/rusty-machine) 113 | Running `target/debug/examples/nnet-and_gate` 114 | AND gate learner sample: 115 | Generating 10000 training data and labels... 116 | Training... 117 | Evaluation... 
118 | Got Expected 119 | 0.00 0 120 | 0.00 0 121 | 0.96 1 122 | 0.01 0 123 | Hits: 4, Misses: 0 124 | Accuracy: 100% 125 | ``` 126 | 127 | ### Naïve Bayes 128 | 129 | #### Dog Classification 130 | 131 | Suppose we have a population composed of red dogs and white dogs, 132 | whose friendliness, furriness, and speed can be measured. In this 133 | example we train a Naïve Bayes model to determine whether 134 | a dog is white or red. 135 | 136 | The group of white dogs are friendlier, furrier, and slower than 137 | the red dogs. Given the color of a dog, friendliness, furriness, 138 | and speed are independent of each other (a requirement of the Naïve 139 | Bayes model). 140 | 141 | In the example code we will generate our own data and then train 142 | our model using it. This is a common technique used to validate 143 | a model. We generate the data by sampling each of the dogs features 144 | from Gaussian random variables. We will have a total of 6 Gaussian 145 | random variables representing three features for both colors of dog. 146 | As we are using Gaussian random variables we will use a Gaussian 147 | Naive Bayes model. Once we have generated our data we will convert 148 | it into `Matrix` structures and train our model. 149 | 150 | 151 | Sample run: 152 | 153 | ``` 154 | $ cargo run --example naive_bayes_dogs 155 | ... 156 | Predicted: Red; Actual: Red; Accurate? true 157 | Predicted: Red; Actual: Red; Accurate? true 158 | Predicted: White; Actual: Red; Accurate? false 159 | Predicted: Red; Actual: White; Accurate? false 160 | Predicted: Red; Actual: Red; Accurate? true 161 | Predicted: White; Actual: White; Accurate? true 162 | Predicted: White; Actual: White; Accurate? true 163 | Predicted: White; Actual: White; Accurate? true 164 | Predicted: White; Actual: White; Accurate? true 165 | Predicted: Red; Actual: Red; Accurate? 
true 166 | Accuracy: 822/1000 = 82.2% 167 | ``` 168 | -------------------------------------------------------------------------------- /examples/k-means_generating_cluster.rs: -------------------------------------------------------------------------------- 1 | extern crate rusty_machine; 2 | extern crate rand; 3 | 4 | use rusty_machine::linalg::{Matrix, BaseMatrix}; 5 | use rusty_machine::learning::k_means::KMeansClassifier; 6 | use rusty_machine::learning::UnSupModel; 7 | 8 | use rand::thread_rng; 9 | use rand::distributions::IndependentSample; 10 | use rand::distributions::normal::Normal; 11 | 12 | fn generate_data(centroids: &Matrix, 13 | points_per_centroid: usize, 14 | noise: f64) 15 | -> Matrix { 16 | assert!(centroids.cols() > 0, "Centroids cannot be empty."); 17 | assert!(centroids.rows() > 0, "Centroids cannot be empty."); 18 | assert!(noise >= 0f64, "Noise must be non-negative."); 19 | let mut raw_cluster_data = Vec::with_capacity(centroids.rows() * points_per_centroid * 20 | centroids.cols()); 21 | 22 | let mut rng = thread_rng(); 23 | let normal_rv = Normal::new(0f64, noise); 24 | 25 | for _ in 0..points_per_centroid { 26 | // Generate points from each centroid 27 | for centroid in centroids.row_iter() { 28 | // Generate a point randomly around the centroid 29 | let mut point = Vec::with_capacity(centroids.cols()); 30 | for feature in centroid.iter() { 31 | point.push(feature + normal_rv.ind_sample(&mut rng)); 32 | } 33 | 34 | // Push point to raw_cluster_data 35 | raw_cluster_data.extend(point); 36 | } 37 | } 38 | 39 | Matrix::new(centroids.rows() * points_per_centroid, 40 | centroids.cols(), 41 | raw_cluster_data) 42 | } 43 | 44 | fn main() { 45 | println!("K-Means clustering example:"); 46 | 47 | const SAMPLES_PER_CENTROID: usize = 2000; 48 | 49 | println!("Generating {0} samples from each centroids:", 50 | SAMPLES_PER_CENTROID); 51 | // Choose two cluster centers, at (-0.5, -0.5) and (0, 0.5). 
52 | let centroids = Matrix::new(2, 2, vec![-0.5, -0.5, 0.0, 0.5]); 53 | println!("{}", centroids); 54 | 55 | // Generate some data randomly around the centroids 56 | let samples = generate_data(¢roids, SAMPLES_PER_CENTROID, 0.4); 57 | 58 | // Create a new model with 2 clusters 59 | let mut model = KMeansClassifier::new(2); 60 | 61 | // Train the model 62 | println!("Training the model..."); 63 | // Our train function returns a Result<(), E> 64 | model.train(&samples).unwrap(); 65 | 66 | let centroids = model.centroids().as_ref().unwrap(); 67 | println!("Model Centroids:\n{:.3}", centroids); 68 | 69 | // Predict the classes and partition into 70 | println!("Classifying the samples..."); 71 | let classes = model.predict(&samples).unwrap(); 72 | let (first, second): (Vec, Vec) = classes.data().iter().partition(|&x| *x == 0); 73 | 74 | println!("Samples closest to first centroid: {}", first.len()); 75 | println!("Samples closest to second centroid: {}", second.len()); 76 | } 77 | -------------------------------------------------------------------------------- /examples/naive_bayes_dogs.rs: -------------------------------------------------------------------------------- 1 | extern crate rusty_machine; 2 | extern crate rand; 3 | 4 | use rand::Rand; 5 | use rand::distributions::Sample; 6 | use rand::distributions::normal::Normal; 7 | use rusty_machine::learning::naive_bayes::{self, NaiveBayes}; 8 | use rusty_machine::linalg::{Matrix, BaseMatrix}; 9 | use rusty_machine::learning::SupModel; 10 | 11 | 12 | #[derive(Clone, Copy, Debug, Eq, PartialEq)] 13 | enum Color { 14 | Red, 15 | White, 16 | } 17 | 18 | #[derive(Clone, Debug)] 19 | struct Dog { 20 | color: Color, 21 | friendliness: f64, 22 | furriness: f64, 23 | speed: f64, 24 | } 25 | 26 | impl Rand for Dog { 27 | /// Generate a random dog. 28 | fn rand(rng: &mut R) -> Self { 29 | // Friendliness, furriness, and speed are normally distributed and 30 | // (given color:) independent. 31 | let mut red_dog_friendliness = Normal::new(0., 1.); 32 | let mut red_dog_furriness = Normal::new(0., 1.); 33 | let mut red_dog_speed = Normal::new(0., 1.); 34 | 35 | let mut white_dog_friendliness = Normal::new(1., 1.); 36 | let mut white_dog_furriness = Normal::new(1., 1.); 37 | let mut white_dog_speed = Normal::new(-1., 1.); 38 | 39 | // Flip a coin to decide whether to generate a red or white dog. 40 | let coin: f64 = rng.gen(); 41 | let color = if coin < 0.5 { Color::Red } else { Color::White }; 42 | 43 | match color { 44 | Color::Red => { 45 | Dog { 46 | color: Color::Red, 47 | // sample from our normal distributions for each trait 48 | friendliness: red_dog_friendliness.sample(rng), 49 | furriness: red_dog_furriness.sample(rng), 50 | speed: red_dog_speed.sample(rng), 51 | } 52 | }, 53 | Color::White => { 54 | Dog { 55 | color: Color::White, 56 | friendliness: white_dog_friendliness.sample(rng), 57 | furriness: white_dog_furriness.sample(rng), 58 | speed: white_dog_speed.sample(rng), 59 | } 60 | }, 61 | } 62 | } 63 | } 64 | 65 | fn generate_dog_data(training_set_size: u32, test_set_size: u32) 66 | -> (Matrix, Matrix, Matrix, Vec) { 67 | let mut randomness = rand::StdRng::new() 68 | .expect("we should be able to get an RNG"); 69 | let rng = &mut randomness; 70 | 71 | // We'll train the model on these dogs 72 | let training_dogs = (0..training_set_size) 73 | .map(|_| { Dog::rand(rng) }) 74 | .collect::>(); 75 | 76 | // ... and then use the model to make predictions about these dogs' color 77 | // given only their trait measurements. 
78 | let test_dogs = (0..test_set_size) 79 | .map(|_| { Dog::rand(rng) }) 80 | .collect::>(); 81 | 82 | // The model's `.train` method will take two matrices, each with a row for 83 | // each dog in the training set: the rows in the first matrix contain the 84 | // trait measurements; the rows in the second are either [1, 0] or [0, 1] 85 | // to indicate color. 86 | let training_data: Vec = training_dogs.iter() 87 | .flat_map(|dog| vec![dog.friendliness, dog.furriness, dog.speed]) 88 | .collect(); 89 | let training_matrix: Matrix = training_data.chunks(3).collect(); 90 | let target_data: Vec = training_dogs.iter() 91 | .flat_map(|dog| match dog.color { 92 | Color::Red => vec![1., 0.], 93 | Color::White => vec![0., 1.], 94 | }) 95 | .collect(); 96 | let target_matrix: Matrix = target_data.chunks(2).collect(); 97 | 98 | // Build another matrix for the test set of dogs to make predictions about. 99 | let test_data: Vec = test_dogs.iter() 100 | .flat_map(|dog| vec![dog.friendliness, dog.furriness, dog.speed]) 101 | .collect(); 102 | let test_matrix: Matrix = test_data.chunks(3).collect(); 103 | 104 | (training_matrix, target_matrix, test_matrix, test_dogs) 105 | } 106 | 107 | fn evaluate_prediction(hits: &mut u32, dog: &Dog, prediction: &[f64]) -> (Color, bool) { 108 | let predicted_color = dog.color; 109 | let actual_color = if prediction[0] == 1. { 110 | Color::Red 111 | } else { 112 | Color::White 113 | }; 114 | let accurate = predicted_color == actual_color; 115 | if accurate { 116 | *hits += 1; 117 | } 118 | (actual_color, accurate) 119 | } 120 | 121 | fn main() { 122 | let (training_set_size, test_set_size) = (1000, 1000); 123 | // Generate all of our train and test data 124 | let (training_matrix, target_matrix, test_matrix, test_dogs) = generate_dog_data(training_set_size, test_set_size); 125 | 126 | // Train! 127 | let mut model = NaiveBayes::::new(); 128 | model.train(&training_matrix, &target_matrix) 129 | .expect("failed to train model of dogs"); 130 | 131 | // Predict! 132 | let predictions = model.predict(&test_matrix) 133 | .expect("failed to predict dogs!?"); 134 | 135 | // Score how well we did. 136 | let mut hits = 0; 137 | let unprinted_total = test_set_size.saturating_sub(10) as usize; 138 | for (dog, prediction) in test_dogs.iter().zip(predictions.row_iter()).take(unprinted_total) { 139 | evaluate_prediction(&mut hits, dog, prediction.raw_slice()); 140 | } 141 | 142 | if unprinted_total > 0 { 143 | println!("..."); 144 | } 145 | 146 | for (dog, prediction) in test_dogs.iter().zip(predictions.row_iter()).skip(unprinted_total) { 147 | let (actual_color, accurate) = evaluate_prediction(&mut hits, dog, prediction.raw_slice()); 148 | println!("Predicted: {:?}; Actual: {:?}; Accurate? 
{:?}", 149 | dog.color, actual_color, accurate); 150 | } 151 | 152 | println!("Accuracy: {}/{} = {:.1}%", hits, test_set_size, 153 | (f64::from(hits))/(f64::from(test_set_size)) * 100.); 154 | } 155 | -------------------------------------------------------------------------------- /examples/nnet-and_gate.rs: -------------------------------------------------------------------------------- 1 | extern crate rusty_machine; 2 | extern crate rand; 3 | 4 | use rand::{random, Closed01}; 5 | use std::vec::Vec; 6 | 7 | use rusty_machine::learning::nnet::{NeuralNet, BCECriterion}; 8 | use rusty_machine::learning::toolkit::regularization::Regularization; 9 | use rusty_machine::learning::toolkit::activ_fn::Sigmoid; 10 | use rusty_machine::learning::optim::grad_desc::StochasticGD; 11 | 12 | use rusty_machine::linalg::Matrix; 13 | use rusty_machine::learning::SupModel; 14 | 15 | // AND gate 16 | fn main() { 17 | println!("AND gate learner sample:"); 18 | 19 | const THRESHOLD: f64 = 0.7; 20 | 21 | const SAMPLES: usize = 10000; 22 | println!("Generating {} training data and labels...", SAMPLES as u32); 23 | 24 | let mut input_data = Vec::with_capacity(SAMPLES * 2); 25 | let mut label_data = Vec::with_capacity(SAMPLES); 26 | 27 | for _ in 0..SAMPLES { 28 | // The two inputs are "signals" between 0 and 1 29 | let Closed01(left) = random::>(); 30 | let Closed01(right) = random::>(); 31 | input_data.push(left); 32 | input_data.push(right); 33 | if left > THRESHOLD && right > THRESHOLD { 34 | label_data.push(1.0); 35 | } else { 36 | label_data.push(0.0) 37 | } 38 | } 39 | 40 | let inputs = Matrix::new(SAMPLES, 2, input_data); 41 | let targets = Matrix::new(SAMPLES, 1, label_data); 42 | 43 | let layers = &[2, 1]; 44 | let criterion = BCECriterion::new(Regularization::L2(0.)); 45 | // Create a multilayer perceptron with an input layer of size 2 and output layer of size 1 46 | // Uses a Sigmoid activation function and uses Stochastic gradient descent for training 47 | let mut model = NeuralNet::mlp(layers, criterion, StochasticGD::default(), Sigmoid); 48 | 49 | println!("Training..."); 50 | // Our train function returns a Result<(), E> 51 | model.train(&inputs, &targets).unwrap(); 52 | 53 | let test_cases = vec![ 54 | 0.0, 0.0, 55 | 0.0, 1.0, 56 | 1.0, 1.0, 57 | 1.0, 0.0, 58 | ]; 59 | let expected = vec![ 60 | 0.0, 61 | 0.0, 62 | 1.0, 63 | 0.0, 64 | ]; 65 | let test_inputs = Matrix::new(test_cases.len() / 2, 2, test_cases); 66 | let res = model.predict(&test_inputs).unwrap(); 67 | 68 | println!("Evaluation..."); 69 | let mut hits = 0; 70 | let mut misses = 0; 71 | // Evaluation 72 | println!("Got\tExpected"); 73 | for (idx, prediction) in res.into_vec().iter().enumerate() { 74 | println!("{:.2}\t{}", prediction, expected[idx]); 75 | if (prediction - 0.5) * (expected[idx] - 0.5) > 0. { 76 | hits += 1; 77 | } else { 78 | misses += 1; 79 | } 80 | } 81 | 82 | println!("Hits: {}, Misses: {}", hits, misses); 83 | let hits_f = hits as f64; 84 | let total = (hits + misses) as f64; 85 | println!("Accuracy: {}%", (hits_f / total) * 100.); 86 | } 87 | -------------------------------------------------------------------------------- /examples/svm-sign_learner.rs: -------------------------------------------------------------------------------- 1 | extern crate rusty_machine; 2 | 3 | use rusty_machine::learning::svm::SVM; 4 | // Necessary for the training trait. 
5 | use rusty_machine::learning::SupModel; 6 | use rusty_machine::learning::toolkit::kernel::HyperTan; 7 | 8 | use rusty_machine::linalg::Matrix; 9 | use rusty_machine::linalg::Vector; 10 | 11 | // Sign learner: 12 | // * Model input a float number 13 | // * Model output: A float representing the input sign. 14 | // If the input is positive, the output is close to 1.0. 15 | // If the input is negative, the output is close to -1.0. 16 | // * Model generated with the SVM API. 17 | fn main() { 18 | println!("Sign learner sample:"); 19 | 20 | println!("Training..."); 21 | // Training data 22 | let inputs = Matrix::new(11, 1, vec![ 23 | -0.1, -2., -9., -101., -666.7, 24 | 0., 0.1, 1., 11., 99., 456.7 25 | ]); 26 | let targets = Vector::new(vec![ 27 | -1., -1., -1., -1., -1., 28 | 1., 1., 1., 1., 1., 1. 29 | ]); 30 | 31 | // Trainee 32 | let mut svm_mod = SVM::new(HyperTan::new(100., 0.), 0.3); 33 | // Our train function returns a Result<(), E> 34 | svm_mod.train(&inputs, &targets).unwrap(); 35 | 36 | println!("Evaluation..."); 37 | let mut hits = 0; 38 | let mut misses = 0; 39 | // Evaluation 40 | // Note: We could pass all input values at once to the `predict` method! 41 | // Here, we use a loop just to count and print logs. 42 | for n in (-1000..1000).filter(|&x| x % 100 == 0) { 43 | let nf = n as f64; 44 | let input = Matrix::new(1, 1, vec![nf]); 45 | let out = svm_mod.predict(&input).unwrap(); 46 | let res = if out[0] * nf > 0. { 47 | hits += 1; 48 | true 49 | } else if nf == 0. { 50 | hits += 1; 51 | true 52 | } else { 53 | misses += 1; 54 | false 55 | }; 56 | 57 | println!("{} -> {}: {}", Matrix::data(&input)[0], out[0], res); 58 | } 59 | 60 | println!("Performance report:"); 61 | println!("Hits: {}, Misses: {}", hits, misses); 62 | let hits_f = hits as f64; 63 | let total = (hits + misses) as f64; 64 | println!("Accuracy: {}", (hits_f / total) * 100.); 65 | } 66 | -------------------------------------------------------------------------------- /src/analysis/confusion_matrix.rs: -------------------------------------------------------------------------------- 1 | //! Module to compute the confusion matrix of a set of predictions. 2 | 3 | use std::hash::Hash; 4 | use std::collections::HashMap; 5 | use linalg::Matrix; 6 | 7 | /// Returns a square matrix C where C_ij is the count of the samples which were 8 | /// predicted to lie in the class with jth label but actually lie in the class with 9 | /// ith label. 10 | /// 11 | /// # Arguments 12 | /// * `predictions` - A series of model predictions. 13 | /// * `targets` - A slice of equal length to predictions, containing the 14 | /// target results. 15 | /// * `labels` - If None then the rows and columns of the returned matrix 16 | /// correspond to the distinct labels appearing in either 17 | /// predictions or targets, in increasing order. 18 | /// If Some then the rows and columns correspond to the provided 19 | /// labels, in the provided order. Note that in this case the 20 | /// confusion matrix will only contain entries for the elements 21 | /// of `labels`. 
22 | /// 23 | /// # Examples 24 | /// ``` 25 | /// use rusty_machine::analysis::confusion_matrix::confusion_matrix; 26 | /// use rusty_machine::linalg::Matrix; 27 | /// 28 | /// let truth = vec![2, 0, 2, 2, 0, 1]; 29 | /// let predictions = vec![0, 0, 2, 2, 0, 2]; 30 | /// 31 | /// let confusion = confusion_matrix(&predictions, &truth, None); 32 | /// 33 | /// let expected = Matrix::new(3, 3, vec![ 34 | /// 2, 0, 0, 35 | /// 0, 0, 1, 36 | /// 1, 0, 2]); 37 | /// 38 | /// assert_eq!(confusion, expected); 39 | /// ``` 40 | /// # Panics 41 | /// 42 | /// - If user-provided labels are not distinct. 43 | /// - If predictions and targets have different lengths. 44 | pub fn confusion_matrix(predictions: &[T], 45 | targets: &[T], 46 | labels: Option>) -> Matrix 47 | where T: Ord + Eq + Hash + Copy 48 | { 49 | assert!(predictions.len() == targets.len(), 50 | "predictions and targets have different lengths"); 51 | 52 | let labels = match labels { 53 | Some(ls) => ls, 54 | None => ordered_distinct(predictions, targets) 55 | }; 56 | 57 | let mut label_to_index: HashMap = HashMap::new(); 58 | for (i, l) in labels.iter().enumerate() { 59 | match label_to_index.insert(*l, i) { 60 | None => {}, 61 | Some(_) => { panic!("labels must be distinct"); } 62 | } 63 | } 64 | 65 | let mut counts = Matrix::new(labels.len(), labels.len(), 66 | vec![0usize; labels.len() * labels.len()]); 67 | 68 | for (truth, pred) in targets.iter().zip(predictions) { 69 | if label_to_index.contains_key(truth) && label_to_index.contains_key(pred) { 70 | let row = label_to_index[truth]; 71 | let col = label_to_index[pred]; 72 | 73 | counts[[row, col]] += 1; 74 | } 75 | } 76 | 77 | counts 78 | } 79 | 80 | fn ordered_distinct(xs: &[T], ys: &[T]) -> Vec { 81 | let mut ds: Vec = xs.iter().chain(ys).cloned().collect(); 82 | ds.sort(); 83 | ds.dedup(); 84 | ds 85 | } 86 | 87 | #[cfg(test)] 88 | mod tests { 89 | use super::confusion_matrix; 90 | 91 | #[test] 92 | fn confusion_matrix_no_labels() { 93 | let truth = vec![2, 0, 2, 2, 0, 1]; 94 | let predictions = vec![0, 0, 2, 2, 0, 2]; 95 | 96 | let confusion = confusion_matrix(&predictions, &truth, None); 97 | 98 | let expected = matrix!(2, 0, 0; 99 | 0, 0, 1; 100 | 1, 0, 2); 101 | 102 | assert_eq!(confusion, expected); 103 | } 104 | 105 | #[test] 106 | fn confusion_matrix_with_labels_a_permutation_of_classes() { 107 | let truth = vec![2, 0, 2, 2, 0, 1]; 108 | let predictions = vec![0, 0, 2, 2, 0, 2]; 109 | 110 | let labels = vec![2, 1, 0]; 111 | let confusion = confusion_matrix(&predictions, &truth, Some(labels)); 112 | 113 | let expected = matrix!(2, 0, 1; 114 | 1, 0, 0; 115 | 0, 0, 2); 116 | 117 | assert_eq!(confusion, expected); 118 | } 119 | 120 | #[test] 121 | fn confusion_matrix_accepts_labels_intersecting_targets_and_disjoint_from_predictions() { 122 | let truth = vec![2, 0, 2, 2, 3, 1]; 123 | let predictions = vec![0, 0, 2, 2, 0, 2]; 124 | 125 | let labels = vec![1, 3]; 126 | let confusion = confusion_matrix(&predictions, &truth, Some(labels)); 127 | 128 | let expected = matrix!(0, 0; 129 | 0, 0); 130 | 131 | assert_eq!(confusion, expected); 132 | } 133 | 134 | #[test] 135 | fn confusion_matrix_accepts_labels_intersecting_predictions_and_disjoint_from_targets() { 136 | let truth = vec![0, 0, 2, 2, 0, 2]; 137 | let predictions = vec![2, 0, 2, 2, 3, 1]; 138 | 139 | let labels = vec![1, 3]; 140 | let confusion = confusion_matrix(&predictions, &truth, Some(labels)); 141 | 142 | let expected = matrix!(0, 0; 143 | 0, 0); 144 | 145 | assert_eq!(confusion, expected); 146 | } 147 | 148 | 
#[test] 149 | fn confusion_matrix_accepts_labels_disjoint_from_predictions_and_targets() { 150 | let truth = vec![0, 0, 2, 2, 0, 2]; 151 | let predictions = vec![2, 0, 2, 2, 3, 1]; 152 | 153 | let labels = vec![4, 5]; 154 | let confusion = confusion_matrix(&predictions, &truth, Some(labels)); 155 | 156 | let expected = matrix!(0, 0; 157 | 0, 0); 158 | 159 | assert_eq!(confusion, expected); 160 | } 161 | 162 | #[test] 163 | #[should_panic] 164 | fn confusion_matrix_rejects_duplicate_labels() { 165 | let truth = vec![0, 0, 2, 2, 0, 2]; 166 | let predictions = vec![2, 0, 2, 2, 3, 1]; 167 | 168 | let labels = vec![1, 1]; 169 | let _ = confusion_matrix(&predictions, &truth, Some(labels)); 170 | } 171 | 172 | #[test] 173 | #[should_panic] 174 | fn confusion_matrix_rejects_mismatched_prediction_and_target_lengths() { 175 | let truth = vec![0, 0, 2, 2, 0, 2]; 176 | let predictions = vec![2, 0, 2, 2]; 177 | let _ = confusion_matrix(&predictions, &truth, None); 178 | } 179 | } 180 | -------------------------------------------------------------------------------- /src/analysis/cross_validation.rs: -------------------------------------------------------------------------------- 1 | //! Module for performing cross-validation of models. 2 | 3 | use std::cmp; 4 | use std::iter::Chain; 5 | use std::slice::Iter; 6 | use linalg::{BaseMatrix, Matrix}; 7 | use learning::{LearningResult, SupModel}; 8 | use learning::toolkit::rand_utils::in_place_fisher_yates; 9 | 10 | /// Randomly splits the inputs into k 'folds'. For each fold a model 11 | /// is trained using all inputs except for that fold, and tested on the 12 | /// data in the fold. Returns the scores for each fold. 13 | /// 14 | /// # Arguments 15 | /// * `model` - Used to train and predict for each fold. 16 | /// * `inputs` - All input samples. 17 | /// * `targets` - All targets. 18 | /// * `k` - Number of folds to use. 19 | /// * `score` - Used to compare the outputs for each fold to the targets. Higher scores are better. See the `analysis::score` module for examples. 20 | /// 21 | /// # Examples 22 | /// ``` 23 | /// use rusty_machine::analysis::cross_validation::k_fold_validate; 24 | /// use rusty_machine::analysis::score::row_accuracy; 25 | /// use rusty_machine::learning::naive_bayes::{NaiveBayes, Bernoulli}; 26 | /// use rusty_machine::linalg::{BaseMatrix, Matrix}; 27 | /// 28 | /// let inputs = Matrix::new(3, 2, vec![1.0, 1.1, 29 | /// 5.2, 4.3, 30 | /// 6.2, 7.3]); 31 | /// 32 | /// let targets = Matrix::new(3, 3, vec![1.0, 0.0, 0.0, 33 | /// 0.0, 0.0, 1.0, 34 | /// 0.0, 0.0, 1.0]); 35 | /// 36 | /// let mut model = NaiveBayes::::new(); 37 | /// 38 | /// let accuracy_per_fold: Vec = k_fold_validate( 39 | /// &mut model, 40 | /// &inputs, 41 | /// &targets, 42 | /// 3, 43 | /// // Score each fold by the fraction of test samples where 44 | /// // the model's prediction equals the target. 
45 | /// row_accuracy 46 | /// ).unwrap(); 47 | /// ``` 48 | pub fn k_fold_validate(model: &mut M, 49 | inputs: &Matrix, 50 | targets: &Matrix, 51 | k: usize, 52 | score: S) -> LearningResult> 53 | where S: Fn(&Matrix, &Matrix) -> f64, 54 | M: SupModel, Matrix>, 55 | { 56 | assert_eq!(inputs.rows(), targets.rows()); 57 | let num_samples = inputs.rows(); 58 | let shuffled_indices = create_shuffled_indices(num_samples); 59 | let folds = Folds::new(&shuffled_indices, k); 60 | 61 | let mut costs: Vec = Vec::new(); 62 | 63 | for p in folds { 64 | // TODO: don't allocate fresh buffers for every fold 65 | let train_inputs = inputs.select_rows(p.train_indices_iter.clone()); 66 | let train_targets = targets.select_rows(p.train_indices_iter.clone()); 67 | let test_inputs = inputs.select_rows(p.test_indices_iter.clone()); 68 | let test_targets = targets.select_rows(p.test_indices_iter.clone()); 69 | 70 | model.train(&train_inputs, &train_targets)?; 71 | let outputs = model.predict(&test_inputs)?; 72 | costs.push(score(&outputs, &test_targets)); 73 | } 74 | 75 | Ok(costs) 76 | } 77 | 78 | /// A permutation of 0..n. 79 | struct ShuffledIndices(Vec); 80 | 81 | /// Permute the indices of the inputs samples. 82 | fn create_shuffled_indices(num_samples: usize) -> ShuffledIndices { 83 | let mut indices: Vec = (0..num_samples).collect(); 84 | in_place_fisher_yates(&mut indices); 85 | ShuffledIndices(indices) 86 | } 87 | 88 | /// A partition of indices of all available samples into 89 | /// a training set and a test set. 90 | struct Partition<'a> { 91 | train_indices_iter: TrainingIndices<'a>, 92 | test_indices_iter: TestIndices<'a> 93 | } 94 | 95 | #[derive(Clone)] 96 | struct TestIndices<'a>(Iter<'a, usize>); 97 | 98 | #[derive(Clone)] 99 | struct TrainingIndices<'a> { 100 | chain: Chain, Iter<'a, usize>>, 101 | size: usize 102 | } 103 | 104 | impl<'a> TestIndices<'a> { 105 | fn new(indices: &'a [usize]) -> TestIndices<'a> { 106 | TestIndices(indices.iter()) 107 | } 108 | } 109 | 110 | impl<'a> Iterator for TestIndices<'a> { 111 | type Item = &'a usize; 112 | 113 | fn next(&mut self) -> Option<&'a usize> { 114 | self.0.next() 115 | } 116 | } 117 | 118 | impl <'a> ExactSizeIterator for TestIndices<'a> { 119 | fn len(&self) -> usize { 120 | self.0.len() 121 | } 122 | } 123 | 124 | impl<'a> TrainingIndices<'a> { 125 | fn new(left: &'a [usize], right: &'a [usize]) -> TrainingIndices<'a> { 126 | let chain = left.iter().chain(right.iter()); 127 | TrainingIndices { 128 | chain: chain, 129 | size: left.len() + right.len() 130 | } 131 | } 132 | } 133 | 134 | impl<'a> Iterator for TrainingIndices<'a> { 135 | type Item = &'a usize; 136 | 137 | fn next(&mut self) -> Option<&'a usize> { 138 | self.chain.next() 139 | } 140 | } 141 | 142 | impl <'a> ExactSizeIterator for TrainingIndices<'a> { 143 | fn len(&self) -> usize { 144 | self.size 145 | } 146 | } 147 | 148 | /// An iterator over the sets of indices required for k-fold cross validation. 149 | struct Folds<'a> { 150 | num_folds: usize, 151 | indices: &'a[usize], 152 | count: usize 153 | } 154 | 155 | impl<'a> Folds<'a> { 156 | /// Let n = indices.len(), and k = num_folds. 157 | /// The first n % k folds have size n / k + 1 and the 158 | /// rest have size n / k. (In particular, if n % k == 0 then all 159 | /// folds are the same size.) 
160 | fn new(indices: &'a ShuffledIndices, num_folds: usize) -> Folds<'a> { 161 | let num_samples = indices.0.len(); 162 | assert!(num_folds > 1 && num_samples >= num_folds, 163 | "Require num_folds > 1 && num_samples >= num_folds"); 164 | 165 | Folds { 166 | num_folds: num_folds, 167 | indices: &indices.0, 168 | count: 0 169 | } 170 | } 171 | } 172 | 173 | impl<'a> Iterator for Folds<'a> { 174 | type Item = Partition<'a>; 175 | 176 | fn next(&mut self) -> Option { 177 | if self.count >= self.num_folds { 178 | return None; 179 | } 180 | 181 | let num_samples = self.indices.len(); 182 | let q = num_samples / self.num_folds; 183 | let r = num_samples % self.num_folds; 184 | let fold_start = self.count * q + cmp::min(self.count, r); 185 | let fold_size = if self.count >= r {q} else {q + 1}; 186 | let fold_end = fold_start + fold_size; 187 | 188 | self.count += 1; 189 | 190 | let prefix = &self.indices[..fold_start]; 191 | let suffix = &self.indices[fold_end..]; 192 | let infix = &self.indices[fold_start..fold_end]; 193 | Some(Partition { 194 | train_indices_iter: TrainingIndices::new(prefix, suffix), 195 | test_indices_iter: TestIndices::new(infix) 196 | }) 197 | } 198 | } 199 | 200 | #[cfg(test)] 201 | mod tests { 202 | use super::{ShuffledIndices, Folds}; 203 | 204 | // k % n == 0 205 | #[test] 206 | fn test_folds_n6_k3() { 207 | let idxs = ShuffledIndices(vec![0, 1, 2, 3, 4, 5]); 208 | let folds = collect_folds(Folds::new(&idxs, 3)); 209 | 210 | assert_eq!(folds, vec![ 211 | (vec![2, 3, 4, 5], vec![0, 1]), 212 | (vec![0, 1, 4, 5], vec![2, 3]), 213 | (vec![0, 1, 2, 3], vec![4, 5]) 214 | ]); 215 | } 216 | 217 | // k % n == 1 218 | #[test] 219 | fn test_folds_n5_k2() { 220 | let idxs = ShuffledIndices(vec![0, 1, 2, 3, 4]); 221 | let folds = collect_folds(Folds::new(&idxs, 2)); 222 | 223 | assert_eq!(folds, vec![ 224 | (vec![3, 4], vec![0, 1, 2]), 225 | (vec![0, 1, 2], vec![3, 4]) 226 | ]); 227 | } 228 | 229 | // k % n == 2 230 | #[test] 231 | fn test_folds_n6_k4() { 232 | let idxs = ShuffledIndices(vec![0, 1, 2, 3, 4, 5]); 233 | let folds = collect_folds(Folds::new(&idxs, 4)); 234 | 235 | assert_eq!(folds, vec![ 236 | (vec![2, 3, 4, 5], vec![0, 1]), 237 | (vec![0, 1, 4, 5], vec![2, 3]), 238 | (vec![0, 1, 2, 3, 5], vec![4]), 239 | (vec![0, 1, 2, 3, 4], vec![5]) 240 | ]); 241 | } 242 | 243 | // k == n 244 | #[test] 245 | fn test_folds_n4_k4() { 246 | let idxs = ShuffledIndices(vec![0, 1, 2, 3]); 247 | let folds = collect_folds(Folds::new(&idxs, 4)); 248 | 249 | assert_eq!(folds, vec![ 250 | (vec![1, 2, 3], vec![0]), 251 | (vec![0, 2, 3], vec![1]), 252 | (vec![0, 1, 3], vec![2]), 253 | (vec![0, 1, 2], vec![3]) 254 | ]); 255 | } 256 | 257 | #[test] 258 | #[should_panic] 259 | fn test_folds_rejects_large_k() { 260 | let idxs = ShuffledIndices(vec![0, 1, 2]); 261 | let _ = collect_folds(Folds::new(&idxs, 4)); 262 | } 263 | 264 | // Check we're really returning iterators into the shuffled 265 | // indices rather than into (0..n). 
266 | #[test] 267 | fn test_folds_unordered_indices() { 268 | let idxs = ShuffledIndices(vec![5, 4, 3, 2, 1, 0]); 269 | let folds = collect_folds(Folds::new(&idxs, 3)); 270 | 271 | assert_eq!(folds, vec![ 272 | (vec![3, 2, 1, 0], vec![5, 4]), 273 | (vec![5, 4, 1, 0], vec![3, 2]), 274 | (vec![5, 4, 3, 2], vec![1, 0]) 275 | ]); 276 | } 277 | 278 | fn collect_folds<'a>(folds: Folds<'a>) -> Vec<(Vec, Vec)> { 279 | folds 280 | .map(|p| 281 | (p.train_indices_iter.map(|x| *x).collect::>(), 282 | p.test_indices_iter.map(|x| *x).collect::>())) 283 | .collect::, Vec)>>() 284 | } 285 | } 286 | -------------------------------------------------------------------------------- /src/data/transforms/minmax.rs: -------------------------------------------------------------------------------- 1 | //! The Min-Max transformer 2 | //! 3 | //! This module contains the `MinMaxScaler` transformer. 4 | //! 5 | //! The `MinMaxScaler` transformer is used to transform input data 6 | //! so that the minimum and maximum of each column are as specified. 7 | //! This is commonly used to transform the data to have a minimum of 8 | //! `0` and a maximum of `1`. 9 | //! 10 | //! # Examples 11 | //! 12 | //! ``` 13 | //! use rusty_machine::data::transforms::{Transformer, TransformFitter, MinMaxFitter}; 14 | //! use rusty_machine::linalg::Matrix; 15 | //! 16 | //! let inputs = Matrix::new(2, 2, vec![-1.0, 2.0, 1.5, 3.0]); 17 | //! 18 | //! // Constructs a new `MinMaxScaler` to map minimum to 0 and maximum 19 | //! // to 1. 20 | //! let mut transformer = MinMaxFitter::default().fit(&inputs).unwrap(); 21 | //! 22 | //! 23 | //! // Transform the inputs to get output data with required minimum 24 | //! // and maximum. 25 | //! let transformed = transformer.transform(inputs).unwrap(); 26 | //! ``` 27 | 28 | use learning::error::{Error, ErrorKind}; 29 | use learning::LearningResult; 30 | use linalg::{Matrix, BaseMatrix, BaseMatrixMut, Vector}; 31 | use super::{Invertible, Transformer, TransformFitter}; 32 | 33 | use rulinalg::utils; 34 | 35 | use libnum::Float; 36 | 37 | /// A builder used to construct a `MinMaxScaler` 38 | #[derive(Debug)] 39 | pub struct MinMaxFitter { 40 | scaled_min: T, 41 | scaled_max: T 42 | } 43 | 44 | impl Default for MinMaxFitter { 45 | fn default() -> Self { 46 | MinMaxFitter { 47 | scaled_min: T::zero(), 48 | scaled_max: T::one() 49 | } 50 | } 51 | } 52 | 53 | impl MinMaxFitter { 54 | /// Construct a new `MinMaxFitter` with 55 | /// specified mean and standard deviation. 56 | /// 57 | /// Note that this function does not create a `Transformer` 58 | /// only a builder which can be used to produce a fitted `Transformer`. 59 | /// 60 | /// # Examples 61 | /// 62 | /// ``` 63 | /// use rusty_machine::data::transforms::MinMaxFitter; 64 | /// use rusty_machine::linalg::Matrix; 65 | /// 66 | /// let fitter = MinMaxFitter::new(0.0, 1.0); 67 | /// 68 | /// // We can call `fit` from the `transform::TransformFitter` 69 | /// // trait to create a `MinMaxScaler` used to actually transform data. 
70 | /// use rusty_machine::data::transforms::TransformFitter; 71 | /// let mat = Matrix::new(2,2,vec![1.0, 2.0, 3.0, 5.0]); 72 | /// let transformer = fitter.fit(&mat); 73 | /// ``` 74 | pub fn new(min: T, max: T) -> Self { 75 | MinMaxFitter { 76 | scaled_min: min, 77 | scaled_max: max 78 | } 79 | } 80 | } 81 | 82 | impl TransformFitter, MinMaxScaler> for MinMaxFitter { 83 | fn fit(self, inputs: &Matrix) -> LearningResult> { 84 | let features = inputs.cols(); 85 | 86 | // TODO: can use min, max 87 | // https://github.com/AtheMathmo/rulinalg/pull/115 88 | let mut input_min_max = vec![(T::max_value(), T::min_value()); features]; 89 | 90 | for row in inputs.row_iter() { 91 | for (idx, (feature, min_max)) in row.into_iter().zip(input_min_max.iter_mut()).enumerate() { 92 | if !feature.is_finite() { 93 | return Err(Error::new(ErrorKind::InvalidData, 94 | format!("Data point in column {} cannot be \ 95 | processed", 96 | idx))); 97 | } 98 | // Update min 99 | if *feature < min_max.0 { 100 | min_max.0 = *feature; 101 | } 102 | // Update max 103 | if *feature > min_max.1 { 104 | min_max.1 = *feature; 105 | } 106 | } 107 | } 108 | 109 | // We'll scale each feature by a * x + b. 110 | // Where scales holds `a` per column and consts 111 | // holds `b`. 112 | let scales = input_min_max.iter() 113 | .map(|&(x, y)| { 114 | let s = (self.scaled_max - self.scaled_min) / (y - x); 115 | if s.is_finite() { 116 | Ok(s) 117 | } else { 118 | Err(Error::new(ErrorKind::InvalidData, 119 | "Constant feature columns not supported.")) 120 | } 121 | }) 122 | .collect::, _>>()?; 123 | 124 | let consts = input_min_max.iter() 125 | .zip(scales.iter()) 126 | .map(|(&(_, x), &s)| self.scaled_max - x * s) 127 | .collect::>(); 128 | 129 | Ok(MinMaxScaler { 130 | scale_factors: Vector::new(scales), 131 | const_factors: Vector::new(consts) 132 | }) 133 | } 134 | } 135 | 136 | /// The `MinMaxScaler` 137 | /// 138 | /// The `MinMaxScaler` provides an implementation of `Transformer` 139 | /// which allows us to transform the input data to have a new minimum 140 | /// and maximum per column. 141 | /// 142 | /// See the module description for more information. 
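///
/// Each column is transformed by the affine map `y = a * x + b`, where
/// `a = (scaled_max - scaled_min) / (max - min)` and `b = scaled_max - max * a`
/// are computed from that column's minimum and maximum during fitting. For
/// example, a column spanning `[2.0, 4.0]` rescaled to `[0.0, 1.0]` uses
/// `a = 0.5` and `b = -1.0`, so `3.0` maps to `0.5`.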
143 | #[derive(Debug)] 144 | pub struct MinMaxScaler { 145 | /// Values to scale each column by 146 | scale_factors: Vector, 147 | /// Values to add to each column after scaling 148 | const_factors: Vector, 149 | } 150 | 151 | 152 | impl Transformer> for MinMaxScaler { 153 | fn transform(&mut self, mut inputs: Matrix) -> Result, Error> { 154 | if self.scale_factors.size() != inputs.cols() { 155 | Err(Error::new(ErrorKind::InvalidData, 156 | "Input data has different number of columns than fitted data.")) 157 | } else { 158 | for mut row in inputs.row_iter_mut() { 159 | utils::in_place_vec_bin_op(row.raw_slice_mut(), self.scale_factors.data(), |x, &y| { 160 | *x = *x * y; 161 | }); 162 | 163 | utils::in_place_vec_bin_op(row.raw_slice_mut(), self.const_factors.data(), |x, &y| { 164 | *x = *x + y; 165 | }); 166 | } 167 | Ok(inputs) 168 | } 169 | } 170 | } 171 | 172 | impl Invertible> for MinMaxScaler { 173 | 174 | fn inv_transform(&self, mut inputs: Matrix) -> Result, Error> { 175 | let features = self.scale_factors.size(); 176 | if inputs.cols() != features { 177 | return Err(Error::new(ErrorKind::InvalidData, 178 | "Input data has different number of columns than fitted data.")); 179 | } 180 | 181 | for mut row in inputs.row_iter_mut() { 182 | for i in 0..features { 183 | row[i] = (row[i] - self.const_factors[i]) / self.scale_factors[i]; 184 | } 185 | } 186 | 187 | Ok(inputs) 188 | } 189 | } 190 | 191 | #[cfg(test)] 192 | mod tests { 193 | use super::*; 194 | use super::super::{Transformer, TransformFitter, Invertible}; 195 | use linalg::Matrix; 196 | use std::f64; 197 | 198 | #[test] 199 | fn nan_data_test() { 200 | let inputs = Matrix::new(2, 2, vec![f64::NAN; 4]); 201 | 202 | let res = MinMaxFitter::new(0.0, 1.0).fit(&inputs); 203 | assert!(res.is_err()); 204 | } 205 | 206 | #[test] 207 | fn infinity_data_test() { 208 | let inputs = Matrix::new(2, 2, vec![f64::INFINITY; 4]); 209 | 210 | let res = MinMaxFitter::new(0.0, 1.0).fit(&inputs); 211 | assert!(res.is_err()); 212 | } 213 | 214 | #[test] 215 | fn basic_scale_test() { 216 | let inputs = Matrix::new(2, 2, vec![-1.0f32, 2.0, 0.0, 3.0]); 217 | 218 | let mut scaler = MinMaxFitter::new(0.0, 1.0).fit(&inputs).unwrap(); 219 | let transformed = scaler.transform(inputs).unwrap(); 220 | 221 | assert!(transformed.data().iter().all(|&x| x >= 0.0)); 222 | assert!(transformed.data().iter().all(|&x| x <= 1.0)); 223 | 224 | // First row scales to 0 and second to 1 225 | transformed[[0, 0]].abs() < 1e-10; 226 | transformed[[0, 1]].abs() < 1e-10; 227 | (transformed[[1, 0]] - 1.0).abs() < 1e-10; 228 | (transformed[[1, 1]] - 1.0).abs() < 1e-10; 229 | } 230 | 231 | #[test] 232 | fn custom_scale_test() { 233 | let inputs = Matrix::new(2, 2, vec![-1.0f32, 2.0, 0.0, 3.0]); 234 | 235 | let mut scaler = MinMaxFitter::new(1.0, 3.0).fit(&inputs).unwrap(); 236 | let transformed = scaler.transform(inputs).unwrap(); 237 | 238 | assert!(transformed.data().iter().all(|&x| x >= 1.0)); 239 | assert!(transformed.data().iter().all(|&x| x <= 3.0)); 240 | 241 | // First row scales to 1 and second to 3 242 | (transformed[[0, 0]] - 1.0).abs() < 1e-10; 243 | (transformed[[0, 1]] - 1.0).abs() < 1e-10; 244 | (transformed[[1, 0]] - 3.0).abs() < 1e-10; 245 | (transformed[[1, 1]] - 3.0).abs() < 1e-10; 246 | } 247 | 248 | #[test] 249 | fn constant_feature_test() { 250 | let inputs = Matrix::new(2, 2, vec![1.0, 2.0, 1.0, 3.0]); 251 | 252 | let res = MinMaxFitter::new(0.0, 1.0).fit(&inputs); 253 | assert!(res.is_err()); 254 | } 255 | 256 | #[test] 257 | fn 
inv_transform_identity_test() { 258 | let inputs = Matrix::new(2, 2, vec![-1.0f32, 2.0, 0.0, 3.0]); 259 | 260 | let mut scaler = MinMaxFitter::new(1.0, 3.0).fit(&inputs).unwrap(); 261 | let transformed = scaler.transform(inputs.clone()).unwrap(); 262 | 263 | let original = scaler.inv_transform(transformed).unwrap(); 264 | 265 | assert!((inputs - original).data().iter().all(|x| x.abs() < 1e-5)); 266 | } 267 | } 268 | -------------------------------------------------------------------------------- /src/data/transforms/mod.rs: -------------------------------------------------------------------------------- 1 | //! The Transforms module 2 | //! 3 | //! This module contains traits used to transform data using common 4 | //! techniques. It also reexports these `Transformer`s from child modules. 5 | //! 6 | //! The `Transformer` trait provides a shared interface for all of the 7 | //! data preprocessing transformations in rusty-machine. Some of these `Transformations` 8 | //! can be inverted via the `Invertible` trait. 9 | //! 10 | //! Note that some `Transformer`s can not be created without first using the 11 | //! `TransformFitter` trait. 12 | //! 13 | //! # Examples 14 | //! 15 | //! ``` 16 | //! use rusty_machine::data::transforms::{Transformer, TransformFitter, MinMaxFitter}; 17 | //! use rusty_machine::data::transforms::minmax::MinMaxScaler; 18 | //! use rusty_machine::linalg::Matrix; 19 | //! 20 | //! // Some data that we want to scale between 0 and 1 21 | //! let data = Matrix::new(3, 2, vec![-1.5, 1.0, 2.0, 3.0, -1.0, 2.5]); 22 | //! // Create a new `MinMaxScaler` using the `MinMaxFitter` 23 | //! let mut scaler: MinMaxScaler = MinMaxFitter::new(0.0, 1.0).fit(&data).expect("Failed to fit transformer"); 24 | //! // Transform the data using the scaler 25 | //! let transformed = scaler.transform(data).expect("Failed to transformer data"); 26 | //! ``` 27 | 28 | pub mod minmax; 29 | pub mod normalize; 30 | pub mod standardize; 31 | pub mod shuffle; 32 | 33 | use learning::LearningResult; 34 | 35 | pub use self::minmax::MinMaxFitter; 36 | pub use self::normalize::Normalizer; 37 | pub use self::shuffle::Shuffler; 38 | pub use self::standardize::StandardizerFitter; 39 | 40 | /// A trait used to construct Transformers which must first be fitted 41 | pub trait TransformFitter> { 42 | /// Fit the inputs to create the `Transformer` 43 | fn fit(self, inputs: &U) -> LearningResult; 44 | } 45 | 46 | /// Trait for data transformers 47 | pub trait Transformer { 48 | /// Transforms the inputs 49 | fn transform(&mut self, inputs: T) -> LearningResult; 50 | } 51 | 52 | /// Trait for invertible data transformers 53 | pub trait Invertible : Transformer { 54 | /// Maps the inputs using the inverse of the fitted transform. 55 | fn inv_transform(&self, inputs: T) -> LearningResult; 56 | } 57 | -------------------------------------------------------------------------------- /src/data/transforms/normalize.rs: -------------------------------------------------------------------------------- 1 | //! The Normalizing Transformer 2 | //! 3 | //! This module contains the `Normalizer` transformer. 4 | //! 5 | //! The `Normalizer` transformer is used to transform input data 6 | //! so that the norm of each row is equal to 1. By default the 7 | //! `Normalizer` uses the `Euclidean` norm. 8 | //! 9 | //! If input data has a row with all 0, `Normalizer` keeps the row as it is. 10 | //! 11 | //! Because transformation is performed per row independently, 12 | //! inverse transformation is not supported. 13 | //! 14 | //! 
# Examples 15 | //! 16 | //! ``` 17 | //! use rusty_machine::data::transforms::{Transformer, Normalizer}; 18 | //! use rusty_machine::linalg::Matrix; 19 | //! 20 | //! // Constructs a new `Normalizer` 21 | //! let mut transformer = Normalizer::default(); 22 | //! 23 | //! let inputs = Matrix::new(2, 2, vec![-1.0, 2.0, 1.5, 3.0]); 24 | //! 25 | //! // Transform the inputs 26 | //! let transformed = transformer.transform(inputs).unwrap(); 27 | //! ``` 28 | 29 | use learning::error::{Error, ErrorKind}; 30 | use linalg::{Matrix, MatrixSlice, BaseMatrix, BaseMatrixMut}; 31 | use rulinalg::norm::{MatrixNorm, Euclidean}; 32 | 33 | use super::Transformer; 34 | 35 | use libnum::Float; 36 | 37 | use std::marker::PhantomData; 38 | 39 | /// The Normalizer 40 | /// 41 | /// The Normalizer provides an implementation of `Transformer` 42 | /// which allows us to transform the all rows to have the same norm. 43 | /// 44 | /// The default `Normalizer` will use the `Euclidean` norm. 45 | /// 46 | /// See the module description for more information. 47 | #[derive(Debug)] 48 | pub struct Normalizer 49 | where for<'a> M: MatrixNorm> 50 | { 51 | norm: M, 52 | _marker: PhantomData 53 | } 54 | 55 | /// Create a `Normalizer` with a Euclidean norm. 56 | impl Default for Normalizer { 57 | fn default() -> Self { 58 | Normalizer { 59 | norm: Euclidean, 60 | _marker: PhantomData, 61 | } 62 | } 63 | } 64 | 65 | impl Normalizer 66 | where for<'a> M: MatrixNorm> 67 | { 68 | /// Constructs a new `Normalizer` with given norm. 69 | /// 70 | /// # Examples 71 | /// 72 | /// ``` 73 | /// use rusty_machine::data::transforms::Normalizer; 74 | /// use rusty_machine::linalg::norm::Euclidean; 75 | /// 76 | /// // Constructs a new `Normalizer` 77 | /// let _ = Normalizer::::new(Euclidean); 78 | /// ``` 79 | pub fn new(norm: M) -> Self { 80 | Normalizer { 81 | norm: norm, 82 | _marker: PhantomData 83 | } 84 | } 85 | } 86 | 87 | impl Transformer> for Normalizer 88 | where for<'a> M: MatrixNorm> 89 | { 90 | fn transform(&mut self, mut inputs: Matrix) -> Result, Error> { 91 | let dists: Vec = inputs.row_iter().map(|m| self.norm.norm(&*m)).collect(); 92 | for (mut row, &d) in inputs.row_iter_mut().zip(dists.iter()) { 93 | 94 | if !d.is_finite() { 95 | return Err(Error::new(ErrorKind::InvalidData, 96 | "Some data point is non-finite.")); 97 | } else if d != T::zero() { 98 | // no change if distance is 0 99 | *row /= d; 100 | } 101 | } 102 | Ok(inputs) 103 | } 104 | } 105 | 106 | 107 | #[cfg(test)] 108 | mod tests { 109 | use super::*; 110 | use super::super::Transformer; 111 | use linalg::Matrix; 112 | 113 | use std::f64; 114 | 115 | #[test] 116 | fn nan_data_test() { 117 | let inputs = Matrix::new(2, 2, vec![f64::NAN; 4]); 118 | let mut normalizer = Normalizer::default(); 119 | let res = normalizer.transform(inputs); 120 | assert!(res.is_err()); 121 | } 122 | 123 | #[test] 124 | fn inf_data_test() { 125 | let inputs = Matrix::new(2, 2, vec![f64::INFINITY; 4]); 126 | let mut normalizer = Normalizer::default(); 127 | let res = normalizer.transform(inputs); 128 | assert!(res.is_err()); 129 | } 130 | 131 | #[test] 132 | fn single_row_test() { 133 | let inputs = matrix![1.0, 2.0]; 134 | let mut normalizer = Normalizer::default(); 135 | let transformed = normalizer.transform(inputs).unwrap(); 136 | 137 | let exp = matrix![0.4472135954999579, 0.8944271909999159]; 138 | assert_matrix_eq!(transformed, exp); 139 | } 140 | 141 | #[test] 142 | fn basic_normalizer_test() { 143 | let inputs = matrix![-1.0f32, 2.0; 144 | 0.0, 3.0]; 145 | 146 | let mut 
normalizer = Normalizer::default(); 147 | let transformed = normalizer.transform(inputs).unwrap(); 148 | 149 | let exp = matrix![-0.4472135954999579, 0.8944271909999159; 150 | 0., 1.]; 151 | assert_matrix_eq!(transformed, exp); 152 | 153 | let inputs = matrix![1., 2.; 154 | 10., 20.; 155 | 100., 200.]; 156 | 157 | let transformed = normalizer.transform(inputs).unwrap(); 158 | 159 | let exp = matrix![0.4472135954999579, 0.8944271909999159; 160 | 0.4472135954999579, 0.8944271909999159; 161 | 0.4472135954999579, 0.8944271909999159]; 162 | assert_matrix_eq!(transformed, exp); 163 | 164 | let inputs = matrix![1., 2., 10.; 165 | 0., 10., 20.; 166 | 100., 10., 200.; 167 | 0., 0., 0.]; 168 | let transformed = normalizer.transform(inputs).unwrap(); 169 | 170 | let exp = matrix![0.09759000729485333, 0.19518001458970666, 0.9759000729485332; 171 | 0., 0.4472135954999579, 0.8944271909999159; 172 | 0.4467670516087703, 0.04467670516087703, 0.8935341032175406; 173 | 0., 0., 0.]; 174 | assert_matrix_eq!(transformed, exp); 175 | } 176 | } 177 | -------------------------------------------------------------------------------- /src/data/transforms/shuffle.rs: -------------------------------------------------------------------------------- 1 | //! The Shuffler 2 | //! 3 | //! This module contains the `Shuffler` transformer. `Shuffler` implements the 4 | //! `Transformer` trait and is used to shuffle the rows of an input matrix. 5 | //! You can control the random number generator used by the `Shuffler`. 6 | //! 7 | //! # Examples 8 | //! 9 | //! ``` 10 | //! use rusty_machine::linalg::Matrix; 11 | //! use rusty_machine::data::transforms::Transformer; 12 | //! use rusty_machine::data::transforms::shuffle::Shuffler; 13 | //! 14 | //! // Create an input matrix that we want to shuffle 15 | //! let mat = Matrix::new(3, 2, vec![1.0, 2.0, 16 | //! 3.0, 4.0, 17 | //! 5.0, 6.0]); 18 | //! 19 | //! // Create a new shuffler 20 | //! let mut shuffler = Shuffler::default(); 21 | //! let shuffled_mat = shuffler.transform(mat).unwrap(); 22 | //! 23 | //! println!("{}", shuffled_mat); 24 | //! ``` 25 | 26 | use learning::LearningResult; 27 | use linalg::{Matrix, BaseMatrix, BaseMatrixMut}; 28 | use super::Transformer; 29 | 30 | use rand::{Rng, thread_rng, ThreadRng}; 31 | 32 | /// The `Shuffler` 33 | /// 34 | /// Provides an implementation of `Transformer` which shuffles 35 | /// the input rows in place. 36 | #[derive(Debug)] 37 | pub struct Shuffler { 38 | rng: R, 39 | } 40 | 41 | impl Shuffler { 42 | /// Construct a new `Shuffler` with given random number generator. 43 | /// 44 | /// # Examples 45 | /// 46 | /// ``` 47 | /// # extern crate rand; 48 | /// # extern crate rusty_machine; 49 | /// 50 | /// use rusty_machine::data::transforms::Transformer; 51 | /// use rusty_machine::data::transforms::shuffle::Shuffler; 52 | /// use rand::{StdRng, SeedableRng}; 53 | /// 54 | /// # fn main() { 55 | /// // We can create a seeded rng 56 | /// let rng = StdRng::from_seed(&[1, 2, 3]); 57 | /// 58 | /// let shuffler = Shuffler::new(rng); 59 | /// # } 60 | /// ``` 61 | pub fn new(rng: R) -> Self { 62 | Shuffler { rng: rng } 63 | } 64 | } 65 | 66 | /// Create a new shuffler using the `rand::thread_rng` function 67 | /// to provide a randomly seeded random number generator. 68 | impl Default for Shuffler { 69 | fn default() -> Self { 70 | Shuffler { rng: thread_rng() } 71 | } 72 | } 73 | 74 | /// The `Shuffler` will transform the input `Matrix` by shuffling 75 | /// its rows in place. 
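/// Row `i` is swapped with a row chosen uniformly at random from the rows at
/// or after position `i`, so with a uniform RNG every ordering of the rows is
/// equally likely.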
76 | /// 77 | /// Under the hood this uses a Fisher-Yates shuffle. 78 | impl Transformer> for Shuffler { 79 | fn transform(&mut self, mut inputs: Matrix) -> LearningResult> { 80 | let n = inputs.rows(); 81 | 82 | for i in 0..n { 83 | // Swap i with a random point after it 84 | let j = self.rng.gen_range(0, n - i); 85 | inputs.swap_rows(i, i + j); 86 | } 87 | Ok(inputs) 88 | } 89 | } 90 | 91 | #[cfg(test)] 92 | mod tests { 93 | use linalg::Matrix; 94 | use super::super::Transformer; 95 | use super::Shuffler; 96 | 97 | use rand::{StdRng, SeedableRng}; 98 | 99 | #[test] 100 | fn seeded_shuffle() { 101 | let rng = StdRng::from_seed(&[1, 2, 3]); 102 | let mut shuffler = Shuffler::new(rng); 103 | 104 | let mat = Matrix::new(4, 2, vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]); 105 | let shuffled = shuffler.transform(mat).unwrap(); 106 | 107 | assert_eq!(shuffled.into_vec(), 108 | vec![3.0, 4.0, 1.0, 2.0, 7.0, 8.0, 5.0, 6.0]); 109 | } 110 | 111 | #[test] 112 | fn shuffle_single_row() { 113 | let mut shuffler = Shuffler::default(); 114 | 115 | let mat = Matrix::new(1, 8, vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]); 116 | let shuffled = shuffler.transform(mat).unwrap(); 117 | 118 | assert_eq!(shuffled.into_vec(), 119 | vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]); 120 | } 121 | } -------------------------------------------------------------------------------- /src/data/transforms/standardize.rs: -------------------------------------------------------------------------------- 1 | //! The Standardizing Transformer 2 | //! 3 | //! This module contains the `Standardizer` transformer. 4 | //! 5 | //! The `Standardizer` transformer is used to transform input data 6 | //! so that the mean and standard deviation of each column are as 7 | //! specified. This is commonly used to transform the data to have `0` mean 8 | //! and a standard deviation of `1`. 9 | //! 10 | //! # Examples 11 | //! 12 | //! ``` 13 | //! use rusty_machine::data::transforms::{Transformer, TransformFitter, StandardizerFitter}; 14 | //! use rusty_machine::linalg::Matrix; 15 | //! 16 | //! let inputs = Matrix::new(2, 2, vec![-1.0, 2.0, 1.5, 3.0]); 17 | //! 18 | //! // Constructs a new `Standardizer` to map to mean 0 and standard 19 | //! // deviation of 1. 20 | //! let mut transformer = StandardizerFitter::default().fit(&inputs).unwrap(); 21 | //! 22 | //! // Transform the inputs to get output data with required mean and 23 | //! // standard deviation. 24 | //! let transformed = transformer.transform(inputs).unwrap(); 25 | //! ``` 26 | 27 | use learning::LearningResult; 28 | use learning::error::{Error, ErrorKind}; 29 | use linalg::{Matrix, Vector, Axes, BaseMatrix, BaseMatrixMut}; 30 | use super::{Invertible, Transformer, TransformFitter}; 31 | 32 | use rulinalg::utils; 33 | 34 | use libnum::{Float, FromPrimitive}; 35 | 36 | /// A builder used to construct a `Standardizer` 37 | #[derive(Debug)] 38 | pub struct StandardizerFitter { 39 | scaled_mean: T, 40 | scaled_stdev: T 41 | } 42 | 43 | impl Default for StandardizerFitter { 44 | fn default() -> Self { 45 | StandardizerFitter { 46 | scaled_mean: T::zero(), 47 | scaled_stdev: T::one() 48 | } 49 | } 50 | } 51 | 52 | impl StandardizerFitter { 53 | /// Construct a new `StandardizerFitter` with 54 | /// specified mean and standard deviation. 55 | /// 56 | /// Note that this function does not create a `Transformer` 57 | /// only a builder which can be used to produce a fitted `Transformer`. 
58 | /// 59 | /// # Examples 60 | /// 61 | /// ``` 62 | /// use rusty_machine::data::transforms::StandardizerFitter; 63 | /// use rusty_machine::linalg::Matrix; 64 | /// 65 | /// let fitter = StandardizerFitter::new(0.0, 1.0); 66 | /// 67 | /// // We can call `fit` from the `transform::TransformFitter` 68 | /// // trait to create a `Standardizer` used to actually transform data. 69 | /// use rusty_machine::data::transforms::TransformFitter; 70 | /// let mat = Matrix::new(2, 2, vec![1.0, 2.0, 3.0, 5.0]); 71 | /// let transformer = fitter.fit(&mat); 72 | /// ``` 73 | pub fn new(mean: T, stdev: T) -> StandardizerFitter { 74 | StandardizerFitter { 75 | scaled_mean: mean, 76 | scaled_stdev: stdev 77 | } 78 | } 79 | } 80 | 81 | impl TransformFitter, Standardizer> for StandardizerFitter { 82 | fn fit(self, inputs: &Matrix) -> LearningResult> { 83 | if inputs.rows() <= 1 { 84 | Err(Error::new(ErrorKind::InvalidData, 85 | "Cannot standardize data with only one row.")) 86 | } else { 87 | let mean = inputs.mean(Axes::Row); 88 | let variance = inputs.variance(Axes::Row).map_err(|_| { 89 | Error::new(ErrorKind::InvalidData, "Cannot compute variance of data.") 90 | })?; 91 | 92 | if mean.data().iter().any(|x| !x.is_finite()) { 93 | return Err(Error::new(ErrorKind::InvalidData, "Some data point is non-finite.")); 94 | } 95 | 96 | Ok(Standardizer { 97 | means: mean, 98 | variances: variance, 99 | scaled_mean: self.scaled_mean, 100 | scaled_stdev: self.scaled_stdev 101 | }) 102 | } 103 | } 104 | } 105 | 106 | /// The Standardizer 107 | /// 108 | /// The Standardizer provides an implementation of `Transformer` 109 | /// which allows us to transform the input data to have a new mean 110 | /// and standard deviation. 111 | /// 112 | /// See the module description for more information. 
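///
/// Each column is transformed by `y = (x - mean) * scaled_stdev / stdev + scaled_mean`,
/// using the per-column mean and variance estimated when the transformer was fitted.
/// For example, a column with mean `3.0` and standard deviation `2.0`, standardized
/// to mean `0.0` and standard deviation `1.0`, maps `5.0` to `1.0`.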
113 | #[derive(Debug)] 114 | pub struct Standardizer { 115 | /// Means per column of input data 116 | means: Vector, 117 | /// Variances per column of input data 118 | variances: Vector, 119 | /// The mean of the new data (default 0) 120 | scaled_mean: T, 121 | /// The standard deviation of the new data (default 1) 122 | scaled_stdev: T, 123 | } 124 | 125 | impl Transformer> for Standardizer { 126 | fn transform(&mut self, mut inputs: Matrix) -> LearningResult> { 127 | if self.means.size() != inputs.cols() { 128 | Err(Error::new(ErrorKind::InvalidData, 129 | "Input data has different number of columns from fitted data.")) 130 | } else { 131 | for mut row in inputs.row_iter_mut() { 132 | // Subtract the mean 133 | utils::in_place_vec_bin_op(row.raw_slice_mut(), self.means.data(), |x, &y| *x = *x - y); 134 | utils::in_place_vec_bin_op(row.raw_slice_mut(), self.variances.data(), |x, &y| { 135 | *x = (*x * self.scaled_stdev / y.sqrt()) + self.scaled_mean 136 | }); 137 | } 138 | Ok(inputs) 139 | } 140 | } 141 | } 142 | 143 | impl Invertible> for Standardizer { 144 | fn inv_transform(&self, mut inputs: Matrix) -> LearningResult> { 145 | let features = self.means.size(); 146 | if inputs.cols() != features { 147 | return Err(Error::new(ErrorKind::InvalidData, 148 | "Inputs have different feature count than transformer.")); 149 | } 150 | 151 | for mut row in inputs.row_iter_mut() { 152 | utils::in_place_vec_bin_op(row.raw_slice_mut(), self.variances.data(), |x, &y| { 153 | *x = (*x - self.scaled_mean) * y.sqrt() / self.scaled_stdev 154 | }); 155 | 156 | // Add the mean 157 | utils::in_place_vec_bin_op(row.raw_slice_mut(), self.means.data(), |x, &y| *x = *x + y); 158 | } 159 | 160 | Ok(inputs) 161 | } 162 | } 163 | 164 | #[cfg(test)] 165 | mod tests { 166 | use super::*; 167 | use super::super::{Transformer, TransformFitter, Invertible}; 168 | use linalg::{Axes, Matrix}; 169 | 170 | use std::f64; 171 | 172 | #[test] 173 | fn single_row_test() { 174 | let inputs = Matrix::new(1, 2, vec![1.0, 2.0]); 175 | 176 | let standardizer = StandardizerFitter::default(); 177 | let transformer = standardizer.fit(&inputs); 178 | assert!(transformer.is_err()); 179 | } 180 | 181 | #[test] 182 | fn nan_data_test() { 183 | let inputs = Matrix::new(2, 2, vec![f64::NAN; 4]); 184 | 185 | let standardizer = StandardizerFitter::default(); 186 | let transformer = standardizer.fit(&inputs); 187 | assert!(transformer.is_err()); 188 | } 189 | 190 | #[test] 191 | fn inf_data_test() { 192 | let inputs = Matrix::new(2, 2, vec![f64::INFINITY; 4]); 193 | 194 | let standardizer = StandardizerFitter::default(); 195 | let transformer = standardizer.fit(&inputs); 196 | assert!(transformer.is_err()); 197 | } 198 | 199 | #[test] 200 | fn wrong_transform_size_test() { 201 | let inputs = Matrix::new(2, 2, vec![-1.0f32, 2.0, 0.0, 3.0]); 202 | 203 | let mut standardizer = StandardizerFitter::default().fit(&inputs).unwrap(); 204 | let res = standardizer.transform(matrix![1.0, 2.0, 3.0; 4.0, 5.0, 6.0]); 205 | assert!(res.is_err()); 206 | } 207 | 208 | #[test] 209 | fn basic_standardize_test() { 210 | let inputs = Matrix::new(2, 2, vec![-1.0f32, 2.0, 0.0, 3.0]); 211 | 212 | let mut standardizer = StandardizerFitter::default().fit(&inputs).unwrap(); 213 | let transformed = standardizer.transform(inputs).unwrap(); 214 | 215 | let new_mean = transformed.mean(Axes::Row); 216 | let new_var = transformed.variance(Axes::Row).unwrap(); 217 | 218 | assert!(new_mean.data().iter().all(|x| x.abs() < 1e-5)); 219 | assert!(new_var.data().iter().all(|x| 
(x.abs() - 1.0) < 1e-5)); 220 | } 221 | 222 | #[test] 223 | fn custom_standardize_test() { 224 | let inputs = Matrix::new(2, 2, vec![-1.0f32, 2.0, 0.0, 3.0]); 225 | 226 | let mut standardizer = StandardizerFitter::new(1.0, 2.0).fit(&inputs).unwrap(); 227 | let transformed = standardizer.transform(inputs).unwrap(); 228 | 229 | let new_mean = transformed.mean(Axes::Row); 230 | let new_var = transformed.variance(Axes::Row).unwrap(); 231 | 232 | assert!(new_mean.data().iter().all(|x| (x.abs() - 1.0) < 1e-5)); 233 | assert!(new_var.data().iter().all(|x| (x.abs() - 4.0) < 1e-5)); 234 | } 235 | 236 | #[test] 237 | fn inv_transform_identity_test() { 238 | let inputs = Matrix::new(2, 2, vec![-1.0f32, 2.0, 0.0, 3.0]); 239 | 240 | let mut standardizer = StandardizerFitter::new(1.0, 3.0).fit(&inputs).unwrap(); 241 | let transformed = standardizer.transform(inputs.clone()).unwrap(); 242 | 243 | let original = standardizer.inv_transform(transformed).unwrap(); 244 | 245 | assert!((inputs - original).data().iter().all(|x| x.abs() < 1e-5)); 246 | } 247 | } 248 | -------------------------------------------------------------------------------- /src/datasets/mod.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | 3 | /// Module for iris dataset. 4 | pub mod iris; 5 | /// Module for trees dataset. 6 | pub mod trees; 7 | 8 | /// Dataset container 9 | #[derive(Clone, Debug)] 10 | pub struct Dataset where D: Clone + Debug, T: Clone + Debug { 11 | 12 | data: D, 13 | target: T 14 | } 15 | 16 | impl Dataset where D: Clone + Debug, T: Clone + Debug { 17 | 18 | /// Returns explanatory variable (features) 19 | pub fn data(&self) -> &D { 20 | &self.data 21 | } 22 | 23 | /// Returns objective variable (target) 24 | pub fn target(&self) -> &T { 25 | &self.target 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/datasets/trees.rs: -------------------------------------------------------------------------------- 1 | use rulinalg::matrix::Matrix; 2 | use rulinalg::vector::Vector; 3 | 4 | use super::Dataset; 5 | 6 | /// Load trees dataset. 7 | /// 8 | /// The data set contains a sample of 31 black cherry trees in the 9 | /// Allegheny National Forest, Pennsylvania. 10 | /// 11 | /// ## Attribute Information 12 | /// 13 | /// ### Data 14 | /// 15 | /// ``Matrix`` contains following columns. 16 | /// 17 | /// - diameter (inches) 18 | /// - height (feet) 19 | /// 20 | /// ### Target 21 | /// 22 | /// ``Vector`` contains volume (cubic feet) of trees. 23 | /// 24 | /// Thomas A. Ryan, Brian L. Joiner, Barbara F. Ryan. (1976). 25 | /// Minitab student handbook. 
Duxbury Press 26 | pub fn load() -> Dataset, Vector> { 27 | let data = matrix![8.3, 70.; 28 | 8.6, 65.; 29 | 8.8, 63.; 30 | 10.5, 72.; 31 | 10.7, 81.; 32 | 10.8, 83.; 33 | 11.0, 66.; 34 | 11.0, 75.; 35 | 11.1, 80.; 36 | 11.2, 75.; 37 | 11.3, 79.; 38 | 11.4, 76.; 39 | 11.4, 76.; 40 | 11.7, 69.; 41 | 12.0, 75.; 42 | 12.9, 74.; 43 | 12.9, 85.; 44 | 13.3, 86.; 45 | 13.7, 71.; 46 | 13.8, 64.; 47 | 14.0, 78.; 48 | 14.2, 80.; 49 | 14.5, 74.; 50 | 16.0, 72.; 51 | 16.3, 77.; 52 | 17.3, 81.; 53 | 17.5, 82.; 54 | 17.9, 80.; 55 | 18.0, 80.; 56 | 18.0, 80.; 57 | 20.6, 87.]; 58 | let target = vec![10.3, 10.3, 10.2, 16.4, 18.8, 19.7, 15.6, 18.2, 22.6, 19.9, 59 | 24.2, 21.0, 21.4, 21.3, 19.1, 22.2, 33.8, 27.4, 25.7, 24.9, 60 | 34.5, 31.7, 36.3, 38.3, 42.6, 55.4, 55.7, 58.3, 51.5, 51.0, 61 | 77.0]; 62 | Dataset{ data: data, 63 | target: Vector::new(target) } 64 | } -------------------------------------------------------------------------------- /src/learning/dbscan.rs: -------------------------------------------------------------------------------- 1 | //! DBSCAN Clustering 2 | //! 3 | //! *Note: This module is likely to change dramatically in the future and 4 | //! should be treated as experimental.* 5 | //! 6 | //! Provides an implementaton of DBSCAN clustering. The model 7 | //! also implements a `predict` function which uses nearest neighbours 8 | //! to classify the points. To utilize this function you must use 9 | //! `self.set_predictive(true)` before training the model. 10 | //! 11 | //! The algorithm works by specifying `eps` and `min_points` parameters. 12 | //! The `eps` parameter controls how close together points must be to be 13 | //! placed in the same cluster. The `min_points` parameter controls how many 14 | //! points must be within distance `eps` of eachother to be considered a cluster. 15 | //! 16 | //! If a point is not within distance `eps` of a cluster it will be classified 17 | //! as noise. This means that it will be set to `None` in the clusters `Vector`. 18 | //! 19 | //! # Examples 20 | //! 21 | //! ``` 22 | //! use rusty_machine::learning::dbscan::DBSCAN; 23 | //! use rusty_machine::learning::UnSupModel; 24 | //! use rusty_machine::linalg::Matrix; 25 | //! 26 | //! let inputs = Matrix::new(6, 2, vec![1.0, 2.0, 27 | //! 1.1, 2.2, 28 | //! 0.9, 1.9, 29 | //! 1.0, 2.1, 30 | //! -2.0, 3.0, 31 | //! -2.2, 3.1]); 32 | //! 33 | //! let mut model = DBSCAN::new(0.5, 2); 34 | //! model.train(&inputs).unwrap(); 35 | //! 36 | //! let clustering = model.clusters().unwrap(); 37 | //! ``` 38 | 39 | use learning::{LearningResult, UnSupModel}; 40 | use learning::error::{Error, ErrorKind}; 41 | 42 | use linalg::{Matrix, Vector, BaseMatrix}; 43 | use rulinalg::utils; 44 | use rulinalg::matrix::Row; 45 | 46 | /// DBSCAN Model 47 | /// 48 | /// Implements clustering using the DBSCAN algorithm 49 | /// via the `UnSupModel` trait. 50 | #[derive(Debug)] 51 | pub struct DBSCAN { 52 | eps: f64, 53 | min_points: usize, 54 | clusters: Option>>, 55 | predictive: bool, 56 | _visited: Vec, 57 | _cluster_data: Option>, 58 | } 59 | 60 | /// Constructs a non-predictive DBSCAN model with the 61 | /// following parameters: 62 | /// 63 | /// - `eps` : `0.5` 64 | /// - `min_points` : `5` 65 | impl Default for DBSCAN { 66 | fn default() -> DBSCAN { 67 | DBSCAN { 68 | eps: 0.5, 69 | min_points: 5, 70 | clusters: None, 71 | predictive: false, 72 | _visited: Vec::new(), 73 | _cluster_data: None, 74 | } 75 | } 76 | } 77 | 78 | impl UnSupModel, Vector>> for DBSCAN { 79 | /// Train the classifier using input data. 
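///
/// Each unvisited point is checked for neighbours within distance `eps`
/// (the point itself counts); if at least `min_points` are found, a new
/// cluster is grown from it via `expand_cluster`, otherwise the point keeps
/// the `None` (noise) label. When the model is predictive the training
/// inputs are also cloned and stored for later use by `predict`.
///
/// A minimal sketch of predictive use; the data values here are illustrative only.
///
/// ```
/// use rusty_machine::learning::dbscan::DBSCAN;
/// use rusty_machine::learning::UnSupModel;
/// use rusty_machine::linalg::Matrix;
///
/// let inputs = Matrix::new(4, 2, vec![1.0, 2.0,
///                                     1.1, 2.2,
///                                     0.9, 1.9,
///                                     10.0, 10.0]);
///
/// let mut model = DBSCAN::new(0.5, 2);
/// model.set_predictive(true);
/// model.train(&inputs).unwrap();
///
/// // Classify a new point by its nearest trained neighbour.
/// let new_point = Matrix::new(1, 2, vec![1.0, 2.1]);
/// let classes = model.predict(&new_point).unwrap();
/// ```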
80 | fn train(&mut self, inputs: &Matrix) -> LearningResult<()> { 81 | self.init_params(inputs.rows()); 82 | let mut cluster = 0; 83 | 84 | for (idx, point) in inputs.row_iter().enumerate() { 85 | let visited = self._visited[idx]; 86 | 87 | if !visited { 88 | self._visited[idx] = true; 89 | 90 | let neighbours = self.region_query(point, inputs); 91 | 92 | if neighbours.len() >= self.min_points { 93 | self.expand_cluster(inputs, idx, neighbours, cluster); 94 | cluster += 1; 95 | } 96 | } 97 | } 98 | 99 | if self.predictive { 100 | self._cluster_data = Some(inputs.clone()); 101 | } 102 | 103 | Ok(()) 104 | } 105 | 106 | fn predict(&self, inputs: &Matrix) -> LearningResult>> { 107 | if self.predictive { 108 | if let (&Some(ref cluster_data), &Some(ref clusters)) = (&self._cluster_data, 109 | &self.clusters) { 110 | let mut classes = Vec::with_capacity(inputs.rows()); 111 | 112 | for input_point in inputs.row_iter() { 113 | let mut distances = Vec::with_capacity(cluster_data.rows()); 114 | 115 | for cluster_point in cluster_data.row_iter() { 116 | let point_distance = 117 | utils::vec_bin_op(input_point.raw_slice(), cluster_point.raw_slice(), |x, y| x - y); 118 | distances.push(utils::dot(&point_distance, &point_distance).sqrt()); 119 | } 120 | 121 | let (closest_idx, closest_dist) = utils::argmin(&distances); 122 | if closest_dist < self.eps { 123 | classes.push(clusters[closest_idx]); 124 | } else { 125 | classes.push(None); 126 | } 127 | } 128 | 129 | Ok(Vector::new(classes)) 130 | } else { 131 | Err(Error::new_untrained()) 132 | } 133 | } else { 134 | Err(Error::new(ErrorKind::InvalidState, 135 | "Model must be set to predictive. Use `self.set_predictive(true)`.")) 136 | } 137 | } 138 | } 139 | 140 | impl DBSCAN { 141 | /// Create a new DBSCAN model with a given 142 | /// distance episilon and minimum points per cluster. 143 | pub fn new(eps: f64, min_points: usize) -> DBSCAN { 144 | assert!(eps > 0f64, "The model epsilon must be positive."); 145 | 146 | DBSCAN { 147 | eps: eps, 148 | min_points: min_points, 149 | clusters: None, 150 | predictive: false, 151 | _visited: Vec::new(), 152 | _cluster_data: None, 153 | } 154 | } 155 | 156 | /// Set predictive to true if the model is to be used 157 | /// to classify future points. 158 | /// 159 | /// If the model is set as predictive then the input data 160 | /// will be cloned during training. 161 | pub fn set_predictive(&mut self, predictive: bool) { 162 | self.predictive = predictive; 163 | } 164 | 165 | /// Return an Option pointing to the model clusters. 
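///
/// Returns `None` if the model has not yet been trained. After training,
/// each element of the vector is `Some(cluster_index)` for a clustered point
/// and `None` for a noise point.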
166 | pub fn clusters(&self) -> Option<&Vector>> { 167 | self.clusters.as_ref() 168 | } 169 | 170 | fn expand_cluster(&mut self, 171 | inputs: &Matrix, 172 | point_idx: usize, 173 | neighbour_pts: Vec, 174 | cluster: usize) { 175 | debug_assert!(point_idx < inputs.rows(), 176 | "Point index too large for inputs"); 177 | debug_assert!(neighbour_pts.iter().all(|x| *x < inputs.rows()), 178 | "Neighbour indices too large for inputs"); 179 | 180 | self.clusters.as_mut().map(|x| x.mut_data()[point_idx] = Some(cluster)); 181 | 182 | for data_point_idx in &neighbour_pts { 183 | let visited = self._visited[*data_point_idx]; 184 | if !visited { 185 | self._visited[*data_point_idx] = true; 186 | let data_point_row = unsafe { inputs.row_unchecked(*data_point_idx) }; 187 | let sub_neighbours = self.region_query(data_point_row, inputs); 188 | 189 | if sub_neighbours.len() >= self.min_points { 190 | self.expand_cluster(inputs, *data_point_idx, sub_neighbours, cluster); 191 | } 192 | } 193 | } 194 | } 195 | 196 | 197 | fn region_query(&self, point: Row, inputs: &Matrix) -> Vec { 198 | debug_assert!(point.cols() == inputs.cols(), 199 | "point must be of same dimension as inputs"); 200 | 201 | let mut in_neighbourhood = Vec::new(); 202 | for (idx, data_point) in inputs.row_iter().enumerate() { 203 | //TODO: Use `MatrixMetric` when rulinalg#154 is fixed. 204 | let point_distance = utils::vec_bin_op(data_point.raw_slice(), point.raw_slice(), |x, y| x - y); 205 | let dist = utils::dot(&point_distance, &point_distance).sqrt(); 206 | 207 | if dist < self.eps { 208 | in_neighbourhood.push(idx); 209 | } 210 | } 211 | 212 | in_neighbourhood 213 | } 214 | 215 | fn init_params(&mut self, total_points: usize) { 216 | unsafe { 217 | self._visited.reserve(total_points); 218 | self._visited.set_len(total_points); 219 | } 220 | 221 | for i in 0..total_points { 222 | self._visited[i] = false; 223 | } 224 | 225 | self.clusters = Some(Vector::new(vec![None; total_points])); 226 | } 227 | } 228 | 229 | #[cfg(test)] 230 | mod tests { 231 | use super::DBSCAN; 232 | use linalg::{Matrix, BaseMatrix}; 233 | 234 | #[test] 235 | fn test_region_query() { 236 | let model = DBSCAN::new(1.0, 3); 237 | 238 | let inputs = Matrix::new(3, 2, vec![1.0, 1.0, 1.1, 1.9, 3.0, 3.0]); 239 | 240 | let m = matrix![1.0, 1.0]; 241 | let row = m.row(0); 242 | let neighbours = model.region_query(row, &inputs); 243 | 244 | assert!(neighbours.len() == 2); 245 | } 246 | 247 | #[test] 248 | fn test_region_query_small_eps() { 249 | let model = DBSCAN::new(0.01, 3); 250 | 251 | let inputs = Matrix::new(3, 2, vec![1.0, 1.0, 1.1, 1.9, 1.1, 1.1]); 252 | 253 | let m = matrix![1.0, 1.0]; 254 | let row = m.row(0); 255 | let neighbours = model.region_query(row, &inputs); 256 | 257 | assert!(neighbours.len() == 1); 258 | } 259 | } 260 | -------------------------------------------------------------------------------- /src/learning/error.rs: -------------------------------------------------------------------------------- 1 | //! Error handling for the learning module. 2 | 3 | use std::boxed::Box; 4 | use std::convert::Into; 5 | use std::error; 6 | use std::fmt; 7 | use std::marker::{Send, Sync}; 8 | 9 | use rulinalg; 10 | 11 | /// An error related to the learning module. 12 | #[derive(Debug)] 13 | pub struct Error { 14 | kind: ErrorKind, 15 | error: Box, 16 | } 17 | 18 | /// Types of errors produced in the learning module. 19 | /// 20 | /// List intended to grow and so you should 21 | /// be wary of matching against explicitly. 
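///
/// In practice this means any `match` on an `ErrorKind` should include a
/// wildcard arm. For example, downstream code might branch on the kind as
/// follows (the messages are illustrative only):
///
/// ```
/// use rusty_machine::learning::error::{Error, ErrorKind};
///
/// let err = Error::new_untrained();
/// match *err.kind() {
///     ErrorKind::UntrainedModel => println!("train the model first"),
///     _ => println!("some other learning error"),
/// }
/// ```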
22 | #[derive(Debug)] 23 | pub enum ErrorKind { 24 | /// The parameters used to define the model are not valid. 25 | InvalidParameters, 26 | /// The input data to the model is not valid. 27 | InvalidData, 28 | /// The action could not be carried out as the model was in an invalid state. 29 | InvalidState, 30 | /// The model has not been trained 31 | UntrainedModel, 32 | /// Linear algebra related error 33 | LinearAlgebra 34 | } 35 | 36 | impl Error { 37 | /// Construct a new `Error` of a particular `ErrorKind`. 38 | pub fn new(kind: ErrorKind, error: E) -> Error 39 | where E: Into> 40 | { 41 | Error { 42 | kind: kind, 43 | error: error.into(), 44 | } 45 | } 46 | 47 | /// Returns a new error for an untrained model 48 | /// 49 | /// This function is unstable and may be removed with changes to the API. 50 | pub fn new_untrained() -> Error { 51 | Error::new(ErrorKind::UntrainedModel, "The model has not been trained.") 52 | } 53 | 54 | /// Get the kind of this `Error`. 55 | pub fn kind(&self) -> &ErrorKind { 56 | &self.kind 57 | } 58 | } 59 | 60 | impl From for Error { 61 | fn from(e: rulinalg::error::Error) -> Error { 62 | Error::new(ErrorKind::LinearAlgebra, ::description(&e)) 63 | } 64 | } 65 | 66 | impl error::Error for Error { 67 | fn description(&self) -> &str { 68 | self.error.description() 69 | } 70 | } 71 | 72 | impl fmt::Display for Error { 73 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 74 | self.error.fmt(f) 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/learning/gp.rs: -------------------------------------------------------------------------------- 1 | //! Gaussian Processes 2 | //! 3 | //! Provides implementation of gaussian process regression. 4 | //! 5 | //! # Usage 6 | //! 7 | //! ``` 8 | //! use rusty_machine::learning::gp; 9 | //! use rusty_machine::learning::SupModel; 10 | //! use rusty_machine::linalg::Matrix; 11 | //! use rusty_machine::linalg::Vector; 12 | //! 13 | //! let mut gaussp = gp::GaussianProcess::default(); 14 | //! gaussp.noise = 10f64; 15 | //! 16 | //! let train_data = Matrix::new(10,1,vec![0.,1.,2.,3.,4.,5.,6.,7.,8.,9.]); 17 | //! let target = Vector::new(vec![0.,1.,2.,3.,4.,4.,3.,2.,1.,0.]); 18 | //! 19 | //! gaussp.train(&train_data, &target).unwrap(); 20 | //! 21 | //! let test_data = Matrix::new(5,1,vec![2.3,4.4,5.1,6.2,7.1]); 22 | //! 23 | //! let outputs = gaussp.predict(&test_data).unwrap(); 24 | //! ``` 25 | //! Alternatively one could use `gaussp.get_posterior()` which would return both 26 | //! the predictive mean and covariance. However, this is likely to change in 27 | //! a future release. 28 | 29 | use learning::toolkit::kernel::{Kernel, SquaredExp}; 30 | use linalg::{Matrix, BaseMatrix, Decomposition, Cholesky}; 31 | use linalg::Vector; 32 | use learning::{LearningResult, SupModel}; 33 | use learning::error::{Error, ErrorKind}; 34 | 35 | /// Trait for GP mean functions. 36 | pub trait MeanFunc { 37 | /// Compute the mean function applied elementwise to a matrix. 38 | fn func(&self, x: Matrix) -> Vector; 39 | } 40 | 41 | /// Constant mean function 42 | #[derive(Clone, Copy, Debug)] 43 | pub struct ConstMean { 44 | a: f64, 45 | } 46 | 47 | /// Constructs the zero function. 
48 | impl Default for ConstMean { 49 | fn default() -> ConstMean { 50 | ConstMean { a: 0f64 } 51 | } 52 | } 53 | 54 | impl MeanFunc for ConstMean { 55 | fn func(&self, x: Matrix) -> Vector { 56 | Vector::zeros(x.rows()) + self.a 57 | } 58 | } 59 | 60 | /// Gaussian Process struct 61 | /// 62 | /// Gaussian process with generic kernel and deterministic mean function. 63 | /// Can be used for gaussian process regression with noise. 64 | /// Currently does not support classification. 65 | #[derive(Debug)] 66 | pub struct GaussianProcess { 67 | ker: T, 68 | mean: U, 69 | /// The observation noise of the GP. 70 | pub noise: f64, 71 | alpha: Option>, 72 | train_mat: Option>, 73 | train_data: Option>, 74 | } 75 | 76 | /// Construct a default Gaussian Process 77 | /// 78 | /// The defaults are: 79 | /// 80 | /// - Squared Exponential kernel. 81 | /// - Zero-mean function. 82 | /// - Zero noise. 83 | /// 84 | /// Note that zero noise can often lead to numerical instability. 85 | /// A small value for the noise may be a better alternative. 86 | impl Default for GaussianProcess { 87 | fn default() -> GaussianProcess { 88 | GaussianProcess { 89 | ker: SquaredExp::default(), 90 | mean: ConstMean::default(), 91 | noise: 0f64, 92 | train_mat: None, 93 | train_data: None, 94 | alpha: None, 95 | } 96 | } 97 | } 98 | 99 | impl GaussianProcess { 100 | /// Construct a new Gaussian Process. 101 | /// 102 | /// # Examples 103 | /// 104 | /// ``` 105 | /// use rusty_machine::learning::gp; 106 | /// use rusty_machine::learning::toolkit::kernel; 107 | /// 108 | /// let ker = kernel::SquaredExp::default(); 109 | /// let mean = gp::ConstMean::default(); 110 | /// let gaussp = gp::GaussianProcess::new(ker, mean, 1e-3f64); 111 | /// ``` 112 | pub fn new(ker: T, mean: U, noise: f64) -> GaussianProcess { 113 | GaussianProcess { 114 | ker: ker, 115 | mean: mean, 116 | noise: noise, 117 | train_mat: None, 118 | train_data: None, 119 | alpha: None, 120 | } 121 | } 122 | 123 | /// Construct a kernel matrix 124 | fn ker_mat(&self, m1: &Matrix, m2: &Matrix) -> LearningResult> { 125 | if m1.cols() != m2.cols() { 126 | Err(Error::new(ErrorKind::InvalidState, 127 | "Inputs to kernel matrices have different column counts.")) 128 | } else { 129 | let dim1 = m1.rows(); 130 | let dim2 = m2.rows(); 131 | 132 | let mut ker_data = Vec::with_capacity(dim1 * dim2); 133 | ker_data.extend(m1.row_iter().flat_map(|row1| { 134 | m2.row_iter() 135 | .map(move |row2| self.ker.kernel(row1.raw_slice(), row2.raw_slice())) 136 | })); 137 | 138 | Ok(Matrix::new(dim1, dim2, ker_data)) 139 | } 140 | } 141 | } 142 | 143 | impl SupModel, Vector> for GaussianProcess { 144 | /// Predict output from inputs. 145 | fn predict(&self, inputs: &Matrix) -> LearningResult> { 146 | 147 | // Messy referencing for succint syntax 148 | if let (&Some(ref alpha), &Some(ref t_data)) = (&self.alpha, &self.train_data) { 149 | let mean = self.mean.func(inputs.clone()); 150 | let post_mean = self.ker_mat(inputs, t_data)? * alpha; 151 | Ok(mean + post_mean) 152 | } else { 153 | Err(Error::new(ErrorKind::UntrainedModel, "The model has not been trained.")) 154 | } 155 | } 156 | 157 | /// Train the model using data and outputs. 
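///
/// Training builds the kernel matrix `K` of the inputs, takes the Cholesky
/// factor `L` of `K + noise * I`, and computes
/// `alpha = (K + noise * I)^-1 (y - m(X))` by solving the two triangular
/// systems `L x = y - m(X)` and `L^T alpha = x`. Prediction then only needs
/// `alpha`, the stored training inputs and the kernel.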
158 | fn train(&mut self, inputs: &Matrix, targets: &Vector) -> LearningResult<()> { 159 | let noise_mat = Matrix::identity(inputs.rows()) * self.noise; 160 | 161 | let ker_mat = self.ker_mat(inputs, inputs).unwrap(); 162 | 163 | let train_mat = Cholesky::decompose(ker_mat + noise_mat).map_err(|_| { 164 | Error::new(ErrorKind::InvalidState, 165 | "Could not compute Cholesky decomposition.") 166 | })?.unpack(); 167 | 168 | let x = train_mat.solve_l_triangular(targets - self.mean.func(inputs.clone())).unwrap(); 169 | let alpha = train_mat.transpose().solve_u_triangular(x).unwrap(); 170 | 171 | self.train_mat = Some(train_mat); 172 | self.train_data = Some(inputs.clone()); 173 | self.alpha = Some(alpha); 174 | 175 | Ok(()) 176 | } 177 | } 178 | 179 | impl GaussianProcess { 180 | /// Compute the posterior distribution [UNSTABLE] 181 | /// 182 | /// Requires the model to be trained first. 183 | /// 184 | /// Outputs the posterior mean and covariance matrix. 185 | pub fn get_posterior(&self, 186 | inputs: &Matrix) 187 | -> LearningResult<(Vector, Matrix)> { 188 | if let (&Some(ref t_mat), &Some(ref alpha), &Some(ref t_data)) = (&self.train_mat, 189 | &self.alpha, 190 | &self.train_data) { 191 | let mean = self.mean.func(inputs.clone()); 192 | 193 | let post_mean = mean + self.ker_mat(inputs, t_data)? * alpha; 194 | 195 | let test_mat = self.ker_mat(inputs, t_data)?; 196 | let mut var_data = Vec::with_capacity(inputs.rows() * inputs.cols()); 197 | for row in test_mat.row_iter() { 198 | let test_point = Vector::new(row.raw_slice()); 199 | var_data.append(&mut t_mat.solve_l_triangular(test_point).unwrap().into_vec()); 200 | } 201 | 202 | let v_mat = Matrix::new(test_mat.rows(), test_mat.cols(), var_data); 203 | 204 | let post_var = self.ker_mat(inputs, inputs)? - &v_mat * v_mat.transpose(); 205 | 206 | Ok((post_mean, post_var)) 207 | } else { 208 | Err(Error::new_untrained()) 209 | } 210 | } 211 | } 212 | -------------------------------------------------------------------------------- /src/learning/knn/brute_force.rs: -------------------------------------------------------------------------------- 1 | //! Bruteforce search implementations 2 | use linalg::{Matrix, BaseMatrix}; 3 | use learning::error::Error; 4 | 5 | use super::{KNearest, KNearestSearch, get_distances, dist}; 6 | 7 | /// Perform brute-force search 8 | #[derive(Debug)] 9 | pub struct BruteForce { 10 | data: Option>, 11 | } 12 | 13 | impl Default for BruteForce { 14 | /// Constructs new brute-force search 15 | /// 16 | /// # Examples 17 | /// 18 | /// ``` 19 | /// use rusty_machine::learning::knn::BruteForce; 20 | /// let _ = BruteForce::default(); 21 | /// ``` 22 | fn default() -> Self { 23 | BruteForce { 24 | data: None 25 | } 26 | } 27 | } 28 | 29 | impl BruteForce { 30 | /// Constructs new brute-force search. 31 | /// BruteForce accepts no parapeters. 
32 | /// 33 | /// # Examples 34 | /// 35 | /// ``` 36 | /// use rusty_machine::learning::knn::BruteForce; 37 | /// let _ = BruteForce::new(); 38 | /// ``` 39 | pub fn new() -> Self { 40 | BruteForce::default() 41 | } 42 | } 43 | 44 | /// Can search K-nearest items 45 | impl KNearestSearch for BruteForce { 46 | 47 | /// initialize BruteForce Searcher 48 | fn build(&mut self, data: Matrix) { 49 | self.data = Some(data); 50 | } 51 | 52 | /// Serch k-nearest items close to the point 53 | fn search(&self, point: &[f64], k: usize) -> Result<(Vec, Vec), Error> { 54 | if let Some(ref data) = self.data { 55 | let indices: Vec = (0..k).collect(); 56 | let distances = get_distances(data, point, &indices); 57 | 58 | let mut query = KNearest::new(k, indices, distances); 59 | let mut current_dist = query.dist(); 60 | 61 | let mut i = k; 62 | for row in data.row_iter().skip(k) { 63 | let d = dist(point, row.raw_slice()); 64 | if d < current_dist { 65 | current_dist = query.add(i, d); 66 | } 67 | i += 1; 68 | } 69 | Ok(query.get_results()) 70 | } else { 71 | Err(Error::new_untrained()) 72 | } 73 | } 74 | } 75 | 76 | #[cfg(test)] 77 | mod tests { 78 | 79 | use linalg::Matrix; 80 | use super::super::KNearestSearch; 81 | use super::BruteForce; 82 | 83 | #[test] 84 | fn test_bruteforce_search() { 85 | let m = Matrix::new(5, 2, vec![1., 2., 86 | 8., 0., 87 | 6., 10., 88 | 3., 6., 89 | 0., 3.]); 90 | let mut b = BruteForce::new(); 91 | b.build(m); 92 | 93 | let (ind, dist) = b.search(&vec![3., 4.9], 1).unwrap(); 94 | assert_eq!(ind, vec![3]); 95 | assert_eq!(dist, vec![1.0999999999999996]); 96 | 97 | let (ind, dist) = b.search(&vec![3., 4.9], 2).unwrap(); 98 | assert_eq!(ind, vec![3, 0]); 99 | assert_eq!(dist, vec![1.0999999999999996, 3.5227829907617076]); 100 | 101 | let (ind, dist) = b.search(&vec![3., 4.9], 3).unwrap(); 102 | assert_eq!(ind, vec![3, 0, 4]); 103 | assert_eq!(dist, vec![1.0999999999999996, 3.5227829907617076, 3.551056180912941]); 104 | } 105 | 106 | #[test] 107 | fn test_bruteforce_untrained() { 108 | let b = BruteForce::new(); 109 | let e = b.search(&vec![3., 4.9], 1); 110 | assert!(e.is_err()); 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /src/learning/lin_reg.rs: -------------------------------------------------------------------------------- 1 | //! Linear Regression module 2 | //! 3 | //! Contains implemention of linear regression using 4 | //! OLS and gradient descent optimization. 5 | //! 6 | //! The regressor will automatically add the intercept term 7 | //! so you do not need to format the input matrices yourself. 8 | //! 9 | //! # Usage 10 | //! 11 | //! ``` 12 | //! use rusty_machine::learning::lin_reg::LinRegressor; 13 | //! use rusty_machine::learning::SupModel; 14 | //! use rusty_machine::linalg::Matrix; 15 | //! use rusty_machine::linalg::Vector; 16 | //! 17 | //! let inputs = Matrix::new(4,1,vec![1.0,3.0,5.0,7.0]); 18 | //! let targets = Vector::new(vec![1.,5.,9.,13.]); 19 | //! 20 | //! let mut lin_mod = LinRegressor::default(); 21 | //! 22 | //! // Train the model 23 | //! lin_mod.train(&inputs, &targets).unwrap(); 24 | //! 25 | //! // Now we'll predict a new point 26 | //! let new_point = Matrix::new(1,1,vec![10.]); 27 | //! let output = lin_mod.predict(&new_point).unwrap(); 28 | //! 29 | //! // Hopefully we classified our new point correctly! 30 | //! assert!(output[0] > 17f64, "Our regressor isn't very good!"); 31 | //! 
``` 32 | 33 | use linalg::{Matrix, BaseMatrix}; 34 | use linalg::Vector; 35 | use learning::{LearningResult, SupModel}; 36 | use learning::toolkit::cost_fn::CostFunc; 37 | use learning::toolkit::cost_fn::MeanSqError; 38 | use learning::optim::grad_desc::GradientDesc; 39 | use learning::optim::{OptimAlgorithm, Optimizable}; 40 | use learning::error::Error; 41 | 42 | /// Linear Regression Model. 43 | /// 44 | /// Contains option for optimized parameter. 45 | #[derive(Debug)] 46 | pub struct LinRegressor { 47 | /// The parameters for the regression model. 48 | parameters: Option>, 49 | } 50 | 51 | impl Default for LinRegressor { 52 | fn default() -> LinRegressor { 53 | LinRegressor { parameters: None } 54 | } 55 | } 56 | 57 | impl LinRegressor { 58 | /// Get the parameters from the model. 59 | /// 60 | /// Returns an option that is None if the model has not been trained. 61 | pub fn parameters(&self) -> Option<&Vector> { 62 | self.parameters.as_ref() 63 | } 64 | } 65 | 66 | impl SupModel, Vector> for LinRegressor { 67 | /// Train the linear regression model. 68 | /// 69 | /// Takes training data and output values as input. 70 | /// 71 | /// # Examples 72 | /// 73 | /// ``` 74 | /// use rusty_machine::learning::lin_reg::LinRegressor; 75 | /// use rusty_machine::linalg::Matrix; 76 | /// use rusty_machine::linalg::Vector; 77 | /// use rusty_machine::learning::SupModel; 78 | /// 79 | /// let mut lin_mod = LinRegressor::default(); 80 | /// let inputs = Matrix::new(3,1, vec![2.0, 3.0, 4.0]); 81 | /// let targets = Vector::new(vec![5.0, 6.0, 7.0]); 82 | /// 83 | /// lin_mod.train(&inputs, &targets).unwrap(); 84 | /// ``` 85 | fn train(&mut self, inputs: &Matrix, targets: &Vector) -> LearningResult<()> { 86 | let ones = Matrix::::ones(inputs.rows(), 1); 87 | let full_inputs = ones.hcat(inputs); 88 | 89 | let xt = full_inputs.transpose(); 90 | self.parameters = Some((&xt * full_inputs).solve(&xt * targets)?); 91 | Ok(()) 92 | } 93 | 94 | /// Predict output value from input data. 95 | /// 96 | /// Model must be trained before prediction can be made. 97 | fn predict(&self, inputs: &Matrix) -> LearningResult> { 98 | if let Some(ref v) = self.parameters { 99 | let ones = Matrix::::ones(inputs.rows(), 1); 100 | let full_inputs = ones.hcat(inputs); 101 | Ok(full_inputs * v) 102 | } else { 103 | Err(Error::new_untrained()) 104 | } 105 | } 106 | } 107 | 108 | impl Optimizable for LinRegressor { 109 | type Inputs = Matrix; 110 | type Targets = Vector; 111 | 112 | fn compute_grad(&self, 113 | params: &[f64], 114 | inputs: &Matrix, 115 | targets: &Vector) 116 | -> (f64, Vec) { 117 | 118 | let beta_vec = Vector::new(params.to_vec()); 119 | let outputs = inputs * beta_vec; 120 | 121 | let cost = MeanSqError::cost(&outputs, targets); 122 | let grad = (inputs.transpose() * (outputs - targets)) / (inputs.rows() as f64); 123 | 124 | (cost, grad.into_vec()) 125 | } 126 | } 127 | 128 | impl LinRegressor { 129 | /// Train the linear regressor using Gradient Descent. 
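/// Unlike `train`, which solves the normal equations `X^T X * b = X^T y`
/// directly, this method minimises the mean squared error iteratively with
/// `GradientDesc`, following the gradient `X^T (X b - y) / n`.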
130 | /// 131 | /// # Examples 132 | /// 133 | /// ``` 134 | /// use rusty_machine::learning::lin_reg::LinRegressor; 135 | /// use rusty_machine::learning::SupModel; 136 | /// use rusty_machine::linalg::Matrix; 137 | /// use rusty_machine::linalg::Vector; 138 | /// 139 | /// let inputs = Matrix::new(4,1,vec![1.0,3.0,5.0,7.0]); 140 | /// let targets = Vector::new(vec![1.,5.,9.,13.]); 141 | /// 142 | /// let mut lin_mod = LinRegressor::default(); 143 | /// 144 | /// // Train the model 145 | /// lin_mod.train_with_optimization(&inputs, &targets); 146 | /// 147 | /// // Now we'll predict a new point 148 | /// let new_point = Matrix::new(1,1,vec![10.]); 149 | /// let _ = lin_mod.predict(&new_point).unwrap(); 150 | /// ``` 151 | pub fn train_with_optimization(&mut self, inputs: &Matrix, targets: &Vector) { 152 | let ones = Matrix::::ones(inputs.rows(), 1); 153 | let full_inputs = ones.hcat(inputs); 154 | 155 | let initial_params = vec![0.; full_inputs.cols()]; 156 | 157 | let gd = GradientDesc::default(); 158 | let optimal_w = gd.optimize(self, &initial_params[..], &full_inputs, targets); 159 | self.parameters = Some(Vector::new(optimal_w)); 160 | } 161 | } 162 | -------------------------------------------------------------------------------- /src/learning/logistic_reg.rs: -------------------------------------------------------------------------------- 1 | //! Logistic Regression module 2 | //! 3 | //! Contains implemention of logistic regression using 4 | //! gradient descent optimization. 5 | //! 6 | //! The regressor will automatically add the intercept term 7 | //! so you do not need to format the input matrices yourself. 8 | //! 9 | //! # Usage 10 | //! 11 | //! ``` 12 | //! use rusty_machine::learning::logistic_reg::LogisticRegressor; 13 | //! use rusty_machine::learning::SupModel; 14 | //! use rusty_machine::linalg::Matrix; 15 | //! use rusty_machine::linalg::Vector; 16 | //! 17 | //! let inputs = Matrix::new(4,1,vec![1.0,3.0,5.0,7.0]); 18 | //! let targets = Vector::new(vec![0.,0.,1.,1.]); 19 | //! 20 | //! let mut log_mod = LogisticRegressor::default(); 21 | //! 22 | //! // Train the model 23 | //! log_mod.train(&inputs, &targets).unwrap(); 24 | //! 25 | //! // Now we'll predict a new point 26 | //! let new_point = Matrix::new(1,1,vec![10.]); 27 | //! let output = log_mod.predict(&new_point).unwrap(); 28 | //! 29 | //! // Hopefully we classified our new point correctly! 30 | //! assert!(output[0] > 0.5, "Our classifier isn't very good!"); 31 | //! ``` 32 | //! 33 | //! We could have been more specific about the learning of the model 34 | //! by using the `new` constructor instead. This allows us to provide 35 | //! a `GradientDesc` object with custom parameters. 36 | 37 | use linalg::{Matrix, BaseMatrix}; 38 | use linalg::Vector; 39 | use learning::{LearningResult, SupModel}; 40 | use learning::toolkit::activ_fn::{ActivationFunc, Sigmoid}; 41 | use learning::toolkit::cost_fn::{CostFunc, CrossEntropyError}; 42 | use learning::optim::grad_desc::GradientDesc; 43 | use learning::optim::{OptimAlgorithm, Optimizable}; 44 | use learning::error::Error; 45 | 46 | /// Logistic Regression Model. 47 | /// 48 | /// Contains option for optimized parameter. 49 | #[derive(Debug)] 50 | pub struct LogisticRegressor 51 | where A: OptimAlgorithm 52 | { 53 | base: BaseLogisticRegressor, 54 | alg: A, 55 | } 56 | 57 | /// Constructs a default Logistic Regression model 58 | /// using standard gradient descent. 
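///
/// # Examples
///
/// ```
/// use rusty_machine::learning::logistic_reg::LogisticRegressor;
///
/// // A minimal sketch: the default model starts untrained and optimizes
/// // with `GradientDesc::default()` internally.
/// let model = LogisticRegressor::default();
/// assert!(model.parameters().is_none());
/// ```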
59 | impl Default for LogisticRegressor { 60 | fn default() -> LogisticRegressor { 61 | LogisticRegressor { 62 | base: BaseLogisticRegressor::new(), 63 | alg: GradientDesc::default(), 64 | } 65 | } 66 | } 67 | 68 | impl> LogisticRegressor { 69 | /// Constructs untrained logistic regression model. 70 | /// 71 | /// # Examples 72 | /// 73 | /// ``` 74 | /// use rusty_machine::learning::logistic_reg::LogisticRegressor; 75 | /// use rusty_machine::learning::optim::grad_desc::GradientDesc; 76 | /// 77 | /// let gd = GradientDesc::default(); 78 | /// let mut logistic_mod = LogisticRegressor::new(gd); 79 | /// ``` 80 | pub fn new(alg: A) -> LogisticRegressor { 81 | LogisticRegressor { 82 | base: BaseLogisticRegressor::new(), 83 | alg: alg, 84 | } 85 | } 86 | 87 | /// Get the parameters from the model. 88 | /// 89 | /// Returns an option that is None if the model has not been trained. 90 | pub fn parameters(&self) -> Option<&Vector> { 91 | self.base.parameters() 92 | } 93 | } 94 | 95 | impl SupModel, Vector> for LogisticRegressor 96 | where A: OptimAlgorithm 97 | { 98 | /// Train the logistic regression model. 99 | /// 100 | /// Takes training data and output values as input. 101 | /// 102 | /// # Examples 103 | /// 104 | /// ``` 105 | /// use rusty_machine::learning::logistic_reg::LogisticRegressor; 106 | /// use rusty_machine::linalg::Matrix; 107 | /// use rusty_machine::linalg::Vector; 108 | /// use rusty_machine::learning::SupModel; 109 | /// 110 | /// let mut logistic_mod = LogisticRegressor::default(); 111 | /// let inputs = Matrix::new(3,2, vec![1.0, 2.0, 1.0, 3.0, 1.0, 4.0]); 112 | /// let targets = Vector::new(vec![5.0, 6.0, 7.0]); 113 | /// 114 | /// logistic_mod.train(&inputs, &targets).unwrap(); 115 | /// ``` 116 | fn train(&mut self, inputs: &Matrix, targets: &Vector) -> LearningResult<()> { 117 | let ones = Matrix::::ones(inputs.rows(), 1); 118 | let full_inputs = ones.hcat(inputs); 119 | 120 | let initial_params = vec![0.5; full_inputs.cols()]; 121 | 122 | let optimal_w = self.alg.optimize(&self.base, &initial_params[..], &full_inputs, targets); 123 | self.base.set_parameters(Vector::new(optimal_w)); 124 | Ok(()) 125 | } 126 | 127 | /// Predict output value from input data. 128 | /// 129 | /// Model must be trained before prediction can be made. 130 | fn predict(&self, inputs: &Matrix) -> LearningResult> { 131 | if let Some(v) = self.base.parameters() { 132 | let ones = Matrix::::ones(inputs.rows(), 1); 133 | let full_inputs = ones.hcat(inputs); 134 | Ok((full_inputs * v).apply(&Sigmoid::func)) 135 | } else { 136 | Err(Error::new_untrained()) 137 | } 138 | } 139 | } 140 | 141 | /// The Base Logistic Regression model. 142 | /// 143 | /// This struct cannot be instantianated and is used internally only. 144 | #[derive(Debug)] 145 | pub struct BaseLogisticRegressor { 146 | parameters: Option>, 147 | } 148 | 149 | impl BaseLogisticRegressor { 150 | /// Construct a new BaseLogisticRegressor 151 | /// with parameters set to None. 152 | fn new() -> BaseLogisticRegressor { 153 | BaseLogisticRegressor { parameters: None } 154 | } 155 | } 156 | 157 | impl BaseLogisticRegressor { 158 | /// Returns a reference to the parameters. 159 | fn parameters(&self) -> Option<&Vector> { 160 | self.parameters.as_ref() 161 | } 162 | 163 | /// Set the parameters to `Some` vector. 164 | fn set_parameters(&mut self, params: Vector) { 165 | self.parameters = Some(params); 166 | } 167 | } 168 | 169 | /// Computing the gradient of the underlying Logistic 170 | /// Regression model. 
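///
/// The cost returned alongside the gradient is the cross-entropy error of the
/// sigmoid outputs.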
171 | /// 172 | /// The gradient is given by 173 | /// 174 | /// XT(h(Xb) - y) / m 175 | /// 176 | /// where `h` is the sigmoid function and `b` the underlying model parameters. 177 | impl Optimizable for BaseLogisticRegressor { 178 | type Inputs = Matrix; 179 | type Targets = Vector; 180 | 181 | fn compute_grad(&self, 182 | params: &[f64], 183 | inputs: &Matrix, 184 | targets: &Vector) 185 | -> (f64, Vec) { 186 | 187 | let beta_vec = Vector::new(params.to_vec()); 188 | let outputs = (inputs * beta_vec).apply(&Sigmoid::func); 189 | 190 | let cost = CrossEntropyError::cost(&outputs, targets); 191 | let grad = (inputs.transpose() * (outputs - targets)) / (inputs.rows() as f64); 192 | 193 | (cost, grad.into_vec()) 194 | } 195 | } 196 | -------------------------------------------------------------------------------- /src/learning/nnet/net_layer.rs: -------------------------------------------------------------------------------- 1 | //! Neural Network Layers 2 | 3 | use linalg::{Matrix, MatrixSlice, BaseMatrix}; 4 | 5 | use learning::LearningResult; 6 | use learning::error::{Error, ErrorKind}; 7 | use learning::toolkit::activ_fn::ActivationFunc; 8 | 9 | use rand::thread_rng; 10 | use rand::distributions::Sample; 11 | use rand::distributions::normal::Normal; 12 | 13 | use std::fmt::Debug; 14 | 15 | /// Trait for neural net layers 16 | pub trait NetLayer : Debug { 17 | /// The result of propogating data forward through this layer 18 | fn forward(&self, input: &Matrix, params: MatrixSlice) -> LearningResult>; 19 | 20 | /// The gradient of the output of this layer with respect to its input 21 | fn back_input(&self, out_grad: &Matrix, input: &Matrix, output: &Matrix, params: MatrixSlice) -> Matrix; 22 | 23 | /// The gradient of the output of this layer with respect to its parameters 24 | fn back_params(&self, out_grad: &Matrix, input: &Matrix, output: &Matrix, params: MatrixSlice) -> Matrix; 25 | 26 | /// The default value of the parameters of this layer before training 27 | fn default_params(&self) -> Vec; 28 | 29 | /// The shape of the parameters used by this layer 30 | fn param_shape(&self) -> (usize, usize); 31 | 32 | /// The number of parameters used by this layer 33 | fn num_params(&self) -> usize { 34 | let shape = self.param_shape(); 35 | shape.0 * shape.1 36 | } 37 | } 38 | 39 | /// Linear network layer 40 | /// 41 | /// Represents a fully connected layer with optional bias term 42 | /// 43 | /// The parameters are a matrix of weights of size I x N 44 | /// where N is the dimensionality of the output and I the dimensionality of the input 45 | #[derive(Debug, Clone, Copy)] 46 | pub struct Linear { 47 | /// The number of dimensions of the input 48 | input_size: usize, 49 | /// The number of dimensions of the output 50 | output_size: usize, 51 | /// Whether or not to include a bias term 52 | has_bias: bool, 53 | } 54 | 55 | impl Linear { 56 | /// Construct a new Linear layer 57 | pub fn new(input_size: usize, output_size: usize) -> Linear { 58 | Linear { 59 | input_size: input_size + 1, 60 | output_size: output_size, 61 | has_bias: true 62 | } 63 | } 64 | 65 | /// Construct a Linear layer without a bias term 66 | pub fn without_bias(input_size: usize, output_size: usize) -> Linear { 67 | Linear { 68 | input_size: input_size, 69 | output_size: output_size, 70 | has_bias: false 71 | } 72 | } 73 | } 74 | 75 | fn remove_first_col(mat: Matrix) -> Matrix 76 | { 77 | let rows = mat.rows(); 78 | let cols = mat.cols(); 79 | let mut data = mat.into_vec(); 80 | 81 | let len = data.len(); 82 | let mut del 
= 0; 83 | { 84 | let v = &mut *data; 85 | 86 | for i in 0..len { 87 | if i % cols == 0 { 88 | del += 1; 89 | } else if del > 0 { 90 | v[i - del] = v[i]; 91 | } 92 | } 93 | } 94 | if del > 0 { 95 | data.truncate(len - del); 96 | } 97 | Matrix::new(rows, cols - 1, data) 98 | } 99 | 100 | impl NetLayer for Linear { 101 | /// Computes a matrix product 102 | /// 103 | /// input should have dimensions N x I 104 | /// where N is the number of samples and I is the dimensionality of the input 105 | fn forward(&self, input: &Matrix, params: MatrixSlice) -> LearningResult> { 106 | if self.has_bias { 107 | if input.cols()+1 != params.rows() { 108 | Err(Error::new(ErrorKind::InvalidData, "The input had the wrong number of columns")) 109 | } else { 110 | Ok(&Matrix::ones(input.rows(), 1).hcat(input) * ¶ms) 111 | } 112 | } else { 113 | if input.cols() != params.rows() { 114 | Err(Error::new(ErrorKind::InvalidData, "The input had the wrong number of columns")) 115 | } else { 116 | Ok(input * ¶ms) 117 | } 118 | } 119 | } 120 | 121 | fn back_input(&self, out_grad: &Matrix, _: &Matrix, _: &Matrix, params: MatrixSlice) -> Matrix { 122 | debug_assert_eq!(out_grad.cols(), params.cols()); 123 | let gradient = out_grad * ¶ms.transpose(); 124 | if self.has_bias { 125 | remove_first_col(gradient) 126 | } else { 127 | gradient 128 | } 129 | } 130 | 131 | fn back_params(&self, out_grad: &Matrix, input: &Matrix, _: &Matrix, _: MatrixSlice) -> Matrix { 132 | debug_assert_eq!(input.rows(), out_grad.rows()); 133 | if self.has_bias { 134 | &Matrix::ones(input.rows(), 1).hcat(input).transpose() * out_grad 135 | } else { 136 | &input.transpose() * out_grad 137 | } 138 | } 139 | 140 | /// Initializes weights using Xavier initialization 141 | /// 142 | /// weights drawn from gaussian distribution with 0 mean and variance 2/(input_size+output_size) 143 | fn default_params(&self) -> Vec { 144 | let mut distro = Normal::new(0.0, (2.0/(self.input_size+self.output_size) as f64).sqrt()); 145 | let mut rng = thread_rng(); 146 | 147 | (0..self.input_size*self.output_size).map(|_| distro.sample(&mut rng)) 148 | .collect() 149 | } 150 | 151 | fn param_shape(&self) -> (usize, usize) { 152 | (self.input_size, self.output_size) 153 | } 154 | } 155 | 156 | impl NetLayer for T { 157 | /// Applies the activation function to each element of the input 158 | fn forward(&self, input: &Matrix, _: MatrixSlice) -> LearningResult> { 159 | let mut output = Vec::with_capacity(input.rows()*input.cols()); 160 | for val in input.data() { 161 | output.push(T::func(*val)); 162 | } 163 | Ok(Matrix::new(input.rows(), input.cols(), output)) 164 | } 165 | 166 | fn back_input(&self, out_grad: &Matrix, _: &Matrix, output: &Matrix, _: MatrixSlice) -> Matrix { 167 | let mut in_grad = Vec::with_capacity(output.rows()*output.cols()); 168 | for (y, g) in output.data().iter().zip(out_grad.data()) { 169 | in_grad.push(T::func_grad_from_output(*y) * g); 170 | } 171 | Matrix::new(output.rows(), output.cols(), in_grad) 172 | } 173 | 174 | fn back_params(&self, _: &Matrix, _: &Matrix, _: &Matrix, _: MatrixSlice) -> Matrix { 175 | Matrix::new(0, 0, Vec::new()) 176 | } 177 | 178 | fn default_params(&self) -> Vec { 179 | Vec::new() 180 | } 181 | 182 | fn param_shape(&self) -> (usize, usize) { 183 | (0, 0) 184 | } 185 | } 186 | -------------------------------------------------------------------------------- /src/learning/optim/fmincg.rs: -------------------------------------------------------------------------------- 1 | //! Module for the fmincg optimization algorithm. 
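//!
//! A minimal construction sketch (the defaults documented further down in this
//! module are a reasonable starting point):
//!
//! ```
//! use rusty_machine::learning::optim::fmincg::ConjugateGD;
//!
//! // Start from the defaults and allow more iterations.
//! let cgd = ConjugateGD { iters: 500, ..ConjugateGD::default() };
//! assert_eq!(cgd.iters, 500);
//! ```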
2 | //! 3 | //! This algorithm was taken from Andrew Ng's coursera machine 4 | //! learning course. The function was translated from MATLAB into rust. 5 | //! Original source code can be found [here](http://www.mathworks.com/matlabcentral/fileexchange/42770-logistic-regression-with-regularization-used-to-classify-hand-written-digits/content/Logistic%20Regression%20with%20regularisation/fmincg.m). 6 | //! 7 | //! The attached license permits use and modification for research 8 | //! and education only. 9 | //! 10 | //! Copyright (C) 2001 and 2002 by Carl Edward Rasmussen. Date 2002-02-13 11 | //! 12 | //! 13 | //! (C) Copyright 1999, 2000 & 2001, Carl Edward Rasmussen 14 | //! 15 | //! Permission is granted for anyone to copy, use, or modify these 16 | //! programs and accompanying documents for purposes of research or 17 | //! education, provided this copyright notice is retained, and note is 18 | //! made of any changes that have been made. 19 | //! 20 | //! These programs and documents are distributed without any warranty, 21 | //! express or implied. As the programs were written for research 22 | //! purposes only, they have not been tested to the degree that would be 23 | //! advisable in any important application. All use of these programs is 24 | //! entirely at the user's own risk. 25 | //! 26 | //! [rusty-machine] Changes made: 27 | //! 28 | //! - Conversion to Rust. 29 | //! - Length hard defaults to the max iterations. 30 | 31 | use learning::optim::{Optimizable, OptimAlgorithm}; 32 | use linalg::Vector; 33 | 34 | use std::cmp; 35 | use std::f64; 36 | 37 | 38 | /// Conjugate Gradient Descent algorithm 39 | #[derive(Clone, Copy, Debug)] 40 | pub struct ConjugateGD { 41 | /// Constant in the Wolfe-Powell conditions. 42 | pub rho: f64, 43 | /// Constant in the Wolfe-Powell conditions. 44 | pub sig: f64, 45 | /// Don't reevaluate within `int` of the limit of the current bracket. 46 | pub int: f64, 47 | /// Extrapolate max of `ext` times the current bracket. 48 | pub ext: f64, 49 | /// Max of `max` function evaluations per line search 50 | pub max: usize, 51 | /// The maximum allowed slope ratio 52 | pub ratio: f64, 53 | 54 | /// The default number of max iterations. 55 | pub iters: usize, 56 | } 57 | 58 | /// The default Conjugate GD algorithm. 59 | /// 60 | /// The defaults are: 61 | /// 62 | /// - rho = 0.01 63 | /// - sig = 0.5 64 | /// - int = 0.1 65 | /// - ext = 3 66 | /// - max = 20 67 | /// - ration = 100 68 | /// - iters = 100 69 | impl Default for ConjugateGD { 70 | fn default() -> ConjugateGD { 71 | ConjugateGD { 72 | rho: 0.01, 73 | sig: 0.5, 74 | int: 0.1, 75 | ext: 3.0, 76 | max: 20, 77 | ratio: 100.0, 78 | iters: 100, 79 | } 80 | } 81 | } 82 | 83 | impl OptimAlgorithm for ConjugateGD { 84 | fn optimize(&self, 85 | model: &M, 86 | start: &[f64], 87 | inputs: &M::Inputs, 88 | targets: &M::Targets) 89 | -> Vec { 90 | let mut i = 0usize; 91 | let mut ls_failed = false; 92 | 93 | let (mut f1, vec_df1) = model.compute_grad(start, inputs, targets); 94 | let mut df1 = Vector::new(vec_df1); 95 | 96 | // The reduction in the function. 
Can also be specified as part of length 97 | let red = 1f64; 98 | 99 | let length = self.iters as i32; 100 | 101 | let mut s = -df1.clone(); 102 | let mut d1 = -s.dot(&s); 103 | let mut z1 = red / (1f64 - d1); 104 | 105 | let mut x = Vector::new(start.to_vec()); 106 | 107 | let (mut f2, mut df2): (f64, Vector); 108 | 109 | while (i as i32) < length.abs() { 110 | if length > 0 { 111 | i += 1; 112 | } 113 | 114 | let (x0, f0) = (x.clone(), f1); 115 | 116 | x += &s * z1; 117 | 118 | let cost = model.compute_grad(x.data(), inputs, targets); 119 | f2 = cost.0; 120 | df2 = Vector::new(cost.1); 121 | 122 | if length < 0 { 123 | i += 1; 124 | } 125 | 126 | let mut d2 = df2.dot(&s); 127 | 128 | let (mut f3, mut d3, mut z3) = (f1, d1, -z1); 129 | 130 | let mut m = if length > 0 { 131 | self.max as i32 132 | } else { 133 | cmp::min(self.max as i32, -length - (i as i32)) 134 | }; 135 | 136 | let mut success = false; 137 | let mut limit = -1f64; 138 | 139 | loop { 140 | let mut z2: f64; 141 | 142 | while ((f2 > (f1 + z1 * self.rho * d1)) || (d2 > -self.sig * d1)) && (m > 0i32) { 143 | 144 | limit = z1; 145 | 146 | if f2 > f1 { 147 | z2 = z3 - (0.5 * d3 * z3 * z3) / (d3 * z3 + f2 - f3); 148 | } else { 149 | let a = 6f64 * (f2 - f3) / z3 + 3f64 * (d2 + d3); 150 | let b = 3f64 * (f3 - f2) - z3 * (2f64 * d2 + d3); 151 | z2 = ((b * b - a * d2 * z3 * z3).sqrt() - b) / a; 152 | } 153 | 154 | if z2.is_nan() || z2.is_infinite() { 155 | z2 = z3 / 2f64; 156 | } 157 | 158 | if z2 <= self.int * z3 { 159 | if z2 <= (1f64 - self.int) * z3 { 160 | z2 = (1f64 - self.int) * z3; 161 | } 162 | } else if self.int * z3 <= (1f64 - self.int) * z3 { 163 | z2 = (1f64 - self.int) * z3; 164 | } else { 165 | z2 = self.int * z3; 166 | } 167 | 168 | z1 += z2; 169 | x += &s * z2; 170 | let cost_grad = model.compute_grad(x.data(), inputs, targets); 171 | f2 = cost_grad.0; 172 | df2 = Vector::new(cost_grad.1); 173 | 174 | m -= 1i32; 175 | if length < 0 { 176 | i += 1; 177 | } 178 | 179 | d2 = df2.dot(&s); 180 | z3 -= z2; 181 | } 182 | 183 | if f2 > f1 + z1 * self.rho * d1 || d2 > -self.sig * d1 { 184 | break; 185 | } else if d2 > self.sig * d1 { 186 | success = true; 187 | break; 188 | } else if m == 0i32 { 189 | break; 190 | } 191 | 192 | let a = 6f64 * (f2 - f3) / z3 + 3f64 * (d2 + d3); 193 | let b = 3f64 * (f3 - f2) - z3 * (2f64 * d2 + d3); 194 | z2 = -d2 * z3 * z3 / (b + (b * b - a * d2 * z3 * z3).sqrt()); 195 | 196 | if z2.is_nan() || z2.is_infinite() || z2 < 0f64 { 197 | if limit < -0.5 { 198 | z2 = z1 * (self.ext - 1f64); 199 | } else { 200 | z2 = (limit - z1) / 2f64; 201 | } 202 | } else if (limit > -0.5) && (z2 + z1 > limit) { 203 | z2 = (limit - z1) / 2f64; 204 | } else if (limit < -0.5) && (z2 + z1 > z1 * self.ext) { 205 | z2 = z1 * (self.ext - 1f64); 206 | } else if z2 < -z3 * self.int { 207 | z2 = -z3 * self.int; 208 | } else if (limit > -0.5) && (z2 < (limit - z1) * (1f64 - self.int)) { 209 | z2 = (limit - z1) * (1f64 - self.int); 210 | } 211 | 212 | f3 = f2; 213 | d3 = d2; 214 | z3 = -z2; 215 | z1 += z2; 216 | x += &s * z2; 217 | 218 | let cost_grad = model.compute_grad(x.data(), inputs, targets); 219 | f2 = cost_grad.0; 220 | df2 = Vector::new(cost_grad.1); 221 | 222 | m -= 1; 223 | if length < 0 { 224 | i += 1; 225 | } 226 | 227 | d2 = df2.dot(&s); 228 | } 229 | 230 | if success { 231 | f1 = f2; 232 | s = s * (&df2 - &df1).dot(&df2) / df1.dot(&df1) - &df2; 233 | 234 | df1 = df2; 235 | 236 | d2 = df1.dot(&s); 237 | 238 | if d2 > 0f64 { 239 | s = -&df1; 240 | d2 = -s.dot(&s); 241 | } 242 | 243 | let ratio = d1 / (d2 - 
f64::MIN_POSITIVE); 244 | if self.ratio < ratio { 245 | z1 *= self.ratio; 246 | } else { 247 | z1 *= ratio; 248 | } 249 | 250 | d1 = d2; 251 | ls_failed = false; 252 | } else { 253 | x = x0; 254 | f1 = f0; 255 | 256 | if ls_failed || i as i32 > length.abs() { 257 | break; 258 | } 259 | 260 | df1 = df2; 261 | 262 | s = -&df1; 263 | d1 = -s.dot(&s); 264 | 265 | z1 = 1f64 / (1f64 - d1); 266 | ls_failed = true; 267 | } 268 | 269 | } 270 | x.into_vec() 271 | } 272 | } 273 | -------------------------------------------------------------------------------- /src/learning/pca.rs: -------------------------------------------------------------------------------- 1 | //! Principal Component Analysis Module 2 | //! 3 | //! Contains implementation of PCA. 4 | //! 5 | //! # Examples 6 | //! 7 | //! ``` 8 | //! use rusty_machine::learning::pca::PCA; 9 | //! use rusty_machine::learning::UnSupModel; 10 | //! 11 | //! use rusty_machine::linalg::Matrix; 12 | //! let mut pca = PCA::default(); 13 | //! 14 | //! let inputs = Matrix::new(3, 2, vec![1., 0.1, 15 | //! 3., 0.2, 16 | //! 4., 0.2]); 17 | //! // Train the model 18 | //! pca.train(&inputs).unwrap(); 19 | //! 20 | //! // Mapping a new point to principal component space 21 | //! let new_data = Matrix::new(1, 2, vec![2., 0.1]); 22 | //! let output = pca.predict(&new_data).unwrap(); 23 | //! 24 | //! assert_eq!(output, Matrix::new(1, 2, vec![-0.6686215718235227, 0.042826190364433595])); 25 | //! ``` 26 | 27 | use linalg::{Matrix, BaseMatrix, Axes}; 28 | use linalg::Vector; 29 | 30 | use learning::{LearningResult, UnSupModel}; 31 | use learning::error::{Error, ErrorKind}; 32 | 33 | /// Principal Component Analysis 34 | /// 35 | /// - PCA uses rulinalg SVD which is experimental (not yet work for large data) 36 | #[derive(Debug)] 37 | pub struct PCA { 38 | /// number of componentsc considered 39 | n: Option, 40 | /// Flag whether to centering inputs 41 | center: bool, 42 | 43 | // Number of original input 44 | n_features: Option, 45 | // Center of inputs 46 | centers: Option>, 47 | // Principal components 48 | components: Option>, 49 | // Whether components is inversed (trained with number of rows < cols data) 50 | inv: bool 51 | } 52 | 53 | impl PCA { 54 | 55 | /// Constructs untrained PCA model. 56 | /// 57 | /// # Parameters 58 | /// 59 | /// - `n` : number of principal components 60 | /// - `center` : flag whether centering inputs to be specified. 61 | /// 62 | /// # Examples 63 | /// 64 | /// ``` 65 | /// use rusty_machine::learning::pca::PCA; 66 | /// 67 | /// let model = PCA::new(3, true); 68 | /// ``` 69 | pub fn new(n: usize, center: bool) -> PCA { 70 | 71 | PCA { 72 | // accept n as usize, user should know the number of columns 73 | n: Some(n), 74 | center: center, 75 | 76 | n_features: None, 77 | centers: None, 78 | components: None, 79 | inv: false 80 | } 81 | } 82 | 83 | /// Returns principal components (matrix which contains eigenvectors as columns) 84 | pub fn components(&self) -> LearningResult<&Matrix> { 85 | match self.components { 86 | None => Err(Error::new_untrained()), 87 | Some(ref rot) => { Ok(rot) } 88 | } 89 | } 90 | } 91 | 92 | /// The default PCA. 
93 | /// 94 | /// Parameters: 95 | /// 96 | /// - `n` = `None` (keep all components) 97 | /// - `center` = `true` 98 | /// 99 | /// # Examples 100 | /// 101 | /// ``` 102 | /// use rusty_machine::learning::pca::PCA; 103 | /// 104 | /// let model = PCA::default(); 105 | /// ``` 106 | impl Default for PCA { 107 | fn default() -> Self { 108 | PCA { 109 | // because number of columns is unknown, 110 | // return all components by default 111 | n: None, 112 | center: true, 113 | 114 | n_features: None, 115 | centers: None, 116 | components: None, 117 | inv: false 118 | } 119 | } 120 | } 121 | 122 | /// Train the model and predict the model output from new data. 123 | impl UnSupModel, Matrix> for PCA { 124 | 125 | fn predict(&self, inputs: &Matrix) -> LearningResult> { 126 | 127 | match self.n_features { 128 | None => { return Err(Error::new_untrained()); }, 129 | Some(f) => { 130 | if f != inputs.cols() { 131 | return Err(Error::new(ErrorKind::InvalidData, 132 | "Input data must have the same number of columns as training data")); 133 | } 134 | } 135 | }; 136 | 137 | match self.components { 138 | // this can't happen 139 | None => { return Err(Error::new_untrained()); }, 140 | Some(ref comp) => { 141 | if self.center == true { 142 | match self.centers { 143 | // this can't happen 144 | None => return Err(Error::new_untrained()), 145 | Some(ref centers) => { 146 | let data = unsafe { centering(inputs, ¢ers) }; 147 | if self.inv == true { 148 | Ok(data * comp.transpose()) 149 | } else { 150 | Ok(data * comp) 151 | } 152 | } 153 | } 154 | } else { 155 | if self.inv == true { 156 | Ok(inputs * comp.transpose()) 157 | } else { 158 | Ok(inputs * comp) 159 | } 160 | } 161 | } 162 | } 163 | } 164 | 165 | fn train(&mut self, inputs: &Matrix) -> LearningResult<()> { 166 | match self.n { 167 | None => {}, 168 | Some(n) => { 169 | if n > inputs.cols() { 170 | return Err(Error::new(ErrorKind::InvalidData, 171 | "Input data must have equal or larger number of columns than n")); 172 | } 173 | } 174 | } 175 | 176 | let data = if self.center == true { 177 | let centers = inputs.mean(Axes::Row); 178 | let m = unsafe { centering(inputs, ¢ers) }; 179 | self.centers = Some(centers); 180 | m 181 | } else { 182 | inputs.clone() 183 | }; 184 | let (_, _, mut v) = data.svd().unwrap(); 185 | if inputs.cols() > inputs.rows() { 186 | v = v.transpose(); 187 | self.inv = true; 188 | } 189 | 190 | self.components = match self.n { 191 | Some(c) => { 192 | let slicer: Vec = (0..c).collect(); 193 | Some(v.select_cols(&slicer)) 194 | }, 195 | None => Some(v) 196 | }; 197 | self.n_features = Some(inputs.cols()); 198 | Ok(()) 199 | } 200 | } 201 | 202 | /// Subtract center Vector from each rows 203 | unsafe fn centering(inputs: &Matrix, centers: &Vector) -> Matrix { 204 | // Number of inputs columns and centers length must be the same 205 | Matrix::from_fn(inputs.rows(), inputs.cols(), 206 | |c, r| inputs.get_unchecked([r, c]) - centers.data().get_unchecked(c)) 207 | } 208 | 209 | #[cfg(test)] 210 | mod tests { 211 | 212 | use linalg::{Matrix, Axes, Vector}; 213 | use super::centering; 214 | 215 | #[test] 216 | fn test_centering() { 217 | let m = Matrix::new(2, 3, vec![1., 2., 3., 218 | 2., 4., 4.]); 219 | let centers = m.mean(Axes::Row); 220 | assert_vector_eq!(centers, Vector::new(vec![1.5, 3., 3.5]), comp=abs, tol=1e-8); 221 | 222 | let centered = unsafe { centering(&m, ¢ers) }; 223 | let exp = Matrix::new(2, 3, vec![-0.5, -1., -0.5, 224 | 0.5, 1., 0.5]); 225 | assert_matrix_eq!(centered, exp, comp=abs, tol=1e-8); 226 | } 227 | 
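    // An extra sketch (not part of the original suite): rows equal to their
    // column means should center to an all-zero matrix.
    #[test]
    fn test_centering_constant_rows() {
        let m = Matrix::new(2, 3, vec![1., 2., 3.,
                                       1., 2., 3.]);
        let centers = m.mean(Axes::Row);
        let centered = unsafe { centering(&m, &centers) };
        assert_matrix_eq!(centered, Matrix::zeros(2, 3), comp=abs, tol=1e-8);
    }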
} -------------------------------------------------------------------------------- /src/learning/svm.rs: -------------------------------------------------------------------------------- 1 | //! Support Vector Machine Module 2 | //! 3 | //! Contains implementation of Support Vector Machine using the 4 | //! [Pegasos training algorithm](http://ttic.uchicago.edu/~nati/Publications/PegasosMPB.pdf). 5 | //! 6 | //! The SVM models currently only support binary classification. 7 | //! The model inputs should be a matrix and the training targets are 8 | //! in the form of a vector of `-1`s and `1`s. 9 | //! 10 | //! # Examples 11 | //! 12 | //! ``` 13 | //! use rusty_machine::learning::svm::SVM; 14 | //! use rusty_machine::learning::SupModel; 15 | //! 16 | //! use rusty_machine::linalg::Matrix; 17 | //! use rusty_machine::linalg::Vector; 18 | //! 19 | //! let inputs = Matrix::new(4,1,vec![1.0,3.0,5.0,7.0]); 20 | //! let targets = Vector::new(vec![-1.,-1.,1.,1.]); 21 | //! 22 | //! let mut svm_mod = SVM::default(); 23 | //! 24 | //! // Train the model 25 | //! svm_mod.train(&inputs, &targets).unwrap(); 26 | //! 27 | //! // Now we'll predict a new point 28 | //! let new_point = Matrix::new(1,1,vec![10.]); 29 | //! let output = svm_mod.predict(&new_point).unwrap(); 30 | //! 31 | //! // Hopefully we classified our new point correctly! 32 | //! assert!(output[0] == 1f64, "Our classifier isn't very good!"); 33 | //! ``` 34 | 35 | 36 | use linalg::{Matrix, BaseMatrix}; 37 | use linalg::Vector; 38 | 39 | use learning::toolkit::kernel::{Kernel, SquaredExp}; 40 | use learning::{LearningResult, SupModel}; 41 | use learning::error::{Error, ErrorKind}; 42 | 43 | use rand; 44 | use rand::Rng; 45 | 46 | /// Support Vector Machine 47 | #[derive(Debug)] 48 | pub struct SVM { 49 | ker: K, 50 | alpha: Option>, 51 | train_inputs: Option>, 52 | train_targets: Option>, 53 | lambda: f64, 54 | /// Number of iterations for training. 55 | pub optim_iters: usize, 56 | } 57 | 58 | /// The default Support Vector Machine. 59 | /// 60 | /// The defaults are: 61 | /// 62 | /// - `ker` = `SquaredExp::default()` 63 | /// - `lambda` = `0.3` 64 | /// - `optim_iters` = `100` 65 | impl Default for SVM { 66 | fn default() -> SVM { 67 | SVM { 68 | ker: SquaredExp::default(), 69 | alpha: None, 70 | train_inputs: None, 71 | train_targets: None, 72 | lambda: 0.3f64, 73 | optim_iters: 100, 74 | } 75 | } 76 | } 77 | 78 | impl SVM { 79 | /// Constructs an untrained SVM with specified 80 | /// kernel and lambda which determins the hardness 81 | /// of the margin. 
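    ///
    /// The Pegasos iteration count defaults to 100 and can be tuned after
    /// construction through the public `optim_iters` field.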
82 | /// 83 | /// # Examples 84 | /// 85 | /// ``` 86 | /// use rusty_machine::learning::svm::SVM; 87 | /// use rusty_machine::learning::toolkit::kernel::SquaredExp; 88 | /// 89 | /// let _ = SVM::new(SquaredExp::default(), 0.3); 90 | /// ``` 91 | pub fn new(ker: K, lambda: f64) -> SVM { 92 | SVM { 93 | ker: ker, 94 | alpha: None, 95 | train_inputs: None, 96 | train_targets: None, 97 | lambda: lambda, 98 | optim_iters: 100, 99 | } 100 | } 101 | } 102 | 103 | impl SVM { 104 | /// Construct a kernel matrix 105 | fn ker_mat(&self, m1: &Matrix, m2: &Matrix) -> LearningResult> { 106 | if m1.cols() != m2.cols() { 107 | Err(Error::new(ErrorKind::InvalidState, 108 | "Inputs to kernel matrices have different column counts.")) 109 | } else { 110 | let dim1 = m1.rows(); 111 | let dim2 = m2.rows(); 112 | 113 | let mut ker_data = Vec::with_capacity(dim1 * dim2); 114 | ker_data.extend(m1.row_iter().flat_map(|row1| { 115 | m2.row_iter() 116 | .map(move |row2| self.ker.kernel(row1.raw_slice(), row2.raw_slice())) 117 | })); 118 | 119 | Ok(Matrix::new(dim1, dim2, ker_data)) 120 | } 121 | } 122 | } 123 | 124 | /// Train the model using the Pegasos algorithm and 125 | /// predict the model output from new data. 126 | impl SupModel, Vector> for SVM { 127 | fn predict(&self, inputs: &Matrix) -> LearningResult> { 128 | let ones = Matrix::::ones(inputs.rows(), 1); 129 | let full_inputs = ones.hcat(inputs); 130 | 131 | if let (&Some(ref alpha), &Some(ref train_inputs), &Some(ref train_targets)) = 132 | (&self.alpha, &self.train_inputs, &self.train_targets) { 133 | let ker_mat = self.ker_mat(&full_inputs, train_inputs)?; 134 | let weight_vec = alpha.elemul(train_targets) / self.lambda; 135 | 136 | let plane_dist = ker_mat * weight_vec; 137 | 138 | Ok(plane_dist.apply(&|d| d.signum())) 139 | } else { 140 | Err(Error::new_untrained()) 141 | } 142 | } 143 | 144 | fn train(&mut self, inputs: &Matrix, targets: &Vector) -> LearningResult<()> { 145 | let n = inputs.rows(); 146 | 147 | let mut rng = rand::thread_rng(); 148 | 149 | let mut alpha = vec![0f64; n]; 150 | 151 | let ones = Matrix::::ones(inputs.rows(), 1); 152 | let full_inputs = ones.hcat(inputs); 153 | 154 | for t in 0..self.optim_iters { 155 | let i = rng.gen_range(0, n); 156 | let row_i = full_inputs.select_rows(&[i]); 157 | let sum = full_inputs.row_iter() 158 | .fold(0f64, |sum, row| sum + self.ker.kernel(row_i.data(), row.raw_slice())) * 159 | targets[i] / (self.lambda * (t as f64)); 160 | 161 | if sum < 1f64 { 162 | alpha[i] += 1f64; 163 | } 164 | } 165 | 166 | self.alpha = Some(Vector::new(alpha) / (self.optim_iters as f64)); 167 | self.train_inputs = Some(full_inputs); 168 | self.train_targets = Some(targets.clone()); 169 | 170 | Ok(()) 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /src/learning/toolkit/activ_fn.rs: -------------------------------------------------------------------------------- 1 | //! Activation Functions. 2 | //! 3 | //! This module contains a number of structs implementing the `ActivationFunc` trait. 4 | //! 5 | //! These structs are used within Neural Networks and 6 | //! Generalized Linear Regression (not yet implemented). 7 | //! 8 | //! You can also create your own custom activation Functions for use in your models. 9 | //! Just create a unit struct implementing the `ActivationFunc` trait. 10 | 11 | use std::fmt::Debug; 12 | 13 | /// Trait for activation functions in models. 14 | pub trait ActivationFunc: Clone + Debug { 15 | /// The activation function. 
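    ///
    /// For example, the `Sigmoid` implementation below evaluates to 0.5 at
    /// zero (a quick sketch):
    ///
    /// ```
    /// use rusty_machine::learning::toolkit::activ_fn::{ActivationFunc, Sigmoid};
    ///
    /// assert!((Sigmoid::func(0.0) - 0.5).abs() < 1e-12);
    /// ```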
16 | fn func(x: f64) -> f64; 17 | 18 | /// The gradient of the activation function. 19 | fn func_grad(x: f64) -> f64; 20 | 21 | /// The gradient of the activation function calculated using the output of the function. 22 | /// Calculates f'(x) given f(x) as an input 23 | fn func_grad_from_output(y: f64) -> f64; 24 | 25 | /// The inverse of the activation function. 26 | fn func_inv(x: f64) -> f64; 27 | } 28 | 29 | /// Sigmoid activation function. 30 | #[derive(Clone, Copy, Debug)] 31 | pub struct Sigmoid; 32 | 33 | impl ActivationFunc for Sigmoid { 34 | /// Sigmoid function. 35 | /// 36 | /// Returns 1 / ( 1 + e^-t). 37 | fn func(x: f64) -> f64 { 38 | 1.0 / (1.0 + (-x).exp()) 39 | } 40 | 41 | /// Gradient of sigmoid function. 42 | /// 43 | /// Evaluates to (1 - e^-t) / (1 + e^-t)^2 44 | fn func_grad(x: f64) -> f64 { 45 | Self::func(x) * (1f64 - Self::func(x)) 46 | } 47 | 48 | fn func_grad_from_output(y: f64) -> f64 { 49 | y * (1f64 - y) 50 | } 51 | 52 | fn func_inv(x: f64) -> f64 { 53 | (x / (1f64 - x)).ln() 54 | } 55 | } 56 | 57 | /// Linear activation function. 58 | #[derive(Clone, Copy, Debug)] 59 | pub struct Linear; 60 | 61 | impl ActivationFunc for Linear { 62 | fn func(x: f64) -> f64 { 63 | x 64 | } 65 | 66 | fn func_grad(_: f64) -> f64 { 67 | 1f64 68 | } 69 | 70 | fn func_grad_from_output(_: f64) -> f64 { 71 | 1f64 72 | } 73 | 74 | fn func_inv(x: f64) -> f64 { 75 | x 76 | } 77 | } 78 | 79 | /// Exponential activation function. 80 | #[derive(Clone, Copy, Debug)] 81 | pub struct Exp; 82 | 83 | impl ActivationFunc for Exp { 84 | fn func(x: f64) -> f64 { 85 | x.exp() 86 | } 87 | 88 | fn func_grad(x: f64) -> f64 { 89 | Self::func(x) 90 | } 91 | 92 | fn func_grad_from_output(y: f64) -> f64 { 93 | y 94 | } 95 | 96 | fn func_inv(x: f64) -> f64 { 97 | x.ln() 98 | } 99 | } 100 | 101 | /// Hyperbolic tangent activation function 102 | #[derive(Clone, Copy, Debug)] 103 | pub struct Tanh; 104 | 105 | impl ActivationFunc for Tanh { 106 | fn func(x: f64) -> f64 { 107 | x.tanh() 108 | } 109 | 110 | fn func_grad(x: f64) -> f64 { 111 | let y = x.tanh(); 112 | 1.0 - y*y 113 | } 114 | 115 | fn func_grad_from_output(y: f64) -> f64 { 116 | 1.0 - y*y 117 | } 118 | 119 | fn func_inv(x: f64) -> f64 { 120 | 0.5*((1.0+x)/(1.0-x)).ln() 121 | } 122 | } -------------------------------------------------------------------------------- /src/learning/toolkit/cost_fn.rs: -------------------------------------------------------------------------------- 1 | //! Cost Functions. 2 | //! 3 | //! This module contains a number of structs implementing the `CostFunc` trait. 4 | //! 5 | //! These structs are used within Neural Networks and 6 | //! Generalized Linear Regression (not yet implemented). 7 | //! 8 | //! You can also create your own custom cost functions for use in your models. 9 | //! Just create a struct implementing the `CostFunc` trait. 10 | 11 | use linalg::{Matrix, BaseMatrix, BaseMatrixMut}; 12 | use linalg::Vector; 13 | 14 | /// Trait for cost functions in models. 15 | pub trait CostFunc { 16 | /// The cost function. 17 | fn cost(outputs: &T, targets: &T) -> f64; 18 | 19 | /// The gradient of the cost function. 20 | fn grad_cost(outputs: &T, targets: &T) -> T; 21 | } 22 | 23 | /// The mean squared error cost function. 24 | #[derive(Clone, Copy, Debug)] 25 | pub struct MeanSqError; 26 | 27 | // For generics we need a trait for "Hadamard product" here 28 | // Which is "Elementwise multiplication". 
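/// A quick sketch of the matrix case: the cost is the summed squared
/// difference divided by twice the number of rows.
///
/// ```
/// use rusty_machine::learning::toolkit::cost_fn::{CostFunc, MeanSqError};
/// use rusty_machine::linalg::Matrix;
///
/// let outputs = Matrix::new(1, 2, vec![1.0, 2.0]);
/// let targets = Matrix::new(1, 2, vec![1.0, 4.0]);
///
/// // Squared differences sum to 4 over a single row, so the cost is 4 / 2 = 2.
/// assert!((MeanSqError::cost(&outputs, &targets) - 2.0).abs() < 1e-12);
/// ```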
29 | impl CostFunc> for MeanSqError { 30 | fn cost(outputs: &Matrix, targets: &Matrix) -> f64 { 31 | let diff = outputs - targets; 32 | let sq_diff = &diff.elemul(&diff); 33 | 34 | let n = diff.rows(); 35 | 36 | sq_diff.sum() / (2f64 * (n as f64)) 37 | } 38 | 39 | fn grad_cost(outputs: &Matrix, targets: &Matrix) -> Matrix { 40 | outputs - targets 41 | } 42 | } 43 | 44 | impl CostFunc> for MeanSqError { 45 | fn cost(outputs: &Vector, targets: &Vector) -> f64 { 46 | let diff = outputs - targets; 47 | let sq_diff = &diff.elemul(&diff); 48 | 49 | let n = diff.size(); 50 | 51 | sq_diff.sum() / (2f64 * (n as f64)) 52 | } 53 | 54 | fn grad_cost(outputs: &Vector, targets: &Vector) -> Vector { 55 | outputs - targets 56 | } 57 | } 58 | 59 | /// The cross entropy error cost function. 60 | #[derive(Clone, Copy, Debug)] 61 | pub struct CrossEntropyError; 62 | 63 | impl CostFunc> for CrossEntropyError { 64 | fn cost(outputs: &Matrix, targets: &Matrix) -> f64 { 65 | // The cost for a single 66 | let log_inv_output = (-outputs + 1f64).apply(&ln); 67 | let log_output = outputs.clone().apply(&ln); 68 | 69 | let mat_cost = targets.elemul(&log_output) + (-targets + 1f64).elemul(&log_inv_output); 70 | 71 | let n = outputs.rows(); 72 | 73 | -(mat_cost.sum()) / (n as f64) 74 | } 75 | 76 | fn grad_cost(outputs: &Matrix, targets: &Matrix) -> Matrix { 77 | (outputs - targets).elediv(&(outputs.elemul(&(-outputs + 1f64)))) 78 | } 79 | } 80 | 81 | impl CostFunc> for CrossEntropyError { 82 | fn cost(outputs: &Vector, targets: &Vector) -> f64 { 83 | // The cost for a single 84 | let log_inv_output = (-outputs + 1f64).apply(&ln); 85 | let log_output = outputs.clone().apply(&ln); 86 | 87 | let mat_cost = targets.elemul(&log_output) + (-targets + 1f64).elemul(&log_inv_output); 88 | 89 | let n = outputs.size(); 90 | 91 | -(mat_cost.sum()) / (n as f64) 92 | } 93 | 94 | fn grad_cost(outputs: &Vector, targets: &Vector) -> Vector { 95 | (outputs - targets).elediv(&(outputs.elemul(&(-outputs + 1f64)))) 96 | } 97 | } 98 | 99 | /// Logarithm for applying within cost function. 100 | fn ln(x: f64) -> f64 { 101 | x.ln() 102 | } 103 | -------------------------------------------------------------------------------- /src/learning/toolkit/rand_utils.rs: -------------------------------------------------------------------------------- 1 | //! Utility functions for random functionality. 2 | //! 3 | //! This module provides sampling and shuffling which are used 4 | //! within the learning modules. 5 | 6 | use rand::{Rng, thread_rng}; 7 | 8 | /// ``` 9 | /// use rusty_machine::learning::toolkit::rand_utils; 10 | /// 11 | /// let mut pool = &mut [1,2,3,4]; 12 | /// let sample = rand_utils::reservoir_sample(pool, 3); 13 | /// 14 | /// println!("{:?}", sample); 15 | /// ``` 16 | pub fn reservoir_sample(pool: &[T], reservoir_size: usize) -> Vec { 17 | assert!(pool.len() >= reservoir_size, 18 | "Sample size is greater than total."); 19 | 20 | let mut pool_mut = &pool[..]; 21 | 22 | let mut res = pool_mut[..reservoir_size].to_vec(); 23 | pool_mut = &pool_mut[reservoir_size..]; 24 | 25 | let mut ele_seen = reservoir_size; 26 | let mut rng = thread_rng(); 27 | 28 | while !pool_mut.is_empty() { 29 | ele_seen += 1; 30 | let r = rng.gen_range(0, ele_seen); 31 | 32 | let p_0 = pool_mut[0]; 33 | pool_mut = &pool_mut[1..]; 34 | 35 | if r < reservoir_size { 36 | res[r] = p_0; 37 | } 38 | } 39 | 40 | res 41 | } 42 | 43 | /// The inside out Fisher-Yates algorithm. 
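///
/// Unlike `in_place_fisher_yates`, this builds and returns a freshly shuffled
/// copy, leaving the input slice untouched.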
44 | /// 45 | /// # Examples 46 | /// 47 | /// ``` 48 | /// use rusty_machine::learning::toolkit::rand_utils; 49 | /// 50 | /// // Collect the numbers 0..5 51 | /// let a = (0..5).collect::>(); 52 | /// 53 | /// // Perform a Fisher-Yates shuffle to get a random permutation 54 | /// let permutation = rand_utils::fisher_yates(&a); 55 | /// ``` 56 | pub fn fisher_yates(arr: &[T]) -> Vec { 57 | let n = arr.len(); 58 | let mut rng = thread_rng(); 59 | 60 | let mut shuffled_arr = Vec::with_capacity(n); 61 | 62 | unsafe { 63 | // We set the length here 64 | // We only access data which has been initialized in the algorithm 65 | shuffled_arr.set_len(n); 66 | } 67 | 68 | for i in 0..n { 69 | let j = rng.gen_range(0, i + 1); 70 | 71 | // If j isn't the last point in the active shuffled array 72 | if j != i { 73 | // Copy value at position j to the end of the shuffled array 74 | // This is safe as we only read initialized data (j < i) 75 | let x = shuffled_arr[j]; 76 | shuffled_arr[i] = x; 77 | } 78 | 79 | // Place value at end of active array into shuffled array 80 | shuffled_arr[j] = arr[i]; 81 | } 82 | 83 | shuffled_arr 84 | } 85 | 86 | /// The in place Fisher-Yates shuffle. 87 | /// 88 | /// # Examples 89 | /// 90 | /// ``` 91 | /// use rusty_machine::learning::toolkit::rand_utils; 92 | /// 93 | /// // Collect the numbers 0..5 94 | /// let mut a = (0..5).collect::>(); 95 | /// 96 | /// // Permute the values in place with Fisher-Yates 97 | /// rand_utils::in_place_fisher_yates(&mut a); 98 | /// ``` 99 | pub fn in_place_fisher_yates(arr: &mut [T]) { 100 | let n = arr.len(); 101 | let mut rng = thread_rng(); 102 | 103 | for i in 0..n { 104 | // Swap i with a random point after it 105 | let j = rng.gen_range(0, n - i); 106 | arr.swap(i, i + j); 107 | } 108 | } 109 | 110 | #[cfg(test)] 111 | mod tests { 112 | use super::*; 113 | 114 | #[test] 115 | fn test_reservoir_sample() { 116 | let a = vec![1, 2, 3, 4, 5, 6, 7]; 117 | 118 | let b = reservoir_sample(&a, 3); 119 | 120 | assert_eq!(b.len(), 3); 121 | } 122 | 123 | #[test] 124 | fn test_fisher_yates() { 125 | let a = (0..10).collect::>(); 126 | 127 | let b = fisher_yates(&a); 128 | 129 | for val in a.iter() { 130 | assert!(b.contains(val)); 131 | } 132 | } 133 | 134 | #[test] 135 | fn test_in_place_fisher_yates() { 136 | let mut a = (0..10).collect::>(); 137 | 138 | in_place_fisher_yates(&mut a); 139 | 140 | for val in 0..10 { 141 | assert!(a.contains(&val)); 142 | } 143 | } 144 | } 145 | -------------------------------------------------------------------------------- /src/learning/toolkit/regularization.rs: -------------------------------------------------------------------------------- 1 | //! Regularization Module 2 | //! 3 | //! This module contains some base utility methods for regularization 4 | //! within machine learning algorithms. 5 | //! 6 | //! The module contains a `Regularization` enum which provides access to 7 | //! `L1`, `L2` and `ElasticNet` regularization. 8 | //! 9 | //! # Examples 10 | //! 11 | //! ``` 12 | //! use rusty_machine::learning::toolkit::regularization::Regularization; 13 | //! 14 | //! let reg = Regularization::L1(0.5); 15 | //! 
``` 16 | 17 | use linalg::norm::{Euclidean, Lp, MatrixNorm}; 18 | use linalg::{Matrix, MatrixSlice, BaseMatrix}; 19 | use libnum::{FromPrimitive, Float}; 20 | 21 | /// Model Regularization 22 | #[derive(Debug, Clone, Copy)] 23 | pub enum Regularization { 24 | /// L1 Regularization 25 | L1(T), 26 | /// L2 Regularization 27 | L2(T), 28 | /// Elastic Net Regularization (L1 and L2) 29 | ElasticNet(T, T), 30 | /// No Regularization 31 | None, 32 | } 33 | 34 | impl Regularization { 35 | /// Compute the regularization addition to the cost. 36 | pub fn reg_cost(&self, mat: MatrixSlice) -> T { 37 | match *self { 38 | Regularization::L1(x) => Self::l1_reg_cost(&mat, x), 39 | Regularization::L2(x) => Self::l2_reg_cost(&mat, x), 40 | Regularization::ElasticNet(x, y) => { 41 | Self::l1_reg_cost(&mat, x) + Self::l2_reg_cost(&mat, y) 42 | } 43 | Regularization::None => T::zero(), 44 | } 45 | } 46 | 47 | /// Compute the regularization addition to the gradient. 48 | pub fn reg_grad(&self, mat: MatrixSlice) -> Matrix { 49 | match *self { 50 | Regularization::L1(x) => Self::l1_reg_grad(&mat, x), 51 | Regularization::L2(x) => Self::l2_reg_grad(&mat, x), 52 | Regularization::ElasticNet(x, y) => { 53 | Self::l1_reg_grad(&mat, x) + Self::l2_reg_grad(&mat, y) 54 | } 55 | Regularization::None => Matrix::zeros(mat.rows(), mat.cols()), 56 | } 57 | } 58 | 59 | fn l1_reg_cost(mat: &MatrixSlice, x: T) -> T { 60 | let l1_norm = Lp::Integer(1).norm(mat); 61 | l1_norm * x / ((T::one() + T::one()) * FromPrimitive::from_usize(mat.rows()).unwrap()) 62 | } 63 | 64 | fn l1_reg_grad(mat: &MatrixSlice, x: T) -> Matrix { 65 | let m_2 = (T::one() + T::one()) * FromPrimitive::from_usize(mat.rows()).unwrap(); 66 | let out_mat_data = mat.iter() 67 | .map(|y| { 68 | if y.is_sign_negative() { 69 | -x / m_2 70 | } else { 71 | x / m_2 72 | } 73 | }) 74 | .collect::>(); 75 | Matrix::new(mat.rows(), mat.cols(), out_mat_data) 76 | } 77 | 78 | fn l2_reg_cost(mat: &MatrixSlice, x: T) -> T { 79 | Euclidean.norm(mat) * x / ((T::one() + T::one()) * FromPrimitive::from_usize(mat.rows()).unwrap()) 80 | } 81 | 82 | fn l2_reg_grad(mat: &MatrixSlice, x: T) -> Matrix { 83 | mat * (x / FromPrimitive::from_usize(mat.rows()).unwrap()) 84 | } 85 | } 86 | 87 | #[cfg(test)] 88 | mod tests { 89 | use super::Regularization; 90 | use linalg::{Matrix, BaseMatrix}; 91 | use linalg::norm::{Euclidean, MatrixNorm}; 92 | 93 | #[test] 94 | fn test_no_reg() { 95 | let input_mat = Matrix::new(3, 4, (0..12).map(|x| x as f64).collect::>()); 96 | let mat_slice = input_mat.as_slice(); 97 | 98 | let no_reg: Regularization = Regularization::None; 99 | 100 | let a = no_reg.reg_cost(mat_slice); 101 | let b = no_reg.reg_grad(mat_slice); 102 | 103 | assert_eq!(a, 0f64); 104 | assert_eq!(b, Matrix::zeros(3, 4)); 105 | } 106 | 107 | #[test] 108 | fn test_l1_reg() { 109 | let input_mat = Matrix::new(3, 4, (0..12).map(|x| x as f64 - 3f64).collect::>()); 110 | let mat_slice = input_mat.as_slice(); 111 | 112 | let no_reg: Regularization = Regularization::L1(0.5); 113 | 114 | let a = no_reg.reg_cost(mat_slice); 115 | let b = no_reg.reg_grad(mat_slice); 116 | 117 | assert!((a - (42f64 / 12f64)) < 1e-18); 118 | 119 | let true_grad = vec![-1., -1., -1., 1., 1., 1., 1., 1., 1., 1., 1., 1.] 
120 | .into_iter() 121 | .map(|x| x / 12f64) 122 | .collect::>(); 123 | 124 | for eps in (b - Matrix::new(3, 4, true_grad)).into_vec() { 125 | assert!(eps < 1e-18); 126 | } 127 | } 128 | 129 | #[test] 130 | fn test_l2_reg() { 131 | let input_mat = Matrix::new(3, 4, (0..12).map(|x| x as f64 - 3f64).collect::>()); 132 | let mat_slice = input_mat.as_slice(); 133 | 134 | let no_reg: Regularization = Regularization::L2(0.5); 135 | 136 | let a = no_reg.reg_cost(mat_slice); 137 | let b = no_reg.reg_grad(mat_slice); 138 | 139 | assert!((a - (Euclidean.norm(&input_mat) / 12f64)) < 1e-18); 140 | 141 | let true_grad = &input_mat / 6f64; 142 | for eps in (b - true_grad).into_vec() { 143 | assert!(eps < 1e-18); 144 | } 145 | } 146 | 147 | #[test] 148 | fn test_elastic_net_reg() { 149 | let input_mat = Matrix::new(3, 4, (0..12).map(|x| x as f64 - 3f64).collect::>()); 150 | let mat_slice = input_mat.as_slice(); 151 | 152 | let no_reg: Regularization = Regularization::ElasticNet(0.5, 0.25); 153 | 154 | let a = no_reg.reg_cost(mat_slice); 155 | let b = no_reg.reg_grad(mat_slice); 156 | 157 | assert!(a - ((Euclidean.norm(&input_mat) / 24f64) + (42f64 / 12f64)) < 1e-18); 158 | 159 | let l1_true_grad = Matrix::new(3, 4, 160 | vec![-1., -1., -1., 1., 1., 1., 1., 1., 1., 1., 1., 1.] 161 | .into_iter() 162 | .map(|x| x / 12f64) 163 | .collect::>()); 164 | let l2_true_grad = &input_mat / 12f64; 165 | 166 | for eps in (b - l1_true_grad - l2_true_grad) 167 | .into_vec() { 168 | // Slightly lower boundary than others - more numerical error as more ops. 169 | assert!(eps < 1e-12); 170 | } 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! # The rusty-machine crate. 2 | //! 3 | //! A crate built for machine learning that works out-of-the-box. 4 | //! 5 | //! --- 6 | //! 7 | //! ## Structure 8 | //! 9 | //! The crate is made up of two primary modules: learning and linalg. 10 | //! 11 | //! ### learning 12 | //! 13 | //! The learning module contains all of the machine learning modules. 14 | //! This means the algorithms, models and related tools. 15 | //! 16 | //! The currently supported techniques are: 17 | //! 18 | //! - Linear Regression 19 | //! - Logistic Regression 20 | //! - Generalized Linear Models 21 | //! - K-Means Clustering 22 | //! - Neural Networks 23 | //! - Gaussian Process Regression 24 | //! - Support Vector Machines 25 | //! - Gaussian Mixture Models 26 | //! - Naive Bayes Classifiers 27 | //! - DBSCAN 28 | //! - k-Nearest Neighbor Classifiers 29 | //! - Principal Component Analysis 30 | //! 31 | //! ### linalg 32 | //! 33 | //! The linalg module reexports some structs and traits from the 34 | //! [rulinalg](https://crates.io/crates/rulinalg) crate. This is to provide 35 | //! easy access to common linear algebra tools within this library. 36 | //! 37 | //! --- 38 | //! 39 | //! ## Usage 40 | //! 41 | //! Specific usage of modules is described within the modules themselves. This section 42 | //! will focus on the general workflow for this library. 43 | //! 44 | //! The models contained within the learning module should implement either 45 | //! `SupModel` or `UnSupModel`. These both provide a `train` and a `predict` 46 | //! function which provide an interface to the model. 47 | //! 48 | //! You should instantiate the model, with your chosen options and then train using 49 | //! the training data. Followed by predicting with your test data. 
*For now* 50 | //! cross-validation, data handling, and many other things are left explicitly 51 | //! to the user. 52 | //! 53 | //! Here is an example usage for Gaussian Process Regression: 54 | //! 55 | //! ``` 56 | //! use rusty_machine::linalg::Matrix; 57 | //! use rusty_machine::linalg::Vector; 58 | //! use rusty_machine::learning::gp::GaussianProcess; 59 | //! use rusty_machine::learning::gp::ConstMean; 60 | //! use rusty_machine::learning::toolkit::kernel; 61 | //! use rusty_machine::learning::SupModel; 62 | //! 63 | //! // First we'll get some data. 64 | //! 65 | //! // Some example training data. 66 | //! let inputs = Matrix::new(3,3,vec![1.,1.,1.,2.,2.,2.,3.,3.,3.]); 67 | //! let targets = Vector::new(vec![0.,1.,0.]); 68 | //! 69 | //! // Some example test data. 70 | //! let test_inputs = Matrix::new(2,3, vec![1.5,1.5,1.5,2.5,2.5,2.5]); 71 | //! 72 | //! // Now we'll set up our model. 73 | //! // This is close to the most complicated a model in rusty-machine gets! 74 | //! 75 | //! // A squared exponential kernel with lengthscale 2, and amplitude 1. 76 | //! let ker = kernel::SquaredExp::new(2., 1.); 77 | //! 78 | //! // The zero function 79 | //! let zero_mean = ConstMean::default(); 80 | //! 81 | //! // Construct a GP with the specified kernel, mean, and a noise of 0.5. 82 | //! let mut gp = GaussianProcess::new(ker, zero_mean, 0.5); 83 | //! 84 | //! 85 | //! // Now we can train and predict from the model. 86 | //! 87 | //! // Train the model! 88 | //! gp.train(&inputs, &targets).unwrap(); 89 | //! 90 | //! // Predict the output from test data. 91 | //! let outputs = gp.predict(&test_inputs).unwrap(); 92 | //! ``` 93 | //! 94 | //! This code could have been a lot simpler if we had simply adopted 95 | //! `let mut gp = GaussianProcess::default();`. Conversely, you could also implement 96 | //! your own kernels and mean functions by using the appropriate traits. 97 | //! 98 | //! Additionally you'll notice there's quite a few `use` statements at the top of this code. 99 | //! We can remove some of these by utilizing the `prelude`: 100 | //! 101 | //! ``` 102 | //! use rusty_machine::prelude::*; 103 | //! 104 | //! let _ = Matrix::new(2,2,vec![2.0;4]); 105 | //! ``` 106 | 107 | #![deny(missing_docs)] 108 | #![warn(missing_debug_implementations)] 109 | 110 | #[macro_use] 111 | extern crate rulinalg; 112 | extern crate num as libnum; 113 | extern crate rand; 114 | 115 | pub mod prelude; 116 | 117 | /// The linear algebra module 118 | /// 119 | /// This module contains reexports of common tools from the rulinalg crate. 120 | pub mod linalg { 121 | pub use rulinalg::matrix::{Axes, Matrix, MatrixSlice, MatrixSliceMut, BaseMatrix, BaseMatrixMut}; 122 | pub use rulinalg::vector::Vector; 123 | pub use rulinalg::norm; 124 | pub use rulinalg::matrix::decomposition::*; 125 | } 126 | 127 | /// Module for data handling 128 | pub mod data { 129 | pub mod transforms; 130 | } 131 | 132 | /// Module for machine learning. 133 | pub mod learning { 134 | pub mod dbscan; 135 | pub mod glm; 136 | pub mod gmm; 137 | pub mod lin_reg; 138 | pub mod logistic_reg; 139 | pub mod k_means; 140 | pub mod nnet; 141 | pub mod gp; 142 | pub mod svm; 143 | pub mod naive_bayes; 144 | pub mod knn; 145 | pub mod pca; 146 | 147 | pub mod error; 148 | 149 | /// A new type which provides clean access to the learning errors 150 | pub type LearningResult = Result; 151 | 152 | /// Trait for supervised model. 153 | pub trait SupModel { 154 | /// Predict output from inputs. 
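        ///
        /// Implementations typically return an untrained-model error if `train`
        /// has not been called successfully first.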
155 | fn predict(&self, inputs: &T) -> LearningResult; 156 | 157 | /// Train the model using inputs and targets. 158 | fn train(&mut self, inputs: &T, targets: &U) -> LearningResult<()>; 159 | } 160 | 161 | /// Trait for unsupervised model. 162 | pub trait UnSupModel { 163 | /// Predict output from inputs. 164 | fn predict(&self, inputs: &T) -> LearningResult; 165 | 166 | /// Train the model using inputs. 167 | fn train(&mut self, inputs: &T) -> LearningResult<()>; 168 | } 169 | 170 | /// Module for optimization in machine learning setting. 171 | pub mod optim { 172 | 173 | /// Trait for models which can be gradient-optimized. 174 | pub trait Optimizable { 175 | /// The input data type to the model. 176 | type Inputs; 177 | /// The target data type to the model. 178 | type Targets; 179 | 180 | /// Compute the gradient for the model. 181 | fn compute_grad(&self, 182 | params: &[f64], 183 | inputs: &Self::Inputs, 184 | targets: &Self::Targets) 185 | -> (f64, Vec); 186 | } 187 | 188 | /// Trait for optimization algorithms. 189 | pub trait OptimAlgorithm { 190 | /// Return the optimized parameter using gradient optimization. 191 | /// 192 | /// Takes in a set of starting parameters and related model data. 193 | fn optimize(&self, 194 | model: &M, 195 | start: &[f64], 196 | inputs: &M::Inputs, 197 | targets: &M::Targets) 198 | -> Vec; 199 | } 200 | 201 | pub mod grad_desc; 202 | pub mod fmincg; 203 | } 204 | 205 | /// Module for learning tools. 206 | pub mod toolkit { 207 | pub mod activ_fn; 208 | pub mod cost_fn; 209 | pub mod kernel; 210 | pub mod rand_utils; 211 | pub mod regularization; 212 | } 213 | } 214 | 215 | #[cfg(feature = "stats")] 216 | /// Module for computational statistics 217 | pub mod stats { 218 | 219 | /// Module for statistical distributions. 220 | pub mod dist; 221 | } 222 | 223 | /// Module for evaluating models. 224 | pub mod analysis { 225 | pub mod confusion_matrix; 226 | pub mod cross_validation; 227 | pub mod score; 228 | } 229 | 230 | #[cfg(feature = "datasets")] 231 | /// Module for datasets. 232 | pub mod datasets; 233 | -------------------------------------------------------------------------------- /src/prelude.rs: -------------------------------------------------------------------------------- 1 | //! The rusty-machine prelude. 2 | //! 3 | //! This module alleviates some common imports used within rusty-machine. 4 | 5 | pub use linalg::{Matrix, MatrixSlice, MatrixSliceMut, BaseMatrix, BaseMatrixMut}; 6 | pub use linalg::Vector; 7 | pub use linalg::Axes; 8 | 9 | pub use learning::SupModel; 10 | pub use learning::UnSupModel; 11 | 12 | #[cfg(test)] 13 | mod tests { 14 | use super::super::prelude::*; 15 | 16 | #[test] 17 | fn create_mat_from_prelude() { 18 | let _ = Matrix::new(2, 2, vec![4.0;4]); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/stats/dist/exponential.rs: -------------------------------------------------------------------------------- 1 | //! Exponential distribution module. 2 | //! 3 | //! Contains extension methods for the Exp struct 4 | //! found in the rand crate. This is provided through 5 | //! traits added within the containing stats module. 6 | 7 | use stats::dist::Distribution; 8 | use rand::Rng; 9 | use rand::distributions::{Sample, IndependentSample}; 10 | use rand::distributions::exponential::Exp1; 11 | 12 | /// An Exponential random variable. 
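///
/// # Examples
///
/// ```
/// use rusty_machine::stats::dist::Exponential;
///
/// // A short sketch: construct the variable and read back its rate parameter.
/// let exp = Exponential::new(0.5);
/// assert_eq!(exp.lambda(), 0.5);
/// ```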
13 | #[derive(Debug, Clone, Copy)] 14 | pub struct Exponential { 15 | lambda: f64, 16 | } 17 | 18 | /// The default Exponential random variable. 19 | /// 20 | /// The defaults are: 21 | /// 22 | /// - lambda = 1 23 | impl Default for Exponential { 24 | fn default() -> Exponential { 25 | Exponential { lambda: 1f64 } 26 | } 27 | } 28 | 29 | impl Exponential { 30 | /// Constructs a new Exponential random variable with given 31 | /// lambda parameter. 32 | pub fn new(lambda: f64) -> Exponential { 33 | Exponential { lambda: lambda } 34 | } 35 | 36 | /// Returns the lambda parameter. 37 | pub fn lambda(&self) -> f64 { 38 | self.lambda 39 | } 40 | } 41 | 42 | impl Distribution for Exponential { 43 | /// The pdf of the exponential distribution. 44 | /// 45 | /// # Examples 46 | /// 47 | /// ``` 48 | /// use rusty_machine::stats::dist::Exponential; 49 | /// use rusty_machine::stats::dist::Distribution; 50 | /// 51 | /// // Construct an exponential with lambda parameter 7.0. 52 | /// let exp = Exponential::new(7f64); 53 | /// 54 | /// let pdf_zero = exp.pdf(0f64); 55 | /// assert!((pdf_zero - exp.lambda()).abs() < 1e-20); 56 | /// ``` 57 | fn pdf(&self, x: f64) -> f64 { 58 | assert!(x >= 0., "Input to pdf must be positive for exponential."); 59 | (-x * self.lambda).exp() * self.lambda 60 | } 61 | 62 | /// The log pdf of the exponential distribution. 63 | /// 64 | /// # Examples 65 | /// 66 | /// ``` 67 | /// // Construct an exponential with lambda parameter 5.0. 68 | /// use rusty_machine::stats::dist::Exponential; 69 | /// use rusty_machine::stats::dist::Distribution; 70 | /// 71 | /// // Construct an exponential with lambda parameter 5.0. 72 | /// let exp = Exponential::new(5f64); 73 | /// 74 | /// let log_pdf = exp.logpdf(3f64); 75 | /// 76 | /// assert!((log_pdf - exp.lambda().ln() + exp.lambda() * 3f64).abs() < 1e-20); 77 | /// ``` 78 | fn logpdf(&self, x: f64) -> f64 { 79 | assert!(x >= 0., 80 | "Input to log pdf must be positive for exponential."); 81 | self.lambda.ln() - (x * self.lambda) 82 | } 83 | 84 | /// The cdf of the exponential distribution. 85 | /// 86 | /// # Examples 87 | /// 88 | /// ``` 89 | /// use rusty_machine::stats::dist::Exponential; 90 | /// use rusty_machine::stats::dist::Distribution; 91 | /// 92 | /// // Construct an exponential with lambda parameter 5.0. 93 | /// let exp = Exponential::new(5f64); 94 | /// 95 | /// let cdf_zero = exp.cdf(0f64); 96 | /// 97 | /// assert!((cdf_zero).abs() < 1e-20); 98 | /// ``` 99 | fn cdf(&self, x: f64) -> f64 { 100 | assert!(x >= 0., "Input to cdf must be positive for exponential."); 101 | 1.0 - (-x * self.lambda).exp() 102 | } 103 | } 104 | 105 | impl Sample for Exponential { 106 | fn sample(&mut self, rng: &mut R) -> f64 { 107 | self.ind_sample(rng) 108 | } 109 | } 110 | 111 | impl IndependentSample for Exponential { 112 | fn ind_sample(&self, rng: &mut R) -> f64 { 113 | let Exp1(n) = rng.gen::(); 114 | n / self.lambda 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /src/stats/dist/gaussian.rs: -------------------------------------------------------------------------------- 1 | //! Gaussian distribution module. 2 | //! 3 | //! Contains extension methods for the Normal struct 4 | //! found in the rand crate. This is provided through 5 | //! traits added within the containing stats module. 
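//!
//! The density used throughout is
//! `f(x) = exp(-(x - mean)^2 / (2 * variance)) / (std_dev * sqrt(2 * pi))`.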
6 | 7 | use stats::dist::Distribution; 8 | use rand::Rng; 9 | use rand::distributions::{Sample, IndependentSample}; 10 | use rand::distributions::normal::StandardNormal; 11 | use super::consts as stat_consts; 12 | use std::f64::consts as float_consts; 13 | 14 | /// A Gaussian random variable. 15 | /// 16 | /// This struct stores both the variance and the standard deviation. 17 | /// This is to minimize the computation required for computing 18 | /// the distribution functions and sampling. 19 | /// 20 | /// It is most efficient to construct the struct using the `from_std_dev` constructor. 21 | #[derive(Debug, Clone, Copy)] 22 | pub struct Gaussian { 23 | mean: f64, 24 | variance: f64, 25 | _std_dev: f64, 26 | } 27 | 28 | /// The default Gaussian random variable. 29 | /// This is the Standard Normal random variable. 30 | /// 31 | /// The defaults are: 32 | /// 33 | /// - mean = 0 34 | /// - variance = 1 35 | impl Default for Gaussian { 36 | fn default() -> Gaussian { 37 | Gaussian { 38 | mean: 0f64, 39 | variance: 1f64, 40 | _std_dev: 1f64, 41 | } 42 | } 43 | } 44 | 45 | impl Gaussian { 46 | /// Creates a new Gaussian random variable from 47 | /// a given mean and variance. 48 | pub fn new(mean: f64, variance: f64) -> Gaussian { 49 | Gaussian { 50 | mean: mean, 51 | variance: variance, 52 | _std_dev: variance.sqrt(), 53 | } 54 | } 55 | 56 | /// Creates a new Gaussian random variable from 57 | /// a given mean and standard deviation. 58 | pub fn from_std_dev(mean: f64, std_dev: f64) -> Gaussian { 59 | Gaussian { 60 | mean: mean, 61 | variance: std_dev * std_dev, 62 | _std_dev: std_dev, 63 | } 64 | } 65 | } 66 | 67 | /// The distribution of the gaussian random variable. 68 | /// 69 | /// Accurately computes the PDF and log PDF. 70 | /// Estimates the CDF accurate only to 0.003. 71 | impl Distribution for Gaussian { 72 | /// The pdf of the normal distribution 73 | /// 74 | /// # Examples 75 | /// 76 | /// ``` 77 | /// use rusty_machine::stats::dist::Gaussian; 78 | /// use rusty_machine::stats::dist::Distribution; 79 | /// use rusty_machine::stats::dist::consts; 80 | /// 81 | /// let gauss = Gaussian::default(); 82 | /// 83 | /// let lpdf_zero = gauss.pdf(0f64); 84 | /// 85 | /// // The value should be very close to 1/sqrt(2 * pi) 86 | /// assert!((lpdf_zero - (1f64/consts::SQRT_2_PI).abs()) < 1e-20); 87 | /// ``` 88 | fn pdf(&self, x: f64) -> f64 { 89 | (-(x - self.mean) * (x - self.mean) / (2.0 * self.variance)).exp() / 90 | (stat_consts::SQRT_2_PI * self._std_dev) 91 | } 92 | 93 | /// The log pdf of the normal distribution. 94 | /// 95 | /// # Examples 96 | /// 97 | /// ``` 98 | /// use rusty_machine::stats::dist::Gaussian; 99 | /// use rusty_machine::stats::dist::Distribution; 100 | /// use rusty_machine::stats::dist::consts; 101 | /// 102 | /// let gauss = Gaussian::default(); 103 | /// 104 | /// let lpdf_zero = gauss.logpdf(0f64); 105 | /// 106 | /// // The value should be very close to -0.5*Ln(2 * pi) 107 | /// assert!((lpdf_zero + 0.5*consts::LN_2_PI).abs() < 1e-20); 108 | /// ``` 109 | fn logpdf(&self, x: f64) -> f64 { 110 | -self._std_dev.ln() - (stat_consts::LN_2_PI / 2.0) - 111 | ((x - self.mean) * (x - self.mean) / (2.0 * self.variance)) 112 | } 113 | 114 | /// Rough estimate for the cdf of the gaussian distribution. 115 | /// Accurate to 0.003. 
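/// Uses the approximation `0.5 * (1 + sign(z) * sqrt(1 - exp(-2 * z^2 / pi)))`
/// with `z = (x - mean) / std_dev`.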
116 | /// 117 | /// # Examples 118 | /// 119 | /// ``` 120 | /// use rusty_machine::stats::dist::Gaussian; 121 | /// use rusty_machine::stats::dist::Distribution; 122 | /// 123 | /// let gauss = Gaussian::new(10f64, 5f64); 124 | /// let cdf_mid = gauss.cdf(10f64); 125 | /// 126 | /// assert!((0.5 - cdf_mid).abs() < 0.004); 127 | /// ``` 128 | /// 129 | /// A slightly more involved test: 130 | /// 131 | /// ``` 132 | /// use rusty_machine::stats::dist::Gaussian; 133 | /// use rusty_machine::stats::dist::Distribution; 134 | /// 135 | /// let gauss = Gaussian::new(10f64, 4f64); 136 | /// let cdf = gauss.cdf(9f64); 137 | /// 138 | /// assert!((0.5*(1f64 - 0.382924922548) - cdf).abs() < 0.004); 139 | /// ``` 140 | fn cdf(&self, x: f64) -> f64 { 141 | 0.5 * 142 | (1f64 + 143 | (x - self.mean).signum() * 144 | (1f64 - 145 | (-float_consts::FRAC_2_PI * (x - self.mean) * (x - self.mean) / self.variance).exp()) 146 | .sqrt()) 147 | } 148 | } 149 | 150 | impl Sample for Gaussian { 151 | fn sample(&mut self, rng: &mut R) -> f64 { 152 | self.ind_sample(rng) 153 | } 154 | } 155 | 156 | impl IndependentSample for Gaussian { 157 | fn ind_sample(&self, rng: &mut R) -> f64 { 158 | let StandardNormal(n) = rng.gen::(); 159 | self.mean + self._std_dev * n 160 | } 161 | } 162 | -------------------------------------------------------------------------------- /src/stats/dist/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod exponential; 2 | pub mod gaussian; 3 | 4 | pub use self::gaussian::Gaussian; 5 | pub use self::exponential::Exponential; 6 | 7 | /// Statistical constants 8 | /// 9 | /// This module may be moved to the containing stats module in future. 10 | pub mod consts { 11 | /// Sqrt(2 * pi) 12 | pub const SQRT_2_PI: f64 = 2.50662827463100050241576528481104525_f64; 13 | /// Ln(2 * pi) 14 | pub const LN_2_PI: f64 = 1.83787706640934548356065947281123527_f64; 15 | } 16 | 17 | /// Trait for statistical distributions. 18 | pub trait Distribution { 19 | /// The pdf of the distribution. 20 | fn pdf(&self, x: T) -> f64; 21 | 22 | /// The logpdf of the distribution. 23 | /// 24 | /// By default this takes the logarithm of the pdf. 25 | /// More efficient functions should be implemented. 26 | fn logpdf(&self, x: T) -> f64 { 27 | self.pdf(x).ln() 28 | } 29 | 30 | /// The cdf of the distribution. 
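/// That is, the probability that a draw from the distribution is less than
/// or equal to `x`.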
31 | fn cdf(&self, x: T) -> f64; 32 | } 33 | -------------------------------------------------------------------------------- /tests/datasets.rs: -------------------------------------------------------------------------------- 1 | extern crate rusty_machine as rm; 2 | 3 | #[cfg(feature = "datasets")] 4 | pub mod test { 5 | 6 | use rm::datasets; 7 | use rm::linalg::BaseMatrix; 8 | 9 | #[test] 10 | fn test_iris() { 11 | let dt = datasets::iris::load(); 12 | assert_eq!(dt.data().rows(), 150); 13 | assert_eq!(dt.data().cols(), 4); 14 | 15 | assert_eq!(dt.target().size(), 150); 16 | } 17 | 18 | #[test] 19 | fn test_trees() { 20 | let dt = datasets::trees::load(); 21 | assert_eq!(dt.data().rows(), 31); 22 | assert_eq!(dt.data().cols(), 2); 23 | 24 | assert_eq!(dt.target().size(), 31); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /tests/learning/dbscan.rs: -------------------------------------------------------------------------------- 1 | use rm::linalg::Matrix; 2 | 3 | use rm::learning::dbscan::DBSCAN; 4 | use rm::learning::UnSupModel; 5 | 6 | #[test] 7 | fn test_basic_clusters() { 8 | let inputs = Matrix::new(6, 2, vec![1.0, 2.0, 9 | 1.1, 2.2, 10 | 0.9, 1.9, 11 | 1.0, 2.1, 12 | -2.0, 3.0, 13 | -2.2, 3.1]); 14 | 15 | let mut model = DBSCAN::new(0.5, 2); 16 | model.train(&inputs).unwrap(); 17 | 18 | let clustering = model.clusters().unwrap(); 19 | 20 | assert!(clustering.data().iter().take(4).all(|x| *x == Some(0))); 21 | assert!(clustering.data().iter().skip(4).all(|x| *x == Some(1))); 22 | } 23 | 24 | 25 | #[test] 26 | fn test_basic_prediction() { 27 | let inputs = Matrix::new(6, 2, vec![1.0, 2.0, 28 | 1.1, 2.2, 29 | 0.9, 1.9, 30 | 1.0, 2.1, 31 | -2.0, 3.0, 32 | -2.2, 3.1]); 33 | 34 | let mut model = DBSCAN::new(0.5, 2); 35 | model.set_predictive(true); 36 | model.train(&inputs).unwrap(); 37 | 38 | let new_points = Matrix::new(2,2, vec![1.0, 2.0, 4.0, 4.0]); 39 | 40 | let classes = model.predict(&new_points).unwrap(); 41 | assert!(classes[0] == Some(0)); 42 | assert!(classes[1] == None); 43 | } 44 | -------------------------------------------------------------------------------- /tests/learning/gp.rs: -------------------------------------------------------------------------------- 1 | use rm::linalg::Matrix; 2 | use rm::linalg::Vector; 3 | use rm::learning::SupModel; 4 | use rm::learning::gp::GaussianProcess; 5 | 6 | #[test] 7 | fn test_default_gp() { 8 | let mut gp = GaussianProcess::default(); 9 | gp.noise = 10f64; 10 | 11 | let inputs = Matrix::new(10,1,vec![0.,1.,2.,3.,4.,5.,6.,7.,8.,9.]); 12 | let targets = Vector::new(vec![0.,1.,2.,3.,4.,4.,3.,2.,1.,0.]); 13 | 14 | gp.train(&inputs, &targets).unwrap(); 15 | 16 | let test_inputs = Matrix::new(5,1,vec![2.3,4.4,5.1,6.2,7.1]); 17 | 18 | let _outputs = gp.predict(&test_inputs).unwrap(); 19 | } 20 | -------------------------------------------------------------------------------- /tests/learning/k_means.rs: -------------------------------------------------------------------------------- 1 | use rm::linalg::Matrix; 2 | use rm::learning::UnSupModel; 3 | use rm::learning::k_means::KMeansClassifier; 4 | use rm::learning::k_means::{Forgy, RandomPartition, KPlusPlus}; 5 | 6 | #[test] 7 | fn test_model_default() { 8 | let mut model = KMeansClassifier::::new(3); 9 | let inputs = Matrix::new(3, 2, vec![1.0, 2.0, 1.0, 3.0, 1.0, 4.0]); 10 | let targets = Matrix::new(3,2, vec![1.0, 2.0, 1.0, 3.0, 1.0, 4.0]); 11 | 12 | model.train(&inputs).unwrap(); 13 | 14 | let outputs = 
model.predict(&targets).unwrap(); 15 | 16 | assert_eq!(outputs.size(), 3); 17 | } 18 | 19 | #[test] 20 | fn test_model_iter() { 21 | let mut model = KMeansClassifier::::new(3); 22 | let inputs = Matrix::new(3, 2, vec![1.0, 2.0, 1.0, 3.0, 1.0, 4.0]); 23 | let targets = Matrix::new(3,2, vec![1.0, 2.0, 1.0, 3.0, 1.0, 4.0]); 24 | 25 | model.set_iters(1000); 26 | model.train(&inputs).unwrap(); 27 | 28 | let outputs = model.predict(&targets).unwrap(); 29 | 30 | assert_eq!(outputs.size(), 3); 31 | } 32 | 33 | #[test] 34 | fn test_model_forgy() { 35 | let mut model = KMeansClassifier::new_specified(3, 100, Forgy); 36 | let inputs = Matrix::new(3, 2, vec![1.0, 2.0, 1.0, 3.0, 1.0, 4.0]); 37 | let targets = Matrix::new(3,2, vec![1.0, 2.0, 1.0, 3.0, 1.0, 4.0]); 38 | 39 | model.train(&inputs).unwrap(); 40 | 41 | let outputs = model.predict(&targets).unwrap(); 42 | 43 | assert_eq!(outputs.size(), 3); 44 | } 45 | 46 | #[test] 47 | fn test_model_ran_partition() { 48 | let mut model = KMeansClassifier::new_specified(3, 100, RandomPartition); 49 | let inputs = Matrix::new(3, 2, vec![1.0, 2.0, 1.0, 3.0, 1.0, 4.0]); 50 | let targets = Matrix::new(3,2, vec![1.0, 2.0, 1.0, 3.0, 1.0, 4.0]); 51 | 52 | model.train(&inputs).unwrap(); 53 | 54 | let outputs = model.predict(&targets).unwrap(); 55 | 56 | assert_eq!(outputs.size(), 3); 57 | } 58 | 59 | #[test] 60 | fn test_model_kplusplus() { 61 | let mut model = KMeansClassifier::new_specified(3, 100, KPlusPlus); 62 | let inputs = Matrix::new(3, 2, vec![1.0, 2.0, 1.0, 3.0, 1.0, 4.0]); 63 | let targets = Matrix::new(3,2, vec![1.0, 2.0, 1.0, 3.0, 1.0, 4.0]); 64 | 65 | model.train(&inputs).unwrap(); 66 | 67 | let outputs = model.predict(&targets).unwrap(); 68 | 69 | assert_eq!(outputs.size(), 3); 70 | } 71 | 72 | #[test] 73 | #[should_panic] 74 | fn test_no_train_predict() { 75 | let model = KMeansClassifier::::new(3); 76 | let inputs = Matrix::new(3, 2, vec![1.0, 2.0, 1.0, 3.0, 1.0, 4.0]); 77 | 78 | model.predict(&inputs).unwrap(); 79 | 80 | } 81 | 82 | #[test] 83 | fn test_two_centroids() { 84 | let mut model = KMeansClassifier::new(2); 85 | let inputs = Matrix::new(6, 2, vec![59.59375, 270.6875, 86 | 51.59375, 307.6875, 87 | 86.59375, 286.6875, 88 | 319.59375, 145.6875, 89 | 314.59375, 174.6875, 90 | 350.59375, 161.6875]); 91 | 92 | model.train(&inputs).unwrap(); 93 | 94 | let classes = model.predict(&inputs).unwrap(); 95 | let class_a = classes[0]; 96 | 97 | let class_b = if class_a == 0 { 1 } else { 0 }; 98 | 99 | assert!(classes.data().iter().take(3).all(|x| *x == class_a)); 100 | assert!(classes.data().iter().skip(3).all(|x| *x == class_b)); 101 | } 102 | -------------------------------------------------------------------------------- /tests/learning/knn.rs: -------------------------------------------------------------------------------- 1 | use rm::linalg::{Matrix, Vector}; 2 | use rm::learning::SupModel; 3 | use rm::learning::knn::KNNClassifier; 4 | 5 | #[test] 6 | fn test_knn() { 7 | let data = matrix![1., 1., 1.; 8 | 1., 2., 3.; 9 | 2., 3., 1.; 10 | 2., 2., 0.]; 11 | let target = Vector::new(vec![0, 0, 1, 1]); 12 | 13 | let mut knn = KNNClassifier::new(2); 14 | let _ = knn.train(&data, &target).unwrap(); 15 | 16 | let res = knn.predict(&matrix![2., 3., 0.; 1., 1., 2.]).unwrap(); 17 | let exp = Vector::new(vec![1, 0]); 18 | assert_eq!(res, exp); 19 | } 20 | 21 | #[test] 22 | fn test_knn_long() { 23 | let vals = (0..200000).map(|x: usize| x as f64).collect::>(); 24 | let data = Matrix::new(100000, 2, vals); 25 | 26 | let mut tvals = vec![0; 50000]; 27 | 
tvals.extend(vec![1; 50000]); 28 | let target = Vector::new(tvals); 29 | 30 | // check stack doesn't overflow 31 | let mut knn = KNNClassifier::new(10); 32 | let _ = knn.train(&data, &target).unwrap(); 33 | 34 | let res = knn.predict(&matrix![5., 10.; 60000., 550000.]).unwrap(); 35 | let exp = Vector::new(vec![0, 1]); 36 | assert_eq!(res, exp); 37 | 38 | // check stack doesn't overflow 39 | let mut knn = KNNClassifier::new(1000); 40 | let _ = knn.train(&data, &target).unwrap(); 41 | assert_eq!(res, exp); 42 | } 43 | 44 | #[cfg(feature = "datasets")] 45 | pub mod tests_datasets { 46 | 47 | use rm::linalg::{BaseMatrix, Vector}; 48 | use rm::learning::SupModel; 49 | use rm::learning::knn::{KNNClassifier, KDTree, BallTree, BruteForce}; 50 | use rm::datasets::iris; 51 | 52 | #[test] 53 | fn test_knn_iris_2cols() { 54 | let dataset = iris::load(); 55 | // slice first 2 columns 56 | let data = dataset.data().select_cols(&[0, 1]); 57 | 58 | let mut knn = KNNClassifier::new(1); 59 | let _ = knn.train(&data, &dataset.target()).unwrap(); 60 | let res = knn.predict(&matrix![5.9, 3.6]).unwrap(); 61 | assert_eq!(res, Vector::new(vec![1])); 62 | 63 | let mut knn = KNNClassifier::new(4); 64 | let _ = knn.train(&data, &dataset.target()).unwrap(); 65 | let res = knn.predict(&matrix![5.9, 3.6]).unwrap(); 66 | assert_eq!(res, Vector::new(vec![1])); 67 | 68 | let mut knn = KNNClassifier::new(4); 69 | let _ = knn.train(&data, &dataset.target()).unwrap(); 70 | let res = knn.predict(&matrix![6.0, 3.5]).unwrap(); 71 | assert_eq!(res, Vector::new(vec![1])); 72 | 73 | let mut knn = KNNClassifier::new(5); 74 | let _ = knn.train(&data, &dataset.target()).unwrap(); 75 | let res = knn.predict(&matrix![7.1, 2.8]).unwrap(); 76 | assert_eq!(res, Vector::new(vec![2])); 77 | } 78 | 79 | #[test] 80 | fn test_knn_iris_default() { 81 | let dataset = iris::load(); 82 | 83 | let mut knn = KNNClassifier::default(); 84 | let _ = knn.train(&dataset.data(), &dataset.target()).unwrap(); 85 | let res = knn.predict(&dataset.data()).unwrap(); 86 | 87 | let exp = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 88 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 89 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 90 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 91 | 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 92 | 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 93 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 94 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]; 95 | assert_eq!(res, Vector::new(exp)); 96 | } 97 | 98 | #[test] 99 | fn test_knn_iris_different_neighbors() { 100 | let dataset = iris::load(); 101 | 102 | let mut knn = KNNClassifier::new(3); 103 | let _ = knn.train(&dataset.data(), &dataset.target()).unwrap(); 104 | let res = knn.predict(&dataset.data()).unwrap(); 105 | 106 | let exp = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 107 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 108 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 109 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 110 | 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 111 | 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 112 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 113 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]; 114 | assert_eq!(res, Vector::new(exp)); 115 | 116 | let mut knn = KNNClassifier::new(10); 117 | let _ = knn.train(&dataset.data(), &dataset.target()).unwrap(); 118 | let res 
= knn.predict(&dataset.data()).unwrap(); 119 | 120 | let exp = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 123 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 124 | 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 125 | 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 126 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 127 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]; 128 | assert_eq!(res, Vector::new(exp)); 129 | } 130 | 131 | #[test] 132 | fn test_knn_iris_new_specified() { 133 | let dataset = iris::load(); 134 | 135 | let mut knn = KNNClassifier::new_specified(5, KDTree::default()); 136 | let _ = knn.train(&dataset.data(), &dataset.target()).unwrap(); 137 | let res = knn.predict(&dataset.data()).unwrap(); 138 | 139 | let exp = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 140 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 141 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 142 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 143 | 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 144 | 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 145 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 146 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]; 147 | let expv = Vector::new(exp); 148 | assert_eq!(res, expv); 149 | 150 | let mut knn = KNNClassifier::new_specified(5, BallTree::default()); 151 | let _ = knn.train(&dataset.data(), &dataset.target()).unwrap(); 152 | let res = knn.predict(&dataset.data()).unwrap(); 153 | assert_eq!(res, expv); 154 | 155 | let mut knn = KNNClassifier::new_specified(5, BruteForce::default()); 156 | let _ = knn.train(&dataset.data(), &dataset.target()).unwrap(); 157 | let res = knn.predict(&dataset.data()).unwrap(); 158 | assert_eq!(res, expv); 159 | } 160 | } -------------------------------------------------------------------------------- /tests/learning/lin_reg.rs: -------------------------------------------------------------------------------- 1 | use rm::linalg::Matrix; 2 | use rm::linalg::Vector; 3 | use rm::learning::SupModel; 4 | use rm::learning::lin_reg::LinRegressor; 5 | use libnum::abs; 6 | 7 | #[test] 8 | fn test_optimized_regression() { 9 | let mut lin_mod = LinRegressor::default(); 10 | let inputs = Matrix::new(3, 1, vec![2.0, 3.0, 4.0]); 11 | let targets = Vector::new(vec![5.0, 6.0, 7.0]); 12 | 13 | lin_mod.train_with_optimization(&inputs, &targets); 14 | 15 | let _ = lin_mod.parameters().unwrap(); 16 | } 17 | 18 | #[test] 19 | fn test_regression() { 20 | let mut lin_mod = LinRegressor::default(); 21 | let inputs = Matrix::new(3, 1, vec![2.0, 3.0, 4.0]); 22 | let targets = Vector::new(vec![5.0, 6.0, 7.0]); 23 | 24 | lin_mod.train(&inputs, &targets).unwrap(); 25 | 26 | let parameters = lin_mod.parameters().unwrap(); 27 | 28 | let err_1 = abs(parameters[0] - 3.0); 29 | let err_2 = abs(parameters[1] - 1.0); 30 | 31 | assert!(err_1 < 1e-8); 32 | assert!(err_2 < 1e-8); 33 | } 34 | 35 | #[test] 36 | #[should_panic] 37 | fn test_no_train_params() { 38 | let lin_mod = LinRegressor::default(); 39 | 40 | let _ = lin_mod.parameters().unwrap(); 41 | } 42 | 43 | #[test] 44 | #[should_panic] 45 | fn test_no_train_predict() { 46 | let lin_mod = LinRegressor::default(); 47 | let inputs = Matrix::new(3, 2, vec![1.0, 2.0, 1.0, 3.0, 1.0, 4.0]); 48 | 49 | let _ = lin_mod.predict(&inputs).unwrap(); 50 | } 51 | 
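// The trees regression test below fits an intercept plus one coefficient per
// input column, so `parameters()` returns three values for the two feature
// columns and each prediction is `params[0] + params[1] * x1 + params[2] * x2`.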
52 | #[cfg(feature = "datasets")] 53 | #[test] 54 | fn test_regression_datasets_trees() { 55 | use rm::datasets::trees; 56 | let trees = trees::load(); 57 | 58 | let mut lin_mod = LinRegressor::default(); 59 | lin_mod.train(&trees.data(), &trees.target()).unwrap(); 60 | let params = lin_mod.parameters().unwrap(); 61 | assert_eq!(params, &Vector::new(vec![-57.98765891838409, 4.708160503017506, 0.3392512342447438])); 62 | 63 | let predicted = lin_mod.predict(&trees.data()).unwrap(); 64 | let expected = vec![4.837659653793278, 4.55385163347481, 4.816981265588826, 15.874115228921276, 65 | 19.869008437727473, 21.018326956518717, 16.192688074961563, 19.245949183164257, 66 | 21.413021404689726, 20.187581283767756, 22.015402271048487, 21.468464618616007, 67 | 21.468464618616007, 20.50615412980805, 23.954109686181766, 27.852202904652785, 68 | 31.583966481344966, 33.806481916796706, 30.60097760433255, 28.697035014921106, 69 | 34.388184394951004, 36.008318964043994, 35.38525970948079, 41.76899799551756, 70 | 44.87770231764652, 50.942867757643015, 52.223751092491256, 53.42851282520877, 71 | 53.899328875510534, 53.899328875510534, 68.51530482306926]; 72 | assert_eq!(predicted, Vector::new(expected)); 73 | } 74 | 75 | #[test] 76 | #[ignore = "FIXME #183 fails nondeterministically"] 77 | fn test_train_no_data() { 78 | let inputs = Matrix::new(0, 1, vec![]); 79 | let targets = Vector::new(vec![]); 80 | 81 | let mut lin_mod = LinRegressor::default(); 82 | let res = lin_mod.train(&inputs, &targets); 83 | 84 | assert!(res.is_err()); 85 | } 86 | -------------------------------------------------------------------------------- /tests/learning/optim/grad_desc.rs: -------------------------------------------------------------------------------- 1 | use rm::learning::optim::Optimizable; 2 | use rm::learning::optim::fmincg::ConjugateGD; 3 | use rm::learning::optim::grad_desc::{GradientDesc, StochasticGD, AdaGrad, RMSProp}; 4 | use rm::learning::optim::OptimAlgorithm; 5 | 6 | use rm::linalg::Matrix; 7 | 8 | /// A model which uses the cost function 9 | /// y = (x - c)^2 10 | /// 11 | /// The goal is to learn the true value c which minimizes the cost. 
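/// Since d/dx (x - c)^2 = 2 * (x - c), `compute_grad` below returns the squared
/// error as the cost together with this single-element gradient, so each of the
/// optimizers under test should drive the parameter towards `c`.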
12 | struct XSqModel { 13 | c: f64, 14 | } 15 | 16 | impl Optimizable for XSqModel { 17 | type Inputs = Matrix; 18 | type Targets = Matrix; 19 | 20 | fn compute_grad(&self, params: &[f64], _: &Matrix, _: &Matrix) -> (f64, Vec) { 21 | 22 | ((params[0] - self.c) * (params[0] - self.c), 23 | vec![2f64 * (params[0] - self.c)]) 24 | } 25 | } 26 | 27 | #[test] 28 | fn convex_fmincg_training() { 29 | let x_sq = XSqModel { c: 20f64 }; 30 | 31 | let cgd = ConjugateGD::default(); 32 | let test_data = vec![500f64]; 33 | let params = cgd.optimize(&x_sq, 34 | &test_data[..], 35 | &Matrix::zeros(1, 1), 36 | &Matrix::zeros(1, 1)); 37 | 38 | assert!(params[0] - 20f64 < 1e-10); 39 | assert!(x_sq.compute_grad(¶ms, &Matrix::zeros(1, 1), &Matrix::zeros(1, 1)).0 < 1e-10); 40 | } 41 | 42 | #[test] 43 | fn convex_gd_training() { 44 | let x_sq = XSqModel { c: 20f64 }; 45 | 46 | let gd = GradientDesc::default(); 47 | let test_data = vec![500f64]; 48 | let params = gd.optimize(&x_sq, 49 | &test_data[..], 50 | &Matrix::zeros(1, 1), 51 | &Matrix::zeros(1, 1)); 52 | 53 | assert!(params[0] - 20f64 < 1e-10); 54 | assert!(x_sq.compute_grad(¶ms, &Matrix::zeros(1, 1), &Matrix::zeros(1, 1)).0 < 1e-10); 55 | } 56 | 57 | #[test] 58 | fn convex_stochastic_gd_training() { 59 | let x_sq = XSqModel { c: 20f64 }; 60 | 61 | let gd = StochasticGD::new(0.9f64, 0.1f64, 100); 62 | let test_data = vec![100f64]; 63 | let params = gd.optimize(&x_sq, 64 | &test_data[..], 65 | &Matrix::zeros(100, 1), 66 | &Matrix::zeros(100, 1)); 67 | 68 | assert!(params[0] - 20f64 < 1e-10); 69 | assert!(x_sq.compute_grad(¶ms, &Matrix::zeros(1, 1), &Matrix::zeros(1, 1)).0 < 1e-10); 70 | } 71 | 72 | #[test] 73 | fn convex_adagrad_training() { 74 | let x_sq = XSqModel { c: 20f64 }; 75 | 76 | let gd = AdaGrad::new(5f64, 1f64, 100); 77 | let test_data = vec![100f64]; 78 | let params = gd.optimize(&x_sq, 79 | &test_data[..], 80 | &Matrix::zeros(100, 1), 81 | &Matrix::zeros(100, 1)); 82 | 83 | assert!(params[0] - 20f64 < 1e-10); 84 | assert!(x_sq.compute_grad(¶ms, &Matrix::zeros(1, 1), &Matrix::zeros(1, 1)).0 < 1e-10); 85 | } 86 | 87 | #[test] 88 | fn convex_rmsprop_training() { 89 | let x_sq = XSqModel { c: 20f64 }; 90 | 91 | let rms = RMSProp::new(0.05, 0.9, 1e-5, 50); 92 | let test_data = vec![100f64]; 93 | let params = rms.optimize(&x_sq, 94 | &test_data[..], 95 | &Matrix::zeros(100, 1), 96 | &Matrix::zeros(100, 1)); 97 | 98 | assert!(params[0] - 20f64 < 1e-10); 99 | assert!(x_sq.compute_grad(¶ms, &Matrix::zeros(1, 1), &Matrix::zeros(1, 1)).0 < 1e-10); 100 | } -------------------------------------------------------------------------------- /tests/learning/pca.rs: -------------------------------------------------------------------------------- 1 | use rm::linalg::Matrix; 2 | use rm::learning::UnSupModel; 3 | use rm::learning::pca::PCA; 4 | 5 | #[test] 6 | fn test_default() { 7 | let mut model = PCA::default(); 8 | 9 | let inputs = Matrix::new(7, 3, vec![8.3, 50., 23., 10 | 10.2, 55., 21., 11 | 11.1, 57., 22., 12 | 12.5, 60., 15., 13 | 11.3, 59., 20., 14 | 12.4, 61., 11., 15 | 11.2, 58., 23.]); 16 | model.train(&inputs).unwrap(); 17 | 18 | let cexp = Matrix::new(3, 3, vec![0.2304196717022202, 0.2504639278931734, -0.9403055863478447, 19 | 0.5897383434061588, 0.7326863014098074, 0.3396755364211204, 20 | -0.7740254913174374, 0.6328021843757651, -0.021117155112842168]); 21 | let cmp = model.components().unwrap(); 22 | assert_matrix_eq!(cmp, cexp, comp=abs, tol=1e-8); 23 | 24 | let new_data = Matrix::new(1, 3, vec![9., 45., 22.]); 25 | let outputs = 
model.predict(&new_data).unwrap(); 26 | 27 | let exp = Matrix::new(1, 3, vec![-9.72287413262656, -7.680227015314077, -2.301338333438487]); 28 | assert_matrix_eq!(outputs, exp, comp=abs, tol=1e-8); 29 | } 30 | 31 | #[test] 32 | fn test_not_centering() { 33 | let mut model = PCA::new(3, false); 34 | 35 | let inputs = Matrix::new(7, 3, vec![8.3, 50., 23., 36 | 10.2, 55., 21., 37 | 11.1, 57., 22., 38 | 12.5, 60., 15., 39 | 11.3, 59., 20., 40 | 12.4, 61., 11., 41 | 11.2, 58., 23.]); 42 | model.train(&inputs).unwrap(); 43 | 44 | let cexp = Matrix::new(3, 3, vec![0.17994480617740657, -0.16908609066166264, 0.9690354795746806, 45 | 0.9326216647416523, -0.2839205184846983, -0.2227239763426676, 46 | 0.3127885822473139, 0.9438215049087068, 0.10660332868901998]); 47 | let cmp = model.components().unwrap(); 48 | assert_matrix_eq!(cmp, cexp, comp=abs, tol=1e-8); 49 | 50 | let new_data = Matrix::new(1, 3, vec![9., 45., 22.]); 51 | let outputs = model.predict(&new_data).unwrap(); 52 | 53 | let exp = Matrix::new(1, 3, vec![50.468826978411926, 6.465874960225161, 1.0440136119105228]); 54 | assert_matrix_eq!(outputs, exp, comp=abs, tol=1e-8); 55 | } 56 | 57 | #[test] 58 | fn test_filter_component() { 59 | let mut model = PCA::new(2, false); 60 | 61 | let inputs = Matrix::new(7, 3, vec![8.3, 50., 23., 62 | 10.2, 55., 21., 63 | 11.1, 57., 22., 64 | 12.5, 60., 15., 65 | 11.3, 59., 20., 66 | 12.4, 61., 11., 67 | 11.2, 58., 23.]); 68 | model.train(&inputs).unwrap(); 69 | 70 | let cexp = Matrix::new(3, 2, vec![0.17994480617740657, -0.16908609066166264, 71 | 0.9326216647416523, -0.2839205184846983, 72 | 0.3127885822473139, 0.9438215049087068]); 73 | let cmp = model.components().unwrap(); 74 | assert_matrix_eq!(cmp, cexp, comp=abs, tol=1e-8); 75 | 76 | let new_data = Matrix::new(1, 3, vec![9., 45., 22.]); 77 | let outputs = model.predict(&new_data).unwrap(); 78 | 79 | let exp = Matrix::new(1, 2, vec![50.468826978411926, 6.465874960225161]); 80 | assert_matrix_eq!(outputs, exp, comp=abs, tol=1e-8); 81 | } 82 | 83 | #[test] 84 | fn test_predict_different_dimension() { 85 | let mut model = PCA::new(2, false); 86 | 87 | let inputs = Matrix::new(7, 3, vec![8.3, 50., 23., 88 | 10.2, 55., 21., 89 | 11.1, 57., 22., 90 | 12.5, 60., 15., 91 | 11.3, 59., 20., 92 | 12.4, 61., 11., 93 | 11.2, 58., 23.]); 94 | model.train(&inputs).unwrap(); 95 | 96 | let new_data = Matrix::new(1, 2, vec![1., 2.]); 97 | let err = model.predict(&new_data); 98 | assert!(err.is_err()); 99 | 100 | let new_data = Matrix::new(1, 4, vec![1., 2., 3., 4.]); 101 | let err = model.predict(&new_data); 102 | assert!(err.is_err()); 103 | 104 | let mut model = PCA::new(5, false); 105 | let err = model.train(&inputs); 106 | assert!(err.is_err()); 107 | } 108 | 109 | #[test] 110 | fn test_wide() { 111 | let mut model = PCA::default(); 112 | 113 | let inputs = Matrix::new(2, 4, vec![8.3, 50., 23., 2., 114 | 10.2, 55., 21., 3.]); 115 | model.train(&inputs).unwrap(); 116 | 117 | let cexp = Matrix::new(2, 4, vec![0.3277323746171723, 0.8624536174136117, -0.3449814469654447, 0.17249072348272235, 118 | 0.933710591152088, -0.23345540994181946, 0.23959824886246414, -0.1275765757549414]); 119 | let cmp = model.components().unwrap(); 120 | assert_matrix_eq!(cmp, cexp, comp=abs, tol=1e-8); 121 | 122 | let new_data = Matrix::new(1, 4, vec![9., 45., 22., 2.5]); 123 | let outputs = model.predict(&new_data).unwrap(); 124 | 125 | let exp = Matrix::new(1, 2, vec![-6.550335224256381, 1.517487926775624]); 126 | assert_matrix_eq!(outputs, exp, comp=abs, tol=1e-8); 127 | } 
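The expected outputs in `test_not_centering` are consistent with `predict` (without centering) projecting the new observations straight onto the returned components. Below is a quick, self-contained sketch of that arithmetic using only the re-exported `Matrix` type; it is an illustrative check, not part of the test suite.

```rust
use rusty_machine::linalg::Matrix;

fn main() {
    // Components reported by `PCA::new(3, false)` in `test_not_centering`.
    let components = Matrix::new(3, 3, vec![
        0.17994480617740657, -0.16908609066166264, 0.9690354795746806,
        0.9326216647416523, -0.2839205184846983, -0.2227239763426676,
        0.3127885822473139, 0.9438215049087068, 0.10660332868901998,
    ]);

    // The new observation used by the test.
    let new_data = Matrix::new(1, 3, vec![9., 45., 22.]);

    // Plain projection onto the components; this reproduces the expected
    // output above, roughly [50.4688, 6.4659, 1.0440].
    let projected = new_data * components;
    println!("{:?}", projected);
}
```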
-------------------------------------------------------------------------------- /tests/lib.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate rulinalg; 3 | extern crate rusty_machine as rm; 4 | extern crate num as libnum; 5 | 6 | pub mod learning { 7 | mod dbscan; 8 | mod lin_reg; 9 | mod k_means; 10 | mod gp; 11 | mod knn; 12 | mod pca; 13 | 14 | pub mod optim { 15 | mod grad_desc; 16 | } 17 | } 18 | 19 | pub mod datasets; 20 | --------------------------------------------------------------------------------