├── .gitignore ├── .travis.yml ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── examples ├── classification_task.rs └── regression_task.rs └── src ├── baseline ├── mod.rs ├── naive_bayes_classifier.rs └── naive_linear_regression.rs ├── dataset.rs ├── error.rs ├── lib.rs ├── measure_accumulator.rs ├── openml_api ├── api_types.rs ├── file_lock.rs ├── impls_from_json.rs ├── impls_from_openml.rs ├── mod.rs └── web_access.rs ├── prelude.rs ├── procedures ├── frozen_sets.rs └── mod.rs └── tasks ├── mod.rs ├── supervised_classification.rs └── supervised_regression.rs /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | /target 3 | **/*.rs.bk 4 | Cargo.lock 5 | 6 | .idea 7 | 8 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | rust: 3 | - stable 4 | - beta 5 | - nightly 6 | matrix: 7 | allow_failures: 8 | - rust: nightly 9 | fast_finish: true 10 | 11 | cache: cargo 12 | 13 | # Taken from Trust 14 | before_cache: 15 | # Travis can't cache files that are not readable by "others" 16 | - chmod -R a+r $HOME/.cargo 17 | 18 | # Only check that the project builds. Do not run tests to avoid 19 | # straining the openml server 20 | script: 21 | - cargo build --verbose --all 22 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "openml" 3 | version = "0.1.2" 4 | authors = ["Martin Billinger "] 5 | 6 | description = "A rust interface to [OpenML](http://openml.org/)." 
7 | keywords = ["machine-learning", "openml", "data", "dataset"] 8 | categories = ["science"] 9 | 10 | repository = "https://github.com/mbillingr/openml-rust" 11 | readme = "README.md" 12 | 13 | license = "MIT/Apache-2.0" 14 | 15 | [badges] 16 | travis-ci = { repository = "mbillingr/openml-rust" } 17 | 18 | [dev-dependencies] 19 | simple_logger = "0.5" 20 | time = "0.1" 21 | 22 | [dependencies] 23 | app_dirs = "1.2.1" 24 | arff = "0.3" 25 | fs2 = "0.4.3" 26 | futures = "0.1" 27 | hyper = "0.11" 28 | hyper-tls = "0.1" 29 | log = "0.4" 30 | num-traits = "0.2" 31 | serde = "1.0" 32 | serde_derive = "1.0" 33 | serde_json = "1.0" 34 | tokio-core = "0.1" 35 | 36 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Martin Billinger 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # openml-rust 2 | A rust interface to [OpenML](http://openml.org/). 3 | 4 | The aim of this crate is to give rust code access to Machine Learning data hosted by OpenML. 5 | Thus, Machine Learning algorithms developed in Rust can be easily applied to state-of-the-art 6 | data sets and their performance compared to existing implementations in a reproducable way. 
7 | 8 | ## Example 9 | 10 | ```rust 11 | extern crate openml; 12 | 13 | use openml::prelude::*; 14 | use openml::{PredictiveAccuracy, SupervisedClassification}; 15 | use openml::baseline::NaiveBayesClassifier; 16 | 17 | fn main() { 18 | // Load "Supervised Classification on iris" task (https://www.openml.org/t/59) 19 | let task = SupervisedClassification::from_openml(59).unwrap(); 20 | 21 | println!("Task: {}", task.name()); 22 | 23 | // run the task 24 | let result: PredictiveAccuracy<_> = task.run(|train, test| { 25 | // train classifier 26 | let nbc: NaiveBayesClassifier = train 27 | .map(|(x, y)| (x, y)) 28 | .collect(); 29 | 30 | // test classifier 31 | let y_out: Vec<_> = test 32 | .map(|x| nbc.predict(x)) 33 | .collect(); 34 | 35 | Box::new(y_out.into_iter()) 36 | }); 37 | 38 | println!("Classification Accuracy: {}", result.result()); 39 | } 40 | ``` 41 | 42 | ## Goals 43 | - [x] get data sets 44 | - [x] get tasks 45 | - Runtime check panics if the wrong task type is loaded (`SupervisedRegression` attempts to load a Clustering Task) 46 | - [x] get split sets 47 | - [ ] task types 48 | - [x] Supervised Classification 49 | - [x] Supervised Regression 50 | - [ ] Learning Curve 51 | - [ ] Clustering 52 | - [x] run tasks 53 | - runner takes a closure where the user defines learning and prediction 54 | - [ ] make openml.org optional (manual construction of tasks) 55 | 56 | 57 | ## Future Maybe-Goals 58 | - flow support 59 | - run support 60 | - full OpenML API support 61 | - authentication 62 | - more tasks 63 | - Supervised Datastream Classification 64 | - Machine Learning Challenge 65 | - Survival Analysis 66 | - Subgroup Discovery 67 | 68 | ## Non-Goals 69 | - implementations of machine learning algorithms 70 | -------------------------------------------------------------------------------- /examples/classification_task.rs: -------------------------------------------------------------------------------- 1 | extern crate openml; 2 | 3 | use openml::prelude::*; 4 
| use openml::{PredictiveAccuracy, SupervisedClassification}; 5 | use openml::baseline::NaiveBayesClassifier; 6 | 7 | fn main() { 8 | // Load "Supervised Classification on iris" task (https://www.openml.org/t/59) 9 | let task = SupervisedClassification::from_openml(59).unwrap(); 10 | 11 | println!("Task: {}", task.name()); 12 | 13 | // run the task 14 | let result: PredictiveAccuracy<_> = task.run(|train, test| { 15 | // train classifier 16 | let nbc: NaiveBayesClassifier = train 17 | .map(|(x, y)| (x, y)) 18 | .collect(); 19 | 20 | // test classifier 21 | let y_out: Vec<_> = test 22 | .map(|x| nbc.predict(x)) 23 | .collect(); 24 | 25 | Box::new(y_out.into_iter()) 26 | }); 27 | 28 | println!("Classification Accuracy: {}", result.result()); 29 | } 30 | -------------------------------------------------------------------------------- /examples/regression_task.rs: -------------------------------------------------------------------------------- 1 | extern crate openml; 2 | 3 | use openml::prelude::*; 4 | use openml::{RootMeanSquaredError, SupervisedRegression}; 5 | use openml::baseline::NaiveLinearRegression; 6 | 7 | fn main() { 8 | // Load "Supervised Regression on liver-disorders" task (https://www.openml.org/t/52948) 9 | let task = SupervisedRegression::from_openml(52948).unwrap(); 10 | 11 | println!("Task: {}", task.name()); 12 | 13 | // run the task 14 | let result: RootMeanSquaredError<_> = task.run(|train, test| { 15 | // train model 16 | let model: NaiveLinearRegression = train 17 | .map(|(x, y)| (x, y)) 18 | .collect(); 19 | 20 | // test model 21 | let y_out: Vec<_> = test 22 | .map(|x| model.predict(x)) 23 | .collect(); 24 | 25 | Box::new(y_out.into_iter()) 26 | }); 27 | 28 | println!("Root Mean Squared Error: {}", result.result()); 29 | } 30 | -------------------------------------------------------------------------------- /src/baseline/mod.rs: -------------------------------------------------------------------------------- 1 | //! 
Implementation of simple baseline models, used for testing and demonstration. 2 | 3 | mod naive_bayes_classifier; 4 | mod naive_linear_regression; 5 | 6 | pub use self::naive_bayes_classifier::NaiveBayesClassifier; 7 | pub use self::naive_linear_regression::NaiveLinearRegression; -------------------------------------------------------------------------------- /src/baseline/naive_bayes_classifier.rs: -------------------------------------------------------------------------------- 1 | //! Implementation of a Gaussian Naive Bayes Classifier 2 | 3 | use std::cmp::Ordering; 4 | use std::collections::HashMap; 5 | use std::f64; 6 | use std::fmt; 7 | use std::hash::Hash; 8 | use std::iter::FromIterator; 9 | 10 | /// A Gaussian Naive Bayes Classifier 11 | /// 12 | /// The classifier is trained by consuming an iterator over the training data: 13 | /// ``` 14 | /// # use openml::baseline::NaiveBayesClassifier; 15 | /// # let data: Vec<(&[f64], &u8)> = vec![]; 16 | /// let nbc: NaiveBayesClassifier<_> = data 17 | /// .into_iter() 18 | /// .collect(); 19 | /// ``` 20 | #[derive(Debug)] 21 | pub struct NaiveBayesClassifier 22 | where C: Eq + Hash 23 | { 24 | class_distributions: HashMap, 25 | } 26 | 27 | /// Distribution of each feature column 28 | #[derive(Debug, Clone)] 29 | struct FeatureDistribution { 30 | distributions: Vec 31 | } 32 | 33 | /// Univariate Normal Distribution 34 | #[derive(Copy, Clone)] 35 | struct NormalDistribution { 36 | sum: f64, 37 | sqsum: f64, 38 | n: usize 39 | } 40 | 41 | impl<'a, C: 'a, J> FromIterator<(J, &'a C)> for NaiveBayesClassifier 42 | where 43 | J: IntoIterator, 44 | C: Eq + Hash + Copy, 45 | { 46 | fn from_iter>(iter: I) -> Self { 47 | let mut class_distributions = HashMap::new(); 48 | 49 | for (x, &y) in iter { 50 | let distributions = &mut class_distributions 51 | .entry(y) 52 | .or_insert(FeatureDistribution::new()) 53 | .distributions; 54 | 55 | for (i, &xi) in x.into_iter().enumerate() { 56 | if i >= distributions.len() { 57 | 
distributions.resize(1 + i, NormalDistribution::new()); 58 | } 59 | 60 | distributions[i].update(xi); 61 | } 62 | } 63 | 64 | NaiveBayesClassifier { 65 | class_distributions 66 | } 67 | } 68 | } 69 | 70 | impl NaiveBayesClassifier 71 | where C: Eq + Hash + Copy, 72 | { 73 | /// predict target class for a single feature vector 74 | pub fn predict(&self, x: &[f64]) -> C { 75 | self.class_distributions 76 | .iter() 77 | .map(|(c, dists)| { 78 | let mut lnprob = 0.0; 79 | for (&xi, dist) in x.iter().zip(dists.distributions.iter()) { 80 | lnprob += dist.lnprob(xi); 81 | } 82 | (c, lnprob) 83 | }) 84 | .max_by(|(_, lnp1), (_, lnp2)| { 85 | if lnp1 > lnp2 { 86 | Ordering::Greater 87 | } else if lnp1 == lnp2 { 88 | Ordering::Equal 89 | } else { 90 | Ordering::Less 91 | } 92 | }) 93 | .map(|(&c, _)| c) 94 | .unwrap() 95 | } 96 | } 97 | 98 | impl FeatureDistribution { 99 | fn new() -> Self { 100 | FeatureDistribution { 101 | distributions: Vec::new() 102 | } 103 | } 104 | } 105 | 106 | impl NormalDistribution { 107 | fn new() -> Self { 108 | NormalDistribution { 109 | sum: 0.0, 110 | sqsum: 0.0, 111 | n: 0 112 | } 113 | } 114 | 115 | fn update(&mut self, x: f64) { 116 | self.sum += x; 117 | self.sqsum += x * x; 118 | self.n += 1; 119 | } 120 | 121 | fn mean(&self) -> f64 { 122 | self.sum / self.n as f64 123 | } 124 | 125 | fn variance(&self) -> f64 { 126 | (self.sqsum - (self.sum * self.sum) / self.n as f64) / (self.n as f64 - 1.0) 127 | } 128 | 129 | fn lnprob(&self, x: f64) -> f64 { 130 | let v = self.variance(); 131 | let xm = x - self.mean(); 132 | 133 | 0.5 * ((1.0 / (2.0 * f64::consts::PI * v)).ln() - (xm * xm) / v) 134 | 135 | } 136 | } 137 | 138 | impl fmt::Debug for NormalDistribution { 139 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 140 | write!(f, "N{{{}; {}}}", self.mean(), self.variance()) 141 | } 142 | } 143 | 144 | #[test] 145 | fn nbc() { 146 | let data = vec![(vec![1.0, 2.0], 'A'), 147 | (vec![2.0, 1.0], 'A'), 148 | (vec![1.0, 5.0], 'B'), 149 | 
(vec![2.0, 6.0], 'B')]; 150 | 151 | let nbc: NaiveBayesClassifier<_> = data 152 | .iter() 153 | .map(|(x, y)| (x, y)) 154 | .collect(); 155 | 156 | assert_eq!(nbc.predict(&[1.5, 1.5]), 'A'); 157 | assert_eq!(nbc.predict(&[5.5, 1.5]), 'A'); 158 | assert_eq!(nbc.predict(&[1.5, 5.5]), 'B'); 159 | assert_eq!(nbc.predict(&[5.5, 5.5]), 'B'); 160 | } 161 | -------------------------------------------------------------------------------- /src/baseline/naive_linear_regression.rs: -------------------------------------------------------------------------------- 1 | //! Implementation of a Naive Linear Regression model 2 | 3 | use std::f64; 4 | use std::iter::FromIterator; 5 | 6 | /// A Naive Linear Regression model 7 | /// 8 | /// This is univariate regression on a single feature. During training the best feature is selected. 9 | /// The model is trained by consuming an iterator over the training data: 10 | /// ``` 11 | /// # use openml::baseline::NaiveLinearRegression; 12 | /// # let data: Vec<(&[f64], &f64)> = vec![]; 13 | /// let model: NaiveLinearRegression = data 14 | /// .into_iter() 15 | /// .collect(); 16 | /// ``` 17 | #[derive(Debug)] 18 | pub struct NaiveLinearRegression 19 | { 20 | slope: f64, 21 | intercept: f64, 22 | feature: usize, 23 | } 24 | 25 | impl<'a, J> FromIterator<(J, &'a f64)> for NaiveLinearRegression 26 | where 27 | J: IntoIterator, 28 | { 29 | fn from_iter>(iter: I) -> Self { 30 | let mut feature_columns = Vec::new(); 31 | let mut target_column = Vec::new(); 32 | 33 | for (x, &y) in iter { 34 | target_column.push(y); 35 | for (i, &xi) in x.into_iter().enumerate() { 36 | if i >= feature_columns.len() { 37 | feature_columns.push(Vec::new()); 38 | } 39 | 40 | feature_columns[i].push(xi); 41 | } 42 | } 43 | 44 | let mut y_mean = 0.0; 45 | for y in &target_column { 46 | y_mean += *y; 47 | } 48 | y_mean /= target_column.len() as f64; 49 | 50 | let mut best_err = f64::INFINITY; 51 | let mut best_slope = f64::NAN; 52 | let mut best_intercept = f64::NAN; 53 
| let mut best_feature = 0; 54 | 55 | for (i, feature) in feature_columns.iter().enumerate() { 56 | let mut x_mean = 0.0; 57 | for x in feature { 58 | x_mean += *x; 59 | } 60 | x_mean /= feature.len() as f64; 61 | 62 | let mut x_var = 0.0; 63 | let mut covar = 0.0; 64 | for (x, y) in feature.iter().zip(target_column.iter()) { 65 | let x = *x - x_mean; 66 | let y = *y - y_mean; 67 | 68 | x_var += x * x; 69 | covar += x * y; 70 | } 71 | 72 | let slope = covar / x_var; 73 | let intercept = y_mean - slope * x_mean; 74 | 75 | let err: f64 = feature.iter() 76 | .zip(target_column.iter()) 77 | .map(|(&x, &y)| intercept + slope * x - y) 78 | .map(|r| r * r) 79 | .sum(); 80 | 81 | if err < best_err { 82 | best_err = err; 83 | best_slope = slope; 84 | best_intercept = intercept; 85 | best_feature = i; 86 | } 87 | } 88 | 89 | NaiveLinearRegression { 90 | slope: best_slope, 91 | intercept: best_intercept, 92 | feature: best_feature, 93 | } 94 | } 95 | } 96 | 97 | impl NaiveLinearRegression 98 | { 99 | /// predict target value for a single feature vector 100 | pub fn predict(&self, x: &[f64]) -> f64 { 101 | self.intercept + x[self.feature] * self.slope 102 | } 103 | } 104 | 105 | #[test] 106 | fn nbc_flat() { 107 | let data = vec![(vec![1.0, 2.0], 3.0), 108 | (vec![2.0, 1.0], 3.0), 109 | (vec![1.0, 5.0], 3.0), 110 | (vec![2.0, 6.0], 3.0)]; 111 | 112 | let nlr: NaiveLinearRegression = data 113 | .iter() 114 | .map(|(x, y)| (x, y)) 115 | .collect(); 116 | 117 | assert_eq!(nlr.predict(&[1.5, 1.5]), 3.0); 118 | assert_eq!(nlr.predict(&[5.5, 1.5]), 3.0); 119 | assert_eq!(nlr.predict(&[1.5, 5.5]), 3.0); 120 | assert_eq!(nlr.predict(&[5.5, 5.5]), 3.0); 121 | } 122 | 123 | #[test] 124 | fn nbc_slope() { 125 | let data = vec![(vec![1.0, 2.0], 8.0), 126 | (vec![2.0, 1.0], 9.0), 127 | (vec![1.0, 5.0], 5.0), 128 | (vec![2.0, 6.0], 4.0)]; 129 | 130 | let nlr: NaiveLinearRegression = data 131 | .iter() 132 | .map(|(x, y)| (x, y)) 133 | .collect(); 134 | 135 | assert_eq!(nlr.predict(&[1.5, 
1.5]), 8.5); 136 | assert_eq!(nlr.predict(&[5.5, 1.5]), 8.5); 137 | assert_eq!(nlr.predict(&[1.5, 5.5]), 4.5); 138 | assert_eq!(nlr.predict(&[5.5, 5.5]), 4.5); 139 | } 140 | -------------------------------------------------------------------------------- /src/dataset.rs: -------------------------------------------------------------------------------- 1 | use arff::dynamic::DataSet as ArffDataSet; 2 | 3 | /// An arbitrary data set 4 | #[derive(Debug)] 5 | pub(crate) struct DataSet { 6 | pub(crate) arff: ArffDataSet, 7 | pub(crate) target: Option, 8 | } 9 | 10 | impl DataSet { 11 | /// return two `ArffDataSet`s; one containing the features and the other containing the target 12 | /// variable. 13 | pub(crate) fn clone_split(&self) -> Option<(ArffDataSet, ArffDataSet)> { 14 | match self.target { 15 | None => None, 16 | Some(ref col) => { 17 | let data = self.arff.clone(); 18 | Some(data.split_one(col)) 19 | } 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/error.rs: -------------------------------------------------------------------------------- 1 | use std::io::Error as IoError; 2 | use std::result::Result as StdResult; 3 | use std::string::FromUtf8Error; 4 | 5 | use app_dirs::AppDirsError; 6 | use arff::Error as ArffError; 7 | use hyper::Error as HyperError; 8 | use hyper::error::UriError; 9 | use hyper_tls::Error as TlsError; 10 | use serde_json::Error as JsonError; 11 | 12 | pub type Result = StdResult; 13 | 14 | #[derive(Debug)] 15 | pub enum Error { 16 | IoError(IoError), 17 | Utf8Error(FromUtf8Error), 18 | HyperError(HyperError), 19 | HyperUriError(UriError), 20 | HyperTlsError(TlsError), 21 | JsonError(JsonError), 22 | ArffError(ArffError), 23 | AppDirsError(AppDirsError), 24 | } 25 | 26 | impl From for Error { 27 | fn from(e: IoError) -> Self { 28 | Error::IoError(e) 29 | } 30 | } 31 | 32 | impl From for Error { 33 | fn from(e: FromUtf8Error) -> Self { 34 | Error::Utf8Error(e) 35 | } 36 | } 37 | 
38 | impl From for Error { 39 | fn from(e: HyperError) -> Self { 40 | Error::HyperError(e) 41 | } 42 | } 43 | 44 | impl From for Error { 45 | fn from(e: UriError) -> Self { 46 | Error::HyperUriError(e) 47 | } 48 | } 49 | 50 | impl From for Error { 51 | fn from(e: TlsError) -> Self { 52 | Error::HyperTlsError(e) 53 | } 54 | } 55 | 56 | impl From for Error { 57 | fn from(e: JsonError) -> Self { 58 | Error::JsonError(e) 59 | } 60 | } 61 | 62 | impl From for Error { 63 | fn from(e: ArffError) -> Self { 64 | Error::ArffError(e) 65 | } 66 | } 67 | 68 | impl From for Error { 69 | fn from(e: AppDirsError) -> Self { 70 | match e { 71 | AppDirsError::Io(e) => Error::IoError(e), 72 | _ => Error::AppDirsError(e) 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! # openml-rust 2 | //! 3 | //! The openml crate provides functions to fetch tasks and data sets from https://openml.org, and 4 | //! run them with machine learning models. 5 | //! 6 | //! ## Example 7 | //! 8 | //! ```rust 9 | //!extern crate openml; 10 | //! 11 | //!use openml::prelude::*; 12 | //!use openml::{PredictiveAccuracy, SupervisedClassification}; 13 | //!use openml::baseline::NaiveBayesClassifier; 14 | //! 15 | //!fn main() { 16 | //! // Load "Supervised Classification on iris" task (https://www.openml.org/t/59) 17 | //! let task = SupervisedClassification::from_openml(59).unwrap(); 18 | //! 19 | //! println!("Task: {}", task.name()); 20 | //! 21 | //! // run the task 22 | //! let result: PredictiveAccuracy<_> = task.run(|train, test| { 23 | //! // train classifier 24 | //! let nbc: NaiveBayesClassifier = train 25 | //! .map(|(x, y)| (x, y)) 26 | //! .collect(); 27 | //! 28 | //! // test classifier 29 | //! let y_out: Vec<_> = test 30 | //! .map(|x| nbc.predict(x)) 31 | //! .collect(); 32 | //! 33 | //! Box::new(y_out.into_iter()) 34 | //! }); 35 | //! 
36 | //! println!("Classification Accuracy: {}", result.result()); 37 | //!} 38 | //! ``` 39 | 40 | extern crate app_dirs; 41 | extern crate arff; 42 | extern crate fs2; 43 | extern crate futures; 44 | extern crate hyper; 45 | extern crate hyper_tls; 46 | #[macro_use] 47 | extern crate log; 48 | extern crate num_traits; 49 | extern crate serde; 50 | #[macro_use] 51 | extern crate serde_derive; 52 | extern crate serde_json; 53 | #[cfg(test)] 54 | extern crate simple_logger; 55 | #[cfg(test)] 56 | extern crate time; 57 | extern crate tokio_core; 58 | 59 | pub mod baseline; 60 | mod dataset; 61 | mod error; 62 | mod measure_accumulator; 63 | mod openml_api; 64 | pub mod prelude; 65 | mod procedures; 66 | mod tasks; 67 | 68 | pub use measure_accumulator::{ 69 | MeasureAccumulator, 70 | PredictiveAccuracy, 71 | RootMeanSquaredError 72 | }; 73 | 74 | pub use tasks::{ 75 | SupervisedClassification, 76 | SupervisedRegression, 77 | Task 78 | }; 79 | 80 | #[cfg(test)] 81 | mod tests { 82 | use log::Level; 83 | use time::PreciseTime; 84 | 85 | use baseline::NaiveBayesClassifier; 86 | use measure_accumulator::PredictiveAccuracy; 87 | 88 | use super::*; 89 | 90 | #[test] 91 | fn apidev() { 92 | let task = SupervisedClassification::from_openml(59).unwrap(); 93 | 94 | println!("{}", task.name()); 95 | 96 | let result: PredictiveAccuracy<_> = task.run_static(|_train, test| { 97 | let y_out: Vec<_> = test.map(|_row: &[f64; 4]| 0).collect(); 98 | Box::new(y_out.into_iter()) 99 | }); 100 | 101 | println!("{:#?}", result); 102 | 103 | #[allow(dead_code)] 104 | #[derive(Deserialize)] 105 | struct Row { 106 | sepallength: f32, 107 | sepalwidth: f32, 108 | petallength: f32, 109 | petalwidth: f32, 110 | } 111 | 112 | let result: PredictiveAccuracy<_> = task.run_static(|train, test| { 113 | let (_x_train, _y_train): (Vec<&Row>, Vec) = train.unzip(); 114 | let y_out: Vec<_> = test.map(|_row: &Row| 0).collect(); 115 | Box::new(y_out.into_iter()) 116 | }); 117 | 118 | println!("{:#?}", 
result); 119 | 120 | let result: PredictiveAccuracy<_> = task.run(|train, test| { 121 | // train classifier 122 | let nbc: NaiveBayesClassifier = train 123 | .map(|(x, y)| (x, y)) 124 | .collect(); 125 | 126 | // test classifier 127 | let y_out: Vec<_> = test 128 | .map(|x| nbc.predict(x)) 129 | .collect(); 130 | 131 | Box::new(y_out.into_iter()) 132 | }); 133 | 134 | println!("{:#?}", result); 135 | } 136 | 137 | #[test] 138 | fn apidev2() { 139 | use simple_logger; 140 | simple_logger::init_with_level(Level::Info).unwrap(); 141 | 142 | let start = PreciseTime::now(); 143 | 144 | let task = SupervisedClassification::from_openml(146825).unwrap(); 145 | //let task = SupervisedClassification::from_openml(167147).unwrap(); 146 | 147 | let end = PreciseTime::now(); 148 | 149 | let result: PredictiveAccuracy<_> = task.run(|_train, test| { 150 | let y_out: Vec<_> = test.map(|_row: &[u8]| 0).collect(); 151 | Box::new(y_out.into_iter()) 152 | }); 153 | 154 | println!("{:#?}", result); 155 | 156 | println!("loading took {} seconds.", start.to(end)); 157 | } 158 | } 159 | -------------------------------------------------------------------------------- /src/measure_accumulator.rs: -------------------------------------------------------------------------------- 1 | //! Measure accumulators are summaries of model performance, such as classification accuracy or 2 | //! regression error. 
3 | 4 | use std::cmp::Eq; 5 | use std::collections::HashMap; 6 | use std::hash::Hash; 7 | use std::marker::PhantomData; 8 | 9 | use num_traits::AsPrimitive; 10 | 11 | /// Trait implemented by performance measures 12 | pub trait MeasureAccumulator { 13 | /// initialize new measure 14 | fn new() -> Self; 15 | 16 | /// update with one prediction 17 | fn update_one(&mut self, known: &T, pred: &T); 18 | 19 | /// get resulting performance 20 | fn result(&self) -> f64; 21 | 22 | /// update with multiple predictions 23 | fn update>(&mut self, known: I, predicted: I) { 24 | for (k, p) in known.zip(predicted) { 25 | self.update_one(&k, &p) 26 | } 27 | } 28 | } 29 | 30 | /// Classification Accuracy: relative amount of correctly classified labels 31 | #[derive(Debug)] 32 | pub struct PredictiveAccuracy { 33 | n_correct: usize, 34 | n_wrong: usize, 35 | _t: PhantomData, 36 | } 37 | 38 | impl MeasureAccumulator for PredictiveAccuracy 39 | where 40 | T: PartialEq, 41 | { 42 | fn new() -> Self { 43 | PredictiveAccuracy { 44 | n_correct: 0, 45 | n_wrong: 0, 46 | _t: PhantomData, 47 | } 48 | } 49 | 50 | fn update_one(&mut self, known: &T, pred: &T) { 51 | if known == pred { 52 | self.n_correct += 1; 53 | } else { 54 | self.n_wrong += 1; 55 | } 56 | } 57 | 58 | fn result(&self) -> f64 { 59 | self.n_correct as f64 / (self.n_correct + self.n_wrong) as f64 60 | } 61 | } 62 | 63 | /// Root Mean Squared Error 64 | #[derive(Debug)] 65 | pub struct RootMeanSquaredError { 66 | sum_of_squares: f64, 67 | n: usize, 68 | _t: PhantomData, 69 | } 70 | 71 | impl MeasureAccumulator for RootMeanSquaredError 72 | where 73 | T: AsPrimitive, 74 | { 75 | fn new() -> Self { 76 | RootMeanSquaredError { 77 | sum_of_squares: 0.0, 78 | n: 0, 79 | _t: PhantomData, 80 | } 81 | } 82 | 83 | fn update_one(&mut self, known: &T, pred: &T) { 84 | let diff = known.as_() - pred.as_(); 85 | self.sum_of_squares += diff * diff; 86 | self.n += 1; 87 | } 88 | 89 | fn result(&self) -> f64 { 90 | (self.sum_of_squares / self.n 
as f64).sqrt() 91 | } 92 | } 93 | 94 | 95 | /// Adjusted Rand Index 96 | #[derive(Debug)] 97 | pub struct AdjustedRandIndex 98 | where T: Eq + Hash, 99 | { 100 | contingency_table: HashMap<(T, T), usize> 101 | } 102 | 103 | 104 | 105 | impl MeasureAccumulator for AdjustedRandIndex 106 | where T: Eq + Hash + Clone, 107 | { 108 | fn new() -> Self { 109 | AdjustedRandIndex { 110 | contingency_table: HashMap::new() 111 | } 112 | } 113 | 114 | fn update_one(&mut self, known: &T, pred: &T) { 115 | let n = self.contingency_table 116 | .entry((known.clone(), pred.clone())) 117 | .or_insert(0); 118 | *n += 1; 119 | } 120 | 121 | fn result(&self) -> f64 { 122 | let mut a = HashMap::new(); 123 | let mut b = HashMap::new(); 124 | 125 | let mut ri = 0usize; 126 | let mut n_tot = 0usize; 127 | 128 | for ((ak, bk), &n) in self.contingency_table.iter() { 129 | n_tot += n; 130 | ri += combinations(n); 131 | 132 | *a.entry(ak).or_insert(0usize) += n; 133 | *b.entry(bk).or_insert(0usize) += n; 134 | } 135 | 136 | let a_sum: usize = a.iter().map(|(_, &n)| combinations(n)).sum(); 137 | let b_sum: usize = b.iter().map(|(_, &n)| combinations(n)).sum(); 138 | 139 | let expected_ri = (a_sum as f64) * (b_sum as f64) / combinations(n_tot) as f64; 140 | let max_ri = (a_sum + b_sum) as f64 / 2.0; 141 | 142 | (ri as f64 - expected_ri) / (max_ri - expected_ri) 143 | } 144 | } 145 | 146 | fn combinations(n: usize) -> usize { 147 | if n % 2 == 0 { 148 | (n - 1) * (n / 2) 149 | } else { 150 | n * ((n - 1) / 2) 151 | } 152 | } 153 | 154 | #[test] 155 | fn ari() { 156 | let labels_true = [0, 0, 0, 1, 1, 1]; 157 | let labels_pred = [0, 0, 1, 1, 2, 2]; 158 | 159 | let mut ari = AdjustedRandIndex::new(); 160 | ari.update(labels_true.iter(), labels_pred.iter()); 161 | 162 | assert_eq!(ari.result(), 0.24242424242424246); 163 | } 164 | -------------------------------------------------------------------------------- /src/openml_api/api_types.rs: 
--------------------------------------------------------------------------------
use serde_json;

/// Generic JSON response as returned by the OpenML API
#[derive(Debug, Serialize, Deserialize)]
pub struct GenericResponse(serde_json::Value);

impl GenericResponse {
    /// look up a value by JSON pointer (e.g. "/task/task_id")
    #[inline(always)]
    pub fn look_up<'a>(&'a self, p: &str) -> Option<&'a serde_json::Value> {
        self.0.pointer(p)
    }
}

/// A row in a split file
#[derive(Debug, Deserialize)]
pub(crate) struct CrossValItem {
    // whether the row belongs to the train or the test set
    #[serde(rename = "type")]
    pub purpose: TrainTest,

    // row index into the data set
    pub rowid: usize,

    // cross-validation repetition this row belongs to
    pub repeat: usize,

    // cross-validation fold this row belongs to
    pub fold: usize,
}

#[derive(Debug, Deserialize)]
pub(crate) enum TrainTest {
    #[serde(rename = "TRAIN")]
    Train,

    #[serde(rename = "TEST")]
    Test,
}

/// Cost matrix, used by some classification tasks - currently UNIMPLEMENTED
#[derive(Debug)]
pub(crate) enum CostMatrix {
    None,
}

impl<'a> From<&'a serde_json::Value> for CostMatrix {
    fn from(item: &serde_json::Value) -> Self {
        let v = &item["cost_matrix"];
        match v.as_array() {
            // fixed typo in panic message ("cots" -> "cost")
            None => panic!("invalid cost matrix"),
            Some(c) if c.is_empty() => CostMatrix::None,
            Some(_) => unimplemented!("cost matrix"),
        }
    }
}
--------------------------------------------------------------------------------
/src/openml_api/file_lock.rs:
--------------------------------------------------------------------------------
//!
file locking mechanisms 2 | 3 | use std::fs::File; 4 | use std::io::{self, Read, Write}; 5 | 6 | use fs2::FileExt; 7 | 8 | /// A scoped exclusive lock for use by file writers 9 | pub struct ExclusiveLock { 10 | file: File, 11 | } 12 | 13 | impl ExclusiveLock { 14 | /// acquire locked file 15 | pub fn new(file: File) -> io::Result { 16 | file.lock_exclusive()?; 17 | Ok(ExclusiveLock { file }) 18 | } 19 | } 20 | 21 | impl Drop for ExclusiveLock { 22 | /// release locked file 23 | fn drop(&mut self) { 24 | self.file.unlock().unwrap(); 25 | } 26 | } 27 | 28 | impl Read for ExclusiveLock { 29 | /// read from locked file 30 | #[inline(always)] 31 | fn read(&mut self, data: &mut [u8]) -> io::Result { 32 | self.file.read(data) 33 | } 34 | } 35 | 36 | impl Write for ExclusiveLock { 37 | /// write to locked file 38 | #[inline(always)] 39 | fn write(&mut self, data: &[u8]) -> io::Result { 40 | self.file.write(data) 41 | } 42 | 43 | /// flush buffer of locked file 44 | #[inline(always)] 45 | fn flush(&mut self) -> io::Result<()> { 46 | self.file.flush() 47 | } 48 | } 49 | 50 | pub struct SharedLock { 51 | file: File, 52 | } 53 | 54 | /// A scoped shared lock for use by file readers 55 | impl SharedLock { 56 | /// acquire locked file 57 | pub fn new(file: File) -> io::Result { 58 | file.lock_shared()?; 59 | Ok(SharedLock { file }) 60 | } 61 | } 62 | 63 | impl Drop for SharedLock { 64 | /// release locked file 65 | fn drop(&mut self) { 66 | self.file.unlock().unwrap(); 67 | } 68 | } 69 | 70 | impl Read for SharedLock { 71 | /// read from locked file 72 | #[inline(always)] 73 | fn read(&mut self, data: &mut [u8]) -> io::Result { 74 | self.file.read(data) 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/openml_api/impls_from_json.rs: -------------------------------------------------------------------------------- 1 | //! 
implementations to convert the API's JSON responses into corresponding Rust structures 2 | use arff; 3 | use arff::dynamic::DataSet as ArffDataSet; 4 | use serde_json; 5 | 6 | use dataset::DataSet; 7 | use error::Result; 8 | use procedures::{Fold, FrozenSets}; 9 | use tasks::{SupervisedClassification, SupervisedRegression}; 10 | 11 | use super::api_types::{CrossValItem, GenericResponse, TrainTest}; 12 | use super::web_access::get_cached; 13 | 14 | impl DataSet { 15 | fn from_json(item: &serde_json::Value) -> Self { 16 | let v = &item["data_set"]; 17 | let id = v["data_set_id"].as_str().unwrap(); 18 | let target = v["target_feature"].as_str(); 19 | 20 | let info_url = format!("https://www.openml.org/api/v1/json/data/{}", id); 21 | let info: GenericResponse = serde_json::from_str(&get_cached(&info_url).unwrap()).unwrap(); 22 | 23 | let default_target = info.look_up("/data_set_description/default_target_attribute") 24 | .and_then(|v| v.as_str()); 25 | 26 | let target = match (default_target, target) { 27 | (Some(s), None) | (_, Some(s)) => Some(s.to_owned()), 28 | (None, None) => None, 29 | }; 30 | 31 | let dset_url = info.look_up("/data_set_description/url") 32 | .unwrap() 33 | .as_str() 34 | .unwrap(); 35 | let dset_str = get_cached(&dset_url).unwrap(); 36 | let dset = ArffDataSet::from_str(&dset_str).unwrap(); 37 | 38 | DataSet { arff: dset, target } 39 | } 40 | } 41 | 42 | impl SupervisedClassification { 43 | pub fn from_json(task_json: &serde_json::Value) -> Self { 44 | let mut source_data = None; 45 | let mut estimation_procedure = None; 46 | //let mut cost_matrix = None; 47 | 48 | for input_item in task_json["input"].as_array().unwrap() { 49 | match input_item["name"].as_str() { 50 | Some("source_data") => source_data = Some(DataSet::from_json(input_item)), 51 | Some("estimation_procedure") => { 52 | estimation_procedure = Some(Box::new(FrozenSets::from_json(input_item))) 53 | } 54 | //Some("cost_matrix") => cost_matrix = Some(input_item.into()), 55 | Some(_) 
=> {} 56 | None => panic!("/task/input/name is not a string"), 57 | } 58 | } 59 | 60 | SupervisedClassification { 61 | id: task_json["task_id"].as_str().unwrap().to_owned(), 62 | name: task_json["task_name"].as_str().unwrap().to_owned(), 63 | source_data: source_data.unwrap(), 64 | estimation_procedure: estimation_procedure.unwrap(), 65 | //cost_matrix: cost_matrix.unwrap(), 66 | } 67 | } 68 | } 69 | 70 | impl SupervisedRegression { 71 | pub fn from_json(task_json: &serde_json::Value) -> Self { 72 | let mut source_data = None; 73 | let mut estimation_procedure = None; 74 | 75 | for input_item in task_json["input"].as_array().unwrap() { 76 | match input_item["name"].as_str() { 77 | Some("source_data") => source_data = Some(DataSet::from_json(input_item)), 78 | Some("estimation_procedure") => { 79 | estimation_procedure = Some(Box::new(FrozenSets::from_json(input_item))) 80 | } 81 | Some(_) => {} 82 | None => panic!("/task/input/name is not a string"), 83 | } 84 | } 85 | 86 | SupervisedRegression { 87 | id: task_json["task_id"].as_str().unwrap().to_owned(), 88 | name: task_json["task_name"].as_str().unwrap().to_owned(), 89 | source_data: source_data.unwrap(), 90 | estimation_procedure: estimation_procedure.unwrap(), 91 | } 92 | } 93 | } 94 | 95 | impl FrozenSets { 96 | fn from_json(item: &serde_json::Value) -> Self { 97 | let v = &item["estimation_procedure"]; 98 | let typ = v["type"].as_str(); 99 | let splits = v["data_splits_url"].as_str(); 100 | 101 | match (typ, splits) { 102 | (_, Some(url)) => FrozenSets::from_url(url).unwrap(), 103 | _ => unimplemented!(), 104 | } 105 | } 106 | 107 | fn from_url(url: &str) -> Result { 108 | let raw = get_cached(url)?; 109 | let data: Vec = arff::from_str(&raw)?; 110 | 111 | let mut folds = vec![]; 112 | for item in data { 113 | if item.repeat >= folds.len() { 114 | folds.resize(item.repeat + 1, vec![]); 115 | } 116 | let mut rep = &mut folds[item.repeat]; 117 | 118 | if item.fold >= rep.len() { 119 | rep.resize(item.fold + 1, 
Fold::new()); 120 | } 121 | let mut fold = &mut rep[item.fold]; 122 | 123 | match item.purpose { 124 | TrainTest::Train => fold.trainset.push(item.rowid), 125 | TrainTest::Test => fold.testset.push(item.rowid), 126 | } 127 | } 128 | 129 | Ok(FrozenSets { folds }) 130 | } 131 | } 132 | -------------------------------------------------------------------------------- /src/openml_api/impls_from_openml.rs: -------------------------------------------------------------------------------- 1 | //! implementations to load tasks from the OpenML API. 2 | use serde_json; 3 | 4 | use error::Result; 5 | use tasks::{SupervisedClassification, SupervisedRegression}; 6 | 7 | use super::Id; 8 | use super::api_types::GenericResponse; 9 | use super::web_access::get_cached; 10 | 11 | impl SupervisedClassification { 12 | pub fn from_openml<'a, T: Id>(id: T) -> Result { 13 | let url = format!("https://www.openml.org/api/v1/json/task/{}", id.as_string()); 14 | let raw_task = get_cached(&url)?; 15 | let response: GenericResponse = serde_json::from_str(&raw_task)?; 16 | 17 | let task = response.look_up("/task").unwrap(); 18 | 19 | match response.look_up("/task/task_type_id").unwrap().as_str() { 20 | Some("1") => Ok(SupervisedClassification::from_json(task)), 21 | Some(id) => panic!("Wrong task type ID. Expected \"1\" but got \"{}\"", id), 22 | None => panic!("Invalid task type ID") 23 | } 24 | } 25 | } 26 | 27 | impl SupervisedRegression { 28 | pub fn from_openml<'a, T: Id>(id: T) -> Result { 29 | let url = format!("https://www.openml.org/api/v1/json/task/{}", id.as_string()); 30 | let raw_task = get_cached(&url)?; 31 | let response: GenericResponse = serde_json::from_str(&raw_task)?; 32 | 33 | let task = response.look_up("/task").unwrap(); 34 | 35 | match response.look_up("/task/task_type_id").unwrap().as_str() { 36 | Some("2") => Ok(SupervisedRegression::from_json(task)), 37 | Some(id) => panic!("Wrong task type ID. 
Expected \"2\" but got \"{}\"", id), 38 | None => panic!("Invalid task type ID") 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/openml_api/mod.rs: -------------------------------------------------------------------------------- 1 | //! Cached access to the OpenML REST API 2 | 3 | mod api_types; 4 | mod file_lock; 5 | mod impls_from_json; 6 | mod impls_from_openml; 7 | mod web_access; 8 | 9 | use std::borrow::Cow; 10 | 11 | pub trait Id { 12 | fn as_string(&self) -> Cow; 13 | fn as_u32(&self) -> u32; 14 | } 15 | 16 | impl Id for String { 17 | #[inline(always)] 18 | fn as_string(&self) -> Cow { 19 | Cow::from(self.as_str()) 20 | } 21 | 22 | #[inline(always)] 23 | fn as_u32(&self) -> u32 { 24 | self.parse().unwrap() 25 | } 26 | } 27 | 28 | impl<'a> Id for &'a str { 29 | #[inline(always)] 30 | fn as_string(&self) -> Cow { 31 | Cow::from(*self) 32 | } 33 | 34 | #[inline(always)] 35 | fn as_u32(&self) -> u32 { 36 | self.parse().unwrap() 37 | } 38 | } 39 | 40 | impl Id for u32 { 41 | #[inline(always)] 42 | fn as_string(&self) -> Cow { 43 | Cow::from(format!("{}", self)) 44 | } 45 | 46 | #[inline(always)] 47 | fn as_u32(&self) -> u32 { 48 | *self 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/openml_api/web_access.rs: -------------------------------------------------------------------------------- 1 | //! Access the OpenML REST API 2 | 3 | use std::fs::{File, OpenOptions}; 4 | use std::io::{self, Read, Write}; 5 | 6 | use app_dirs::{app_root, AppDataType, AppInfo}; 7 | use futures::{Future, Stream}; 8 | use hyper::Client; 9 | use hyper_tls::HttpsConnector; 10 | use tokio_core::reactor::Core; 11 | 12 | use error::Result; 13 | 14 | use super::file_lock::{ExclusiveLock, SharedLock}; 15 | 16 | const APP_INFO: AppInfo = AppInfo{name: "openml-rust", author: "openml-rust"}; 17 | 18 | /// Query a URL. 
If possible read the response from local cache 19 | pub fn get_cached(url: &str) -> Result { 20 | // todo: is there a potential race condition with a process locking the file for reading while 21 | // the writer has created but not yet locked the file? 22 | 23 | let mut path = app_root(AppDataType::UserCache, &APP_INFO)?; 24 | path.push(url_to_file(url)); 25 | 26 | loop { 27 | match File::open(&path) { 28 | Ok(f) => { 29 | info!("Loading cached {}", url); 30 | let mut file = SharedLock::new(f)?; 31 | let mut data = String::new(); 32 | file.read_to_string(&mut data)?; 33 | return Ok(data); 34 | } 35 | Err(_) => {} 36 | } 37 | 38 | match OpenOptions::new().create_new(true).write(true).open(&path) { 39 | Err(e) => { 40 | // todo: is this the correct io error raised if another thread has locked the file currently? 41 | if let io::ErrorKind::PermissionDenied = e.kind() { 42 | continue; 43 | } 44 | error!("Error while opening cache for writing: {:?}", e); 45 | return Err(e.into()); 46 | } 47 | Ok(f) => { 48 | info!("Downloading {}", url); 49 | let mut file = ExclusiveLock::new(f)?; 50 | let data = download(url)?; 51 | file.write_all(data.as_bytes())?; 52 | return Ok(data); 53 | } 54 | } 55 | } 56 | } 57 | 58 | /// Query a URL. 59 | fn download(url: &str) -> Result { 60 | let mut core = Core::new()?; 61 | let handle = core.handle(); 62 | let client = Client::configure() 63 | .connector(HttpsConnector::new(4, &handle)?) 64 | .build(&handle); 65 | 66 | let req = client.get(url.parse()?); 67 | 68 | let mut bytes = Vec::new(); 69 | { 70 | let work = req.and_then(|res| { 71 | res.body().for_each(|chunk| { 72 | bytes.extend_from_slice(&chunk); 73 | Ok(()) 74 | }) 75 | }); 76 | core.run(work)? 
77 | } 78 | let result = String::from_utf8(bytes)?; 79 | Ok(result) 80 | } 81 | 82 | /// Convert URL to file name for chching 83 | fn url_to_file(s: &str) -> String { 84 | s.replace('/', "_").replace(':', "") 85 | } 86 | -------------------------------------------------------------------------------- /src/prelude.rs: -------------------------------------------------------------------------------- 1 | pub use measure_accumulator::MeasureAccumulator; -------------------------------------------------------------------------------- /src/procedures/frozen_sets.rs: -------------------------------------------------------------------------------- 1 | use super::{Fold, Procedure}; 2 | 3 | /// pre-defined cross-validation 4 | #[derive(Debug)] 5 | pub(crate) struct FrozenSets { 6 | pub(crate) folds: Vec>, 7 | } 8 | 9 | impl Procedure for FrozenSets { 10 | fn iter<'a>(&'a self) -> Box<'a + Iterator> { 11 | let iter = self.folds.iter().flat_map(|inner| inner.iter()); 12 | Box::new(iter) 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /src/procedures/mod.rs: -------------------------------------------------------------------------------- 1 | //! 
Validation procedures 2 | 3 | mod frozen_sets; 4 | 5 | pub(crate) use self::frozen_sets::FrozenSets; 6 | 7 | /// Validation procedures support iteration over cross-validation folds 8 | pub(crate) trait Procedure { 9 | fn iter<'a>(&'a self) -> Box<'a + Iterator>; 10 | } 11 | 12 | /// A single cross-validation fold, consisting of a training set and a testing set 13 | #[derive(Debug, Clone)] 14 | pub(crate) struct Fold { 15 | pub(crate) trainset: Vec, 16 | pub(crate) testset: Vec, 17 | } 18 | 19 | impl Fold { 20 | pub fn new() -> Self { 21 | Fold { 22 | trainset: Vec::new(), 23 | testset: Vec::new(), 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/tasks/mod.rs: -------------------------------------------------------------------------------- 1 | //! Implementations of specific OpenML task types 2 | 3 | mod supervised_classification; 4 | mod supervised_regression; 5 | 6 | use serde::de::DeserializeOwned; 7 | 8 | pub use self::supervised_classification::SupervisedClassification; 9 | pub use self::supervised_regression::SupervisedRegression; 10 | 11 | use measure_accumulator::MeasureAccumulator; 12 | 13 | pub trait Task { 14 | /// get task ID 15 | fn id(&self) -> &str; 16 | 17 | /// get task name 18 | fn name(&self) -> &str; 19 | 20 | /// run task, specifying the type of an entire feature column in `X`. This allows to run 21 | /// machine learning models that take features of different types, or named features in form 22 | /// of structs. 23 | fn run_static(&self, flow: F) -> M 24 | where 25 | F: Fn(&mut Iterator, &mut Iterator) -> Box>, 26 | X: DeserializeOwned, 27 | Y: DeserializeOwned, 28 | M: MeasureAccumulator; 29 | 30 | /// run task, specifying the feature type in `X`. This allows to run machine learning models 31 | /// that expect every feature to have the same type. 
32 | fn run(&self, flow: F) -> M 33 | where 34 | F: Fn(&mut Iterator, &mut Iterator) 35 | -> Box>, 36 | X: DeserializeOwned, 37 | Y: DeserializeOwned, 38 | M: MeasureAccumulator; 39 | } 40 | -------------------------------------------------------------------------------- /src/tasks/supervised_classification.rs: -------------------------------------------------------------------------------- 1 | use arff::dynamic::de::from_dataset; 2 | use serde::de::DeserializeOwned; 3 | 4 | use dataset::DataSet; 5 | use measure_accumulator::MeasureAccumulator; 6 | use procedures::Procedure; 7 | 8 | /// Classification task 9 | pub struct SupervisedClassification { 10 | pub(crate) id: String, 11 | pub(crate) name: String, 12 | pub(crate) source_data: DataSet, 13 | pub(crate) estimation_procedure: Box, 14 | //pub(crate) cost_matrix: CostMatrix, 15 | } 16 | 17 | impl SupervisedClassification { 18 | /// get task ID 19 | pub fn id(&self) -> &str { 20 | &self.id 21 | } 22 | 23 | /// get task name 24 | pub fn name(&self) -> &str { 25 | &self.name 26 | } 27 | 28 | /// run task, specifying the type of an entire feature column in `X`. This allows to run 29 | /// machine learning models that take features of different types, or named features in form 30 | /// of structs. 
31 | pub fn run_static(&self, flow: F) -> M 32 | where 33 | F: Fn(&mut Iterator, &mut Iterator) -> Box>, 34 | X: DeserializeOwned, 35 | Y: DeserializeOwned, 36 | M: MeasureAccumulator, 37 | { 38 | let (dx, dy) = self.source_data 39 | .clone_split() 40 | .expect("Supervised Classification requires a target column"); 41 | 42 | let x: Vec = from_dataset(&dx).unwrap(); 43 | let y: Vec = from_dataset(&dy).unwrap(); 44 | 45 | let mut measure = M::new(); 46 | 47 | for fold in self.estimation_procedure.iter() { 48 | let mut train = fold.trainset.iter().map(|&i| (&x[i], &y[i])); 49 | 50 | let mut test = fold.testset.iter().map(|&i| &x[i]); 51 | 52 | let predictit = flow(&mut train, &mut test); 53 | 54 | for (known, pred) in fold.testset.iter().map(|&i| &y[i]).zip(predictit) { 55 | measure.update_one(known, &pred); 56 | } 57 | } 58 | 59 | measure 60 | } 61 | 62 | /// run task, specifying the feature type in `X`. This allows to run machine learning models 63 | /// that expect every feature to have the same type. 
64 | pub fn run(&self, flow: F) -> M 65 | where 66 | F: Fn(&mut Iterator, &mut Iterator) 67 | -> Box>, 68 | X: DeserializeOwned, 69 | Y: DeserializeOwned, 70 | M: MeasureAccumulator, 71 | { 72 | let (dx, dy) = self.source_data 73 | .clone_split() 74 | .expect("Supervised Classification requires a target column"); 75 | 76 | let x: Vec = from_dataset(&dx).unwrap(); 77 | let y: Vec = from_dataset(&dy).unwrap(); 78 | 79 | let mut measure = M::new(); 80 | 81 | for fold in self.estimation_procedure.iter() { 82 | let mut train = fold.trainset 83 | .iter() 84 | .map(|&i| (&x[i * dx.n_cols()..(i + 1) * dx.n_cols()], &y[i])); 85 | 86 | let mut test = fold.testset 87 | .iter() 88 | .map(|&i| &x[i * dx.n_cols()..(i + 1) * dx.n_cols()]); 89 | 90 | let predictit = flow(&mut train, &mut test); 91 | 92 | for (known, pred) in fold.testset.iter().map(|&i| &y[i]).zip(predictit) { 93 | measure.update_one(known, &pred); 94 | } 95 | } 96 | 97 | measure 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /src/tasks/supervised_regression.rs: -------------------------------------------------------------------------------- 1 | use arff::dynamic::de::from_dataset; 2 | use serde::de::DeserializeOwned; 3 | 4 | use dataset::DataSet; 5 | use measure_accumulator::MeasureAccumulator; 6 | use procedures::Procedure; 7 | 8 | /// Regression task 9 | pub struct SupervisedRegression { 10 | pub(crate) id: String, 11 | pub(crate) name: String, 12 | pub(crate) source_data: DataSet, 13 | pub(crate) estimation_procedure: Box, 14 | } 15 | 16 | impl SupervisedRegression { 17 | /// get task ID 18 | pub fn id(&self) -> &str { 19 | &self.id 20 | } 21 | 22 | /// get task name 23 | pub fn name(&self) -> &str { 24 | &self.name 25 | } 26 | 27 | /// run task, specifying the type of an entire feature column in `X`. This allows to run 28 | /// machine learning models that take features of different types, or named features in form 29 | /// of structs. 
30 | pub fn run_static(&self, flow: F) -> M 31 | where 32 | F: Fn(&mut Iterator, &mut Iterator) -> Box>, 33 | X: DeserializeOwned, 34 | Y: DeserializeOwned, 35 | M: MeasureAccumulator, 36 | { 37 | let (dx, dy) = self.source_data 38 | .clone_split() 39 | .expect("Supervised Regression requires a target column"); 40 | 41 | let x: Vec = from_dataset(&dx).unwrap(); 42 | let y: Vec = from_dataset(&dy).unwrap(); 43 | 44 | let mut measure = M::new(); 45 | 46 | for fold in self.estimation_procedure.iter() { 47 | let mut train = fold.trainset.iter().map(|&i| (&x[i], &y[i])); 48 | 49 | let mut test = fold.testset.iter().map(|&i| &x[i]); 50 | 51 | let predictit = flow(&mut train, &mut test); 52 | 53 | for (known, pred) in fold.testset.iter().map(|&i| &y[i]).zip(predictit) { 54 | measure.update_one(known, &pred); 55 | } 56 | } 57 | 58 | measure 59 | } 60 | 61 | /// run task, specifying the feature type in `X`. This allows to run machine learning models 62 | /// that expect every feature to have the same type. 
63 | pub fn run(&self, flow: F) -> M 64 | where 65 | F: Fn(&mut Iterator, &mut Iterator) 66 | -> Box>, 67 | X: DeserializeOwned, 68 | Y: DeserializeOwned, 69 | M: MeasureAccumulator, 70 | { 71 | let (dx, dy) = self.source_data 72 | .clone_split() 73 | .expect("Supervised Regression requires a target column"); 74 | 75 | let x: Vec = from_dataset(&dx).unwrap(); 76 | let y: Vec = from_dataset(&dy).unwrap(); 77 | 78 | let mut measure = M::new(); 79 | 80 | for fold in self.estimation_procedure.iter() { 81 | let mut train = fold.trainset 82 | .iter() 83 | .map(|&i| (&x[i * dx.n_cols()..(i + 1) * dx.n_cols()], &y[i])); 84 | 85 | let mut test = fold.testset 86 | .iter() 87 | .map(|&i| &x[i * dx.n_cols()..(i + 1) * dx.n_cols()]); 88 | 89 | let predictit = flow(&mut train, &mut test); 90 | 91 | for (known, pred) in fold.testset.iter().map(|&i| &y[i]).zip(predictit) { 92 | measure.update_one(known, &pred); 93 | } 94 | } 95 | 96 | measure 97 | } 98 | } 99 | --------------------------------------------------------------------------------