├── .DS_Store ├── .gitattributes ├── 9781484251201.jpg ├── Contributing.md ├── LICENSE.txt ├── README.md ├── chapter1 └── simple-scripts │ ├── conditions.rs │ ├── enumerations.rs │ ├── functions.rs │ ├── match.rs │ ├── oops.rs │ ├── ownership1.rs │ ├── ownership2.rs │ ├── ownership3.rs │ ├── ownership4.rs │ ├── ownership5.rs │ ├── unittestingexample │ ├── Cargo.toml │ └── src │ │ └── main.rs │ ├── variables.rs │ ├── variables1.rs │ ├── variables2.rs │ ├── variables3.rs │ ├── variables4.rs │ └── variables5.rs ├── chapter2 ├── datasets │ ├── housing.csv │ └── iris.csv ├── iris_classification_tchrs │ ├── .DS_Store │ ├── Cargo.toml │ ├── mklml_mac_2019.0.5.20190502.tgz │ └── src │ │ ├── linear_with_sgd.rs │ │ ├── main.rs │ │ └── simple_nn.rs ├── iris_classification_xgboost │ ├── .DS_Store │ ├── Cargo.toml │ ├── iris.csv │ └── src │ │ └── main.rs ├── kmeans_rusty_machine │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── logistic_regression_rustlearn │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── ml-utils │ ├── Cargo.toml │ ├── calc_prob.py │ ├── clusim stuff.ipynb │ ├── easy_bayesian_AB.py │ ├── examples │ │ └── measures.rs │ └── src │ │ ├── datasets.rs │ │ ├── hypothesis_testing.rs │ │ ├── lib.rs │ │ ├── main.rs │ │ ├── sup_metrics.rs │ │ └── unsup_metrics.rs ├── nb_classification │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── rust_and_tf │ ├── Cargo.toml │ ├── data │ │ └── housing.csv │ ├── fullcode.ipynb │ ├── src │ │ ├── conv_nets.rs │ │ ├── conv_nets_maxpooling.rs │ │ ├── graph_variables.rs │ │ ├── graph_with_placeholder.rs │ │ ├── linear_regression.rs │ │ ├── linear_regression_from_model.rs │ │ ├── main.rs │ │ └── seq_nodes.rs │ └── tensorflow create model.ipynb ├── rustlearn_classification_tasks │ ├── Cargo.toml │ └── src │ │ ├── binary_class_scores.rs │ │ ├── logistic_reg.rs │ │ ├── main.rs │ │ ├── svm.rs │ │ └── trees.rs ├── rusty_machine_classification │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── rusty_machine_supervised_algos │ ├── Cargo.toml │ └── src │ │ └── main.rs └── rustymachine_regression │ ├── Cargo.toml │ ├── data │ └── housing.csv │ └── src │ ├── gaussian_process_reg.rs │ ├── glms.rs │ ├── lin_reg.rs │ └── main.rs ├── chapter3 ├── reinforcement-learning-frozenlake │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── rsrl_custom │ ├── Cargo.toml │ └── src │ │ └── main.rs └── rusty_machine_unsupervised │ ├── Cargo.toml │ ├── data │ └── iris.csv │ └── src │ └── main.rs ├── chapter4 ├── SQL_db │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── data_formats │ ├── Cargo.toml │ ├── data │ │ ├── prize.json │ │ └── sample_2.xml │ └── src │ │ ├── csvreading.rs │ │ ├── jsonreading.rs │ │ ├── main.rs │ │ └── xmlreading.rs ├── data_transformations_datafusion │ ├── Cargo.toml │ ├── src │ │ └── main.rs │ └── titanic │ │ ├── test.csv │ │ └── train.csv ├── databases │ ├── Cargo.toml │ └── src │ │ ├── main.rs │ │ ├── neo4j_db.rs │ │ └── postgres_db.rs ├── graph_db │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── matrix_transformations │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── s3_files │ ├── Cargo.toml │ └── src │ │ └── main.rs └── scraping │ ├── Cargo.toml │ └── src │ └── main.rs ├── chapter5 ├── crfsuite-model │ ├── Cargo.toml │ ├── data │ │ └── ner.csv │ └── src │ │ └── main.rs ├── fasttext-model │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── jigsaw │ ├── Cargo.toml │ ├── data │ │ └── train.csv │ ├── references.txt │ └── src │ │ └── main.rs └── snips-model │ ├── Cargo.toml │ ├── snips_training.md │ └── src │ └── main.rs ├── chapter6 ├── adversarial │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── 
face-detection-tf │ ├── Cargo.toml │ ├── mtcnn.pb │ └── src │ │ └── main.rs ├── finetuning_pytorch_image_models │ ├── Cargo.toml │ ├── README.md │ ├── resnet.py │ └── src │ │ └── main.rs ├── model_inference │ ├── Cargo.toml │ └── src │ │ └── main.rs └── pytorch-image-classification │ ├── Cargo.toml │ └── src │ └── main.rs ├── chapter7 ├── goodbooks-recommender │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── high-performance-computing │ ├── Cargo.toml │ └── src │ │ └── main.rs └── statistics │ ├── Cargo.toml │ └── src │ └── main.rs ├── chapter8 ├── cpp_demangle │ ├── Cargo.toml │ ├── mangle_ex.py │ ├── setup.py │ └── src │ │ └── lib.rs ├── crfsuite-model │ ├── Cargo.toml │ ├── MANIFEST.in │ ├── crfsuite_model │ │ └── __init__.py │ ├── crfsuite_model_prediction.py │ ├── crfsuite_model_training.py │ ├── data │ │ ├── ner.csv │ │ └── ner_predict.csv │ ├── pyproject.toml │ ├── requirements-dev.txt │ ├── setup.py │ └── src │ │ └── lib.rs ├── iris_classification_xgboost │ ├── IrisClassificationXgboost.java │ ├── Makefile │ ├── data │ │ ├── iris.csv │ │ └── predict.csv │ └── iris_classification_library │ │ ├── Cargo.toml │ │ └── src │ │ └── lib.rs └── my_lambda_function │ ├── .cargo │ └── config │ ├── Cargo.toml │ ├── buildthis.sh │ └── src │ └── main.rs └── errata.md /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/practical-machine-learning-w-rust/b0fd379ee4f0f7bcd9276ae6d31576aa655b08d7/.DS_Store -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /9781484251201.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/practical-machine-learning-w-rust/b0fd379ee4f0f7bcd9276ae6d31576aa655b08d7/9781484251201.jpg -------------------------------------------------------------------------------- /Contributing.md: -------------------------------------------------------------------------------- 1 | # Contributing to Apress Source Code 2 | 3 | Copyright for Apress source code belongs to the author(s). However, under fair use you are encouraged to fork and contribute minor corrections and updates for the benefit of the author(s) and other readers. 4 | 5 | ## How to Contribute 6 | 7 | 1. Make sure you have a GitHub account. 8 | 2. Fork the repository for the relevant book. 9 | 3. Create a new branch on which to make your change, e.g. 10 | `git checkout -b my_code_contribution` 11 | 4. Commit your change. Include a commit message describing the correction. Please note that if your commit message is not clear, the correction will not be accepted. 12 | 5. Submit a pull request. 13 | 14 | Thank you for your contribution! -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Freeware License, some rights reserved 2 | 3 | Copyright (c) 2020 Joydeep Bhattacharjee 4 | 5 | Permission is hereby granted, free of charge, to anyone obtaining a copy 6 | of this software and associated documentation files (the "Software"), 7 | to work with the Software within the limits of freeware distribution and fair use. 
8 | This includes the rights to use, copy, and modify the Software for personal use. 9 | Users are also allowed and encouraged to submit corrections and modifications 10 | to the Software for the benefit of other users. 11 | 12 | It is not allowed to reuse, modify, or redistribute the Software for 13 | commercial use in any way, or for a user’s educational materials such as books 14 | or blog articles without prior permission from the copyright holder. 15 | 16 | The above copyright notice and this permission notice need to be included 17 | in all copies or substantial portions of the software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS OR APRESS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | SOFTWARE. 26 | 27 | 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Apress Source Code 2 | 3 | This repository accompanies [*Practical Machine Learning with Rust*](https://www.apress.com/9781484251201) by Joydeep Bhattacharjee (Apress, 2020). 4 | 5 | [comment]: #cover 6 | ![Cover image](9781484251201.jpg) 7 | 8 | Download the files as a zip using the green button, or clone the repository to your machine using Git. 9 | 10 | ## Releases 11 | 12 | Release v1.0 corresponds to the code in the published book, without corrections or updates. 13 | 14 | ## Contributions 15 | 16 | See the file Contributing.md for more information on how you can contribute to this repository. 
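## Running the examples

The single-file programs under `chapter1/simple-scripts` compile directly with `rustc` (for example, `rustc oops.rs && ./oops` should reproduce the `$ ./oops` transcript at the top of that file, assuming a suitable Rust toolchain; `variables1.rs` uses a feature gate and needs nightly). The remaining examples are Cargo projects and build with `cargo build` from each project directory.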
-------------------------------------------------------------------------------- /chapter1/simple-scripts/conditions.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | let place = "himalayas"; 3 | 4 | let weather = if place == "himalayas" { 5 | "cold" 6 | } else { 7 | "hot" 8 | }; 9 | println!("{:?}", weather); 10 | } -------------------------------------------------------------------------------- /chapter1/simple-scripts/enumerations.rs: -------------------------------------------------------------------------------- 1 | // enumerations.rs 2 | 3 | #[derive(Debug)] 4 | enum NationalHolidays { 5 | GandhiJayanti, 6 | RepublicDay, 7 | IndependenceDay, 8 | } 9 | 10 | fn inspect(day: NationalHolidays) -> String { 11 | match day { 12 | NationalHolidays::GandhiJayanti => String::from("Oct 2"), 13 | NationalHolidays::RepublicDay => String::from("Jan 26"), 14 | NationalHolidays::IndependenceDay => String::from("Aug 15"), 15 | } 16 | } 17 | 18 | fn main() { 19 | let day = NationalHolidays::GandhiJayanti; 20 | let date = inspect(day); 21 | println!("{:?}", date); // output: Oct 2 22 | } -------------------------------------------------------------------------------- /chapter1/simple-scripts/functions.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | println!("{:?}", square_of(-5)); 3 | } 4 | 5 | fn square_of(x: i32) -> i32 { 6 | println!("x = {:?}", x); 7 | x.pow(2) 8 | } -------------------------------------------------------------------------------- /chapter1/simple-scripts/match.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | let place = "himalayas"; 3 | 4 | let weather = match place { 5 | "himalayas" => "cold", 6 | _ => "hot", 7 | }; 8 | println!("{:?}", weather); 9 | } -------------------------------------------------------------------------------- /chapter1/simple-scripts/oops.rs: -------------------------------------------------------------------------------- 1 | // oops.rs 2 | // $ ./oops 3 | // Planet { co2: 0.04, nitrogen: 78.09 } 4 | // Planet { co2: 95.32, nitrogen: 2.7 } 5 | // For planet Planet { co2: 0.04, nitrogen: 78.09 }: co2 = 0.04, nitrogen=78.09, other_gases=21.870003 6 | // For planet Planet { co2: 95.32, nitrogen: 2.7 }: co2 = 95.32, nitrogen=2.7, other_gases=1.9800003 7 | 8 | #[derive(Debug)] 9 | struct Planet { 10 | co2: f32, 11 | nitrogen: f32 12 | } 13 | 14 | trait Atmosphere { 15 | fn new(co2: f32, nitrogen: f32) -> Self; 16 | fn amount_of_other_gases(&self) -> f32; 17 | fn summarize(&self); 18 | } 19 | 20 | impl Atmosphere for Planet { 21 | fn new(co2: f32, nitrogen: f32) -> Planet { 22 | Planet { co2: co2, nitrogen: nitrogen } 23 | } 24 | 25 | fn amount_of_other_gases(&self) -> f32 { 26 | 100.0 - self.co2 - self.nitrogen 27 | } 28 | 29 | fn summarize(&self) { 30 | let other_gases = self.amount_of_other_gases(); 31 | println!("For planet {planet:?}: co2 = {co2}, nitrogen={nitrogen}, other_gases={other_gases}", 32 | planet=self, co2=self.co2, nitrogen=self.nitrogen, other_gases=other_gases); 33 | } 34 | } 35 | 36 | fn main() { 37 | let earth = Planet { co2: 0.04, nitrogen: 78.09 }; 38 | println!("{:?}", earth); 39 | 40 | let mars = Planet { co2: 95.32, nitrogen: 2.7 }; 41 | println!("{:?}", mars); 42 | 43 | earth.summarize(); 44 | 45 | mars.summarize(); 46 | } 47 | -------------------------------------------------------------------------------- /chapter1/simple-scripts/ownership1.rs: 
-------------------------------------------------------------------------------- 1 | // ownership1.rs 2 | 3 | fn main() { 4 | let lang = "rust"; 5 | let rust1 = add_version(&lang); 6 | println!("{:?}", rust1); 7 | } 8 | 9 | fn add_version(s: &str) -> String { 10 | s.to_string() + " 2018." 11 | } 12 | -------------------------------------------------------------------------------- /chapter1/simple-scripts/ownership2.rs: -------------------------------------------------------------------------------- 1 | // ownership2.rs 2 | fn main() { 3 | let lang = String::from("rust"); 4 | let rust1 = add_version(lang); 5 | println!("{:?}", rust1); 6 | let rust2 = add_lang(lang); 7 | println!("{:?}", rust2); 8 | } 9 | 10 | fn add_version(s: String) -> String { 11 | s + " " + "2018!!" 12 | } 13 | 14 | fn add_lang(s: String) -> String { 15 | s + " " + "lang." 16 | } -------------------------------------------------------------------------------- /chapter1/simple-scripts/ownership3.rs: -------------------------------------------------------------------------------- 1 | // ownership3.rs 2 | 3 | fn main() { 4 | let lang = String::from("rust"); 5 | let rust1 = add_version(&lang); 6 | println!("{:?}", rust1); 7 | let rust2 = add_lang(&lang); 8 | println!("{:?}", rust2); 9 | } 10 | 11 | fn add_version(s: &String) -> String { 12 | s.push_str(" 2019!!"); 13 | s.to_string() 14 | } 15 | 16 | fn add_lang(s: &String) -> String { 17 | s.push_str(" lang."); 18 | s.to_string() 19 | } 20 | -------------------------------------------------------------------------------- /chapter1/simple-scripts/ownership4.rs: -------------------------------------------------------------------------------- 1 | // $ ./ownership4 2 | // "rust 2019!!" 3 | // "rust 2019!! lang." 4 | 5 | fn main() { 6 | let mut lang = String::from("rust"); 7 | let rust1 = add_version(&mut lang); 8 | println!("{:?}", rust1); 9 | let rust2 = add_lang(&mut lang); 10 | println!("{:?}", rust2); 11 | } 12 | 13 | fn add_version(s: &mut String) -> String { 14 | s.push_str(" 2019!!"); 15 | s.to_string() 16 | } 17 | 18 | fn add_lang(s: &mut String) -> String { 19 | s.push_str(" lang."); 20 | s.to_string() 21 | } -------------------------------------------------------------------------------- /chapter1/simple-scripts/ownership5.rs: -------------------------------------------------------------------------------- 1 | // $ ./ownership 2 | // "rust 2018." 3 | // "rust lang." 4 | 5 | fn main() { 6 | let lang = "rust"; // change done here 7 | let rust1 = add_version(&lang); // change done here 8 | println!("{:?}", rust1); 9 | let rust2 = add_lang(&lang); // change done here 10 | println!("{:?}", rust2); 11 | } 12 | 13 | fn add_version(s: &str) -> String { 14 | s.to_string() + " 2018." 15 | } 16 | 17 | fn add_lang(s: &str) -> String { 18 | s.to_string() + " lang." 
19 | } -------------------------------------------------------------------------------- /chapter1/simple-scripts/unittestingexample/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "unittestingexample" 3 | version = "0.1.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | -------------------------------------------------------------------------------- /chapter1/simple-scripts/unittestingexample/src/main.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | let lang = "rust"; 3 | let rust1 = add_version(&lang); 4 | println!("{:?}", rust1); 5 | } 6 | 7 | fn add_version(s: &str) -> String { 8 | s.to_string() + " 2018." 9 | } 10 | 11 | #[test] 12 | fn test_add_version() { 13 | assert_eq!(add_version("abcd"), String::from("abcd 2018.")); 14 | } -------------------------------------------------------------------------------- /chapter1/simple-scripts/variables.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | let x = "learning rust"; 3 | 4 | println!("{}", x); 5 | 6 | } 7 | -------------------------------------------------------------------------------- /chapter1/simple-scripts/variables1.rs: -------------------------------------------------------------------------------- 1 | #![feature(core_intrinsics)] 2 | 3 | fn print_type_of<T>(_: &T) { 4 | println!("{}", unsafe { std::intrinsics::type_name::<T>() }); 5 | } 6 | 7 | fn main() { 8 | let x = "learning rust"; 9 | let y = 6; 10 | let z = 3.14; 11 | 12 | println!("{}", x); 13 | println!("type of x:"); 14 | print_type_of(&x); 15 | println!("type of y:"); 16 | print_type_of(&y); 17 | println!("type of z:"); 18 | print_type_of(&z); 19 | 20 | } 21 | -------------------------------------------------------------------------------- /chapter1/simple-scripts/variables2.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | let mut x = 32; 3 | println!("Current value of x: {}", x); 4 | x = 64; 5 | println!("Current value of x: {}", x); 6 | } 7 | -------------------------------------------------------------------------------- /chapter1/simple-scripts/variables3.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | let mut x = 32; 3 | println!("Current value of x: {}", x); 4 | x = "rust"; 5 | println!("Current value of x: {}", x); 6 | } 7 | -------------------------------------------------------------------------------- /chapter1/simple-scripts/variables4.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | let x = 1; 3 | let x = x + 2; 4 | let x = x * 2; 5 | println!("Value of x: {}", x); 6 | } 7 | -------------------------------------------------------------------------------- /chapter1/simple-scripts/variables5.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | let x = 5; 3 | 4 | if 4 < 10 { 5 | let x = 10; 6 | println!("Inside if x = {:?}", x); 7 | } 8 | println!("Outside if x = {:?}", x); 9 | } -------------------------------------------------------------------------------- /chapter2/datasets/iris.csv: -------------------------------------------------------------------------------- 1 | sepal_length,sepal_width,petal_length,petal_width,species 2 | 5.1,3.5,1.4,0.2,setosa 3 | 4.9,3.0,1.4,0.2,setosa 4 | 4.7,3.2,1.3,0.2,setosa 5 | 
4.6,3.1,1.5,0.2,setosa 6 | 5.0,3.6,1.4,0.2,setosa 7 | 5.4,3.9,1.7,0.4,setosa 8 | 4.6,3.4,1.4,0.3,setosa 9 | 5.0,3.4,1.5,0.2,setosa 10 | 4.4,2.9,1.4,0.2,setosa 11 | 4.9,3.1,1.5,0.1,setosa 12 | 5.4,3.7,1.5,0.2,setosa 13 | 4.8,3.4,1.6,0.2,setosa 14 | 4.8,3.0,1.4,0.1,setosa 15 | 4.3,3.0,1.1,0.1,setosa 16 | 5.8,4.0,1.2,0.2,setosa 17 | 5.7,4.4,1.5,0.4,setosa 18 | 5.4,3.9,1.3,0.4,setosa 19 | 5.1,3.5,1.4,0.3,setosa 20 | 5.7,3.8,1.7,0.3,setosa 21 | 5.1,3.8,1.5,0.3,setosa 22 | 5.4,3.4,1.7,0.2,setosa 23 | 5.1,3.7,1.5,0.4,setosa 24 | 4.6,3.6,1.0,0.2,setosa 25 | 5.1,3.3,1.7,0.5,setosa 26 | 4.8,3.4,1.9,0.2,setosa 27 | 5.0,3.0,1.6,0.2,setosa 28 | 5.0,3.4,1.6,0.4,setosa 29 | 5.2,3.5,1.5,0.2,setosa 30 | 5.2,3.4,1.4,0.2,setosa 31 | 4.7,3.2,1.6,0.2,setosa 32 | 4.8,3.1,1.6,0.2,setosa 33 | 5.4,3.4,1.5,0.4,setosa 34 | 5.2,4.1,1.5,0.1,setosa 35 | 5.5,4.2,1.4,0.2,setosa 36 | 4.9,3.1,1.5,0.1,setosa 37 | 5.0,3.2,1.2,0.2,setosa 38 | 5.5,3.5,1.3,0.2,setosa 39 | 4.9,3.1,1.5,0.1,setosa 40 | 4.4,3.0,1.3,0.2,setosa 41 | 5.1,3.4,1.5,0.2,setosa 42 | 5.0,3.5,1.3,0.3,setosa 43 | 4.5,2.3,1.3,0.3,setosa 44 | 4.4,3.2,1.3,0.2,setosa 45 | 5.0,3.5,1.6,0.6,setosa 46 | 5.1,3.8,1.9,0.4,setosa 47 | 4.8,3.0,1.4,0.3,setosa 48 | 5.1,3.8,1.6,0.2,setosa 49 | 4.6,3.2,1.4,0.2,setosa 50 | 5.3,3.7,1.5,0.2,setosa 51 | 5.0,3.3,1.4,0.2,setosa 52 | 7.0,3.2,4.7,1.4,versicolor 53 | 6.4,3.2,4.5,1.5,versicolor 54 | 6.9,3.1,4.9,1.5,versicolor 55 | 5.5,2.3,4.0,1.3,versicolor 56 | 6.5,2.8,4.6,1.5,versicolor 57 | 5.7,2.8,4.5,1.3,versicolor 58 | 6.3,3.3,4.7,1.6,versicolor 59 | 4.9,2.4,3.3,1.0,versicolor 60 | 6.6,2.9,4.6,1.3,versicolor 61 | 5.2,2.7,3.9,1.4,versicolor 62 | 5.0,2.0,3.5,1.0,versicolor 63 | 5.9,3.0,4.2,1.5,versicolor 64 | 6.0,2.2,4.0,1.0,versicolor 65 | 6.1,2.9,4.7,1.4,versicolor 66 | 5.6,2.9,3.6,1.3,versicolor 67 | 6.7,3.1,4.4,1.4,versicolor 68 | 5.6,3.0,4.5,1.5,versicolor 69 | 5.8,2.7,4.1,1.0,versicolor 70 | 6.2,2.2,4.5,1.5,versicolor 71 | 5.6,2.5,3.9,1.1,versicolor 72 | 5.9,3.2,4.8,1.8,versicolor 73 | 6.1,2.8,4.0,1.3,versicolor 74 | 6.3,2.5,4.9,1.5,versicolor 75 | 6.1,2.8,4.7,1.2,versicolor 76 | 6.4,2.9,4.3,1.3,versicolor 77 | 6.6,3.0,4.4,1.4,versicolor 78 | 6.8,2.8,4.8,1.4,versicolor 79 | 6.7,3.0,5.0,1.7,versicolor 80 | 6.0,2.9,4.5,1.5,versicolor 81 | 5.7,2.6,3.5,1.0,versicolor 82 | 5.5,2.4,3.8,1.1,versicolor 83 | 5.5,2.4,3.7,1.0,versicolor 84 | 5.8,2.7,3.9,1.2,versicolor 85 | 6.0,2.7,5.1,1.6,versicolor 86 | 5.4,3.0,4.5,1.5,versicolor 87 | 6.0,3.4,4.5,1.6,versicolor 88 | 6.7,3.1,4.7,1.5,versicolor 89 | 6.3,2.3,4.4,1.3,versicolor 90 | 5.6,3.0,4.1,1.3,versicolor 91 | 5.5,2.5,4.0,1.3,versicolor 92 | 5.5,2.6,4.4,1.2,versicolor 93 | 6.1,3.0,4.6,1.4,versicolor 94 | 5.8,2.6,4.0,1.2,versicolor 95 | 5.0,2.3,3.3,1.0,versicolor 96 | 5.6,2.7,4.2,1.3,versicolor 97 | 5.7,3.0,4.2,1.2,versicolor 98 | 5.7,2.9,4.2,1.3,versicolor 99 | 6.2,2.9,4.3,1.3,versicolor 100 | 5.1,2.5,3.0,1.1,versicolor 101 | 5.7,2.8,4.1,1.3,versicolor 102 | 6.3,3.3,6.0,2.5,virginica 103 | 5.8,2.7,5.1,1.9,virginica 104 | 7.1,3.0,5.9,2.1,virginica 105 | 6.3,2.9,5.6,1.8,virginica 106 | 6.5,3.0,5.8,2.2,virginica 107 | 7.6,3.0,6.6,2.1,virginica 108 | 4.9,2.5,4.5,1.7,virginica 109 | 7.3,2.9,6.3,1.8,virginica 110 | 6.7,2.5,5.8,1.8,virginica 111 | 7.2,3.6,6.1,2.5,virginica 112 | 6.5,3.2,5.1,2.0,virginica 113 | 6.4,2.7,5.3,1.9,virginica 114 | 6.8,3.0,5.5,2.1,virginica 115 | 5.7,2.5,5.0,2.0,virginica 116 | 5.8,2.8,5.1,2.4,virginica 117 | 6.4,3.2,5.3,2.3,virginica 118 | 6.5,3.0,5.5,1.8,virginica 119 | 7.7,3.8,6.7,2.2,virginica 120 | 7.7,2.6,6.9,2.3,virginica 121 | 6.0,2.2,5.0,1.5,virginica 122 | 
6.9,3.2,5.7,2.3,virginica 123 | 5.6,2.8,4.9,2.0,virginica 124 | 7.7,2.8,6.7,2.0,virginica 125 | 6.3,2.7,4.9,1.8,virginica 126 | 6.7,3.3,5.7,2.1,virginica 127 | 7.2,3.2,6.0,1.8,virginica 128 | 6.2,2.8,4.8,1.8,virginica 129 | 6.1,3.0,4.9,1.8,virginica 130 | 6.4,2.8,5.6,2.1,virginica 131 | 7.2,3.0,5.8,1.6,virginica 132 | 7.4,2.8,6.1,1.9,virginica 133 | 7.9,3.8,6.4,2.0,virginica 134 | 6.4,2.8,5.6,2.2,virginica 135 | 6.3,2.8,5.1,1.5,virginica 136 | 6.1,2.6,5.6,1.4,virginica 137 | 7.7,3.0,6.1,2.3,virginica 138 | 6.3,3.4,5.6,2.4,virginica 139 | 6.4,3.1,5.5,1.8,virginica 140 | 6.0,3.0,4.8,1.8,virginica 141 | 6.9,3.1,5.4,2.1,virginica 142 | 6.7,3.1,5.6,2.4,virginica 143 | 6.9,3.1,5.1,2.3,virginica 144 | 5.8,2.7,5.1,1.9,virginica 145 | 6.8,3.2,5.9,2.3,virginica 146 | 6.7,3.3,5.7,2.5,virginica 147 | 6.7,3.0,5.2,2.3,virginica 148 | 6.3,2.5,5.0,1.9,virginica 149 | 6.5,3.0,5.2,2.0,virginica 150 | 6.2,3.4,5.4,2.3,virginica 151 | 5.9,3.0,5.1,1.8,virginica 152 | -------------------------------------------------------------------------------- /chapter2/iris_classification_tchrs/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/practical-machine-learning-w-rust/b0fd379ee4f0f7bcd9276ae6d31576aa655b08d7/chapter2/iris_classification_tchrs/.DS_Store -------------------------------------------------------------------------------- /chapter2/iris_classification_tchrs/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "iris_classification_tchrs" 3 | version = "0.1.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | csv = "1.0.5" 9 | serde = "1.0.89" 10 | serde_derive = "1.0.89" 11 | rand = "0.6" 12 | tch = "0.0.6" 13 | ml-utils = { path = "../ml-utils" } -------------------------------------------------------------------------------- /chapter2/iris_classification_tchrs/mklml_mac_2019.0.5.20190502.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/practical-machine-learning-w-rust/b0fd379ee4f0f7bcd9276ae6d31576aa655b08d7/chapter2/iris_classification_tchrs/mklml_mac_2019.0.5.20190502.tgz -------------------------------------------------------------------------------- /chapter2/iris_classification_tchrs/src/linear_with_sgd.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | use std::vec::Vec; 3 | use std::error::Error; 4 | 5 | use csv; 6 | use rand; 7 | use rand::thread_rng; 8 | use rand::seq::SliceRandom; 9 | 10 | use tch; 11 | use tch::{nn, kind, Kind, Tensor, no_grad, vision, Device}; 12 | use tch::{nn::Module, nn::OptimizerConfig}; 13 | 14 | use ml_utils; 15 | use ml_utils::datasets::Flower; 16 | 17 | static FEATURE_DIM: i64 = 4; 18 | static HIDDEN_NODES: i64 = 10; 19 | static LABELS: i64 = 3; 20 | 21 | #[derive(Debug)] 22 | struct Net { 23 | fc1: nn::Linear, 24 | fc2: nn::Linear, 25 | } 26 | 27 | impl Net { 28 | fn new(vs: &nn::Path) -> Net { 29 | let fc1 = nn::Linear::new(vs, FEATURE_DIM, HIDDEN_NODES, Default::default()); 30 | let fc2 = nn::Linear::new(vs, HIDDEN_NODES, LABELS, Default::default()); 31 | Net { fc1, fc2 } 32 | } 33 | } 34 | 35 | impl Module for Net { 36 | fn forward(&self, xs: &Tensor) -> Tensor { 37 | xs.apply(&self.fc1).relu().apply(&self.fc2) 38 | } 39 | } 40 | 41 | pub fn run() -> Result<(), Box<dyn Error>> { 42 | // Get all the data 43 | let mut rdr = csv::Reader::from_reader(io::stdin()); 
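// Note: the records are read from standard input, so a typical invocation (assuming the iris data in chapter2/datasets) is `cargo run -- sgd < ../datasets/iris.csv`; main.rs routes any argument other than "nn" to this module.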
44 | let mut data = Vec::new(); 45 | for result in rdr.deserialize() { 46 | let r: Flower = result?; 47 | data.push(r); // data contains all the records 48 | } 49 | 50 | // shuffle the data. 51 | data.shuffle(&mut thread_rng()); 52 | 53 | // separate out to train and test datasets. 54 | let test_size: f64 = 0.5; 55 | let test_size: f64 = data.len() as f64 * test_size; 56 | let test_size = test_size.round() as usize; 57 | 58 | let (test_data, train_data) = data.split_at(test_size); 59 | let train_size = train_data.len(); 60 | let test_size = test_data.len(); 61 | assert_eq!(train_size, test_size); 62 | 63 | // differentiate the features and the labels. 64 | // torch needs vectors in f64 65 | let flower_x_train: Vec<f64> = train_data.iter().flat_map(|r| r.into_feature_vector()).map(|x| x as f64).collect(); 66 | let flower_y_train: Vec<f64> = train_data.iter().map(|r| r.into_labels()).map(|x| x as f64).collect(); 67 | 68 | let flower_x_test: Vec<f64> = test_data.iter().flat_map(|r| r.into_feature_vector()).map(|x| x as f64).collect(); 69 | let flower_y_test: Vec<f64> = test_data.iter().map(|r| r.into_labels()).map(|x| x as f64).collect(); 70 | 71 | let flower_x_train = Tensor::float_vec(flower_x_train.as_slice()); 72 | let flower_y_train = Tensor::float_vec(flower_y_train.as_slice()).to_kind(Kind::Int64); 73 | let flower_x_test = Tensor::float_vec(flower_x_test.as_slice()); 74 | let flower_y_test = Tensor::float_vec(flower_y_test.as_slice()).to_kind(Kind::Int64); 75 | 76 | // print shape of all the data. 77 | println!("Training data shape {:?}", flower_x_train.size()); 78 | println!("Training flower_y_train data shape {:?}", flower_y_train.size()); 79 | 80 | // reshaping examples 81 | // one way to reshape is using unsqueeze 82 | //let flower_x_train1 = flower_x_train.unsqueeze(0); // Training data shape [1, 360] 83 | //println!("Training data shape {:?}", flower_x_train1.size()); 84 | let train_size = train_size as i64; 85 | let test_size = test_size as i64; 86 | let flower_x_train = flower_x_train.view(&[train_size, FEATURE_DIM]); 87 | let flower_x_test = flower_x_test.view(&[test_size, FEATURE_DIM]); 88 | let flower_y_train = flower_y_train.view(&[train_size]); 89 | let flower_y_test = flower_y_test.view(&[test_size]); 90 | 91 | // working on a linear neural network with SGD 92 | let vs = nn::VarStore::new(Device::Cpu); 93 | let net = Net::new(&vs.root()); 94 | let opt = nn::Adam::default().build(&vs, 1e-3)?; 95 | for epoch in 1..200 { 96 | let loss = net 97 | .forward(&flower_x_train) 98 | .cross_entropy_for_logits(&flower_y_train); 99 | opt.backward_step(&loss); 100 | let test_accuracy = net 101 | .forward(&flower_x_test) 102 | .accuracy_for_logits(&flower_y_test); 103 | println!( 104 | "epoch: {:4} train loss: {:8.5} test acc: {:5.2}%", 105 | epoch, 106 | f64::from(&loss), 107 | 100. * f64::from(&test_accuracy), 108 | ); 109 | }; 110 | 111 | Ok(()) 112 | } 113 | -------------------------------------------------------------------------------- /chapter2/iris_classification_tchrs/src/main.rs: -------------------------------------------------------------------------------- 1 | extern crate serde; 2 | // This lets us write `#[derive(Deserialize)]`. 
3 | #[macro_use] 4 | extern crate serde_derive; 5 | 6 | use std::vec::Vec; 7 | use std::process::exit; 8 | use std::env::args; 9 | 10 | mod simple_nn; 11 | mod linear_with_sgd; 12 | 13 | fn main() { 14 | let args: Vec<String> = args().collect(); 15 | let model = if args.len() < 2 { 16 | None 17 | } else { 18 | Some(args[1].as_str()) 19 | }; 20 | let res = match model { 21 | None => {println!("Run cargo run [nn|linear_sgd] to get outputs"); Ok(())}, 22 | Some("nn") => simple_nn::run(), 23 | Some(_) => linear_with_sgd::run(), 24 | }; 25 | // Putting the main code in another function serves two purposes: 26 | // 1. We can use the `?` operator. 27 | // 2. We can call exit safely, which does not run any destructors. 28 | exit(match res { 29 | Ok(_) => 0, 30 | Err(e) => { 31 | println!("{}", e); 32 | 1 33 | } 34 | }) 35 | } -------------------------------------------------------------------------------- /chapter2/iris_classification_tchrs/src/simple_nn.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | use std::vec::Vec; 3 | use std::error::Error; 4 | 5 | use csv; 6 | use rand; 7 | use rand::thread_rng; 8 | use rand::seq::SliceRandom; 9 | 10 | use tch; 11 | use tch::{nn, kind, Kind, Tensor, no_grad, vision, Device}; 12 | use tch::{nn::Module, nn::OptimizerConfig}; 13 | 14 | use ml_utils; 15 | use ml_utils::datasets::Flower; 16 | 17 | static FEATURE_DIM: i64 = 4; 18 | static HIDDEN_NODES: i64 = 10; 19 | static LABELS: i64 = 3; 20 | 21 | #[derive(Debug)] 22 | struct Net { 23 | fc1: nn::Linear, 24 | fc2: nn::Linear, 25 | } 26 | 27 | impl Net { 28 | fn new(vs: &nn::Path) -> Net { 29 | let fc1 = nn::Linear::new(vs, FEATURE_DIM, HIDDEN_NODES, Default::default()); 30 | let fc2 = nn::Linear::new(vs, HIDDEN_NODES, LABELS, Default::default()); 31 | Net { fc1, fc2 } 32 | } 33 | } 34 | 35 | impl Module for Net { 36 | fn forward(&self, xs: &Tensor) -> Tensor { 37 | xs.apply(&self.fc1).relu().apply(&self.fc2) 38 | } 39 | } 40 | 41 | pub fn run() -> Result<(), Box<dyn Error>> { 42 | // Get all the data 43 | let mut rdr = csv::Reader::from_reader(io::stdin()); 44 | let mut data = Vec::new(); 45 | for result in rdr.deserialize() { 46 | let r: Flower = result?; 47 | data.push(r); // data contains all the records 48 | } 49 | 50 | // shuffle the data. 51 | data.shuffle(&mut thread_rng()); 52 | 53 | // separate out to train and test datasets. 54 | let test_size: f64 = 0.5; 55 | let test_size: f64 = data.len() as f64 * test_size; 56 | let test_size = test_size.round() as usize; 57 | 58 | let (test_data, train_data) = data.split_at(test_size); 59 | let train_size = train_data.len(); 60 | let test_size = test_data.len(); 61 | assert_eq!(train_size, test_size); 62 | 63 | // differentiate the features and the labels. 
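// (into_feature_vector() flattens each record's four measurements, and into_labels() is assumed to encode the species as an integer class index; see ml-utils/src/datasets.rs.)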
64 | // torch needs vectors in f64 65 | let flower_x_train: Vec<f64> = train_data.iter().flat_map(|r| r.into_feature_vector()).map(|x| x as f64).collect(); 66 | let flower_y_train: Vec<f64> = train_data.iter().map(|r| r.into_labels()).map(|x| x as f64).collect(); 67 | 68 | let flower_x_test: Vec<f64> = test_data.iter().flat_map(|r| r.into_feature_vector()).map(|x| x as f64).collect(); 69 | let flower_y_test: Vec<f64> = test_data.iter().map(|r| r.into_labels()).map(|x| x as f64).collect(); 70 | 71 | let flower_x_train = Tensor::float_vec(flower_x_train.as_slice()); 72 | let flower_y_train = Tensor::float_vec(flower_y_train.as_slice()).to_kind(Kind::Int64); 73 | let flower_x_test = Tensor::float_vec(flower_x_test.as_slice()); 74 | let flower_y_test = Tensor::float_vec(flower_y_test.as_slice()).to_kind(Kind::Int64); 75 | 76 | // print shape of all the data. 77 | println!("Training data shape {:?}", flower_x_train.size()); 78 | println!("Training flower_y_train data shape {:?}", flower_y_train.size()); 79 | 80 | // reshaping examples 81 | // one way to reshape is using unsqueeze 82 | //let flower_x_train1 = flower_x_train.unsqueeze(0); // Training data shape [1, 360] 83 | //println!("Training data shape {:?}", flower_x_train1.size()); 84 | let train_size = train_size as i64; 85 | let test_size = test_size as i64; 86 | let flower_x_train = flower_x_train.view(&[train_size, FEATURE_DIM]); 87 | let flower_x_test = flower_x_test.view(&[test_size, FEATURE_DIM]); 88 | let flower_y_train = flower_y_train.view(&[train_size]); 89 | let flower_y_test = flower_y_test.view(&[test_size]); 90 | 91 | // working on a linear neural network with SGD 92 | let vs = nn::VarStore::new(Device::Cpu); 93 | let net = Net::new(&vs.root()); 94 | let opt = nn::Adam::default().build(&vs, 1e-3)?; 95 | for epoch in 1..200 { 96 | let loss = net 97 | .forward(&flower_x_train) 98 | .cross_entropy_for_logits(&flower_y_train); 99 | opt.backward_step(&loss); 100 | let test_accuracy = net 101 | .forward(&flower_x_test) 102 | .accuracy_for_logits(&flower_y_test); 103 | println!( 104 | "epoch: {:4} train loss: {:8.5} test acc: {:5.2}%", 105 | epoch, 106 | f64::from(&loss), 107 | 100. * f64::from(&test_accuracy), 108 | ); 109 | }; 110 | 111 | 112 | let mut ws = Tensor::ones(&[FEATURE_DIM, 1], kind::FLOAT_CPU).set_requires_grad(true); 113 | let mut bs = Tensor::ones(&[train_size], kind::FLOAT_CPU).set_requires_grad(true); 114 | 115 | 116 | for epoch in 1..200 { 117 | let logits = flower_x_train.mm(&ws) + &bs; 118 | let loss = logits.squeeze().cross_entropy_for_logits(&flower_y_train); // since working on label encoded vectors. 119 | ws.zero_grad(); 120 | bs.zero_grad(); 121 | loss.backward(); 122 | no_grad(|| { 123 | ws += ws.grad() * (-1); 124 | bs += bs.grad() * (-1); 125 | }); 126 | let test_logits = flower_x_test.mm(&ws) + &bs; 127 | let test_accuracy = test_logits 128 | .argmax1(-1, false) 129 | .eq1(&flower_y_test) 130 | .to_kind(Kind::Float) 131 | .mean() 132 | .double_value(&[]); 133 | println!( 134 | "epoch: {:4} train loss: {:8.5} test acc: {:5.2}%", 135 | epoch, 136 | loss.double_value(&[]), 137 | 100. 
* test_accuracy 138 | ); 139 | } 140 | 141 | Ok(()) 142 | } 143 | -------------------------------------------------------------------------------- /chapter2/iris_classification_xgboost/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/practical-machine-learning-w-rust/b0fd379ee4f0f7bcd9276ae6d31576aa655b08d7/chapter2/iris_classification_xgboost/.DS_Store -------------------------------------------------------------------------------- /chapter2/iris_classification_xgboost/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "iris_classification_xgboost" 3 | version = "0.1.0" 4 | authors = ["joydeep bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | csv = "1.0.5" 9 | serde = "1.0.89" 10 | serde_derive = "1.0.89" 11 | rand = "0.6" 12 | xgboost = "0.1.4" 13 | ml-utils = { path = "../ml-utils" } -------------------------------------------------------------------------------- /chapter2/iris_classification_xgboost/iris.csv: -------------------------------------------------------------------------------- 1 | sepal_length,sepal_width,petal_length,petal_width,species 2 | 5.1,3.5,1.4,0.2,setosa 3 | 4.9,3.0,1.4,0.2,setosa 4 | 4.7,3.2,1.3,0.2,setosa 5 | 4.6,3.1,1.5,0.2,setosa 6 | 5.0,3.6,1.4,0.2,setosa 7 | 5.4,3.9,1.7,0.4,setosa 8 | 4.6,3.4,1.4,0.3,setosa 9 | 5.0,3.4,1.5,0.2,setosa 10 | 4.4,2.9,1.4,0.2,setosa 11 | 4.9,3.1,1.5,0.1,setosa 12 | 5.4,3.7,1.5,0.2,setosa 13 | 4.8,3.4,1.6,0.2,setosa 14 | 4.8,3.0,1.4,0.1,setosa 15 | 4.3,3.0,1.1,0.1,setosa 16 | 5.8,4.0,1.2,0.2,setosa 17 | 5.7,4.4,1.5,0.4,setosa 18 | 5.4,3.9,1.3,0.4,setosa 19 | 5.1,3.5,1.4,0.3,setosa 20 | 5.7,3.8,1.7,0.3,setosa 21 | 5.1,3.8,1.5,0.3,setosa 22 | 5.4,3.4,1.7,0.2,setosa 23 | 5.1,3.7,1.5,0.4,setosa 24 | 4.6,3.6,1.0,0.2,setosa 25 | 5.1,3.3,1.7,0.5,setosa 26 | 4.8,3.4,1.9,0.2,setosa 27 | 5.0,3.0,1.6,0.2,setosa 28 | 5.0,3.4,1.6,0.4,setosa 29 | 5.2,3.5,1.5,0.2,setosa 30 | 5.2,3.4,1.4,0.2,setosa 31 | 4.7,3.2,1.6,0.2,setosa 32 | 4.8,3.1,1.6,0.2,setosa 33 | 5.4,3.4,1.5,0.4,setosa 34 | 5.2,4.1,1.5,0.1,setosa 35 | 5.5,4.2,1.4,0.2,setosa 36 | 4.9,3.1,1.5,0.1,setosa 37 | 5.0,3.2,1.2,0.2,setosa 38 | 5.5,3.5,1.3,0.2,setosa 39 | 4.9,3.1,1.5,0.1,setosa 40 | 4.4,3.0,1.3,0.2,setosa 41 | 5.1,3.4,1.5,0.2,setosa 42 | 5.0,3.5,1.3,0.3,setosa 43 | 4.5,2.3,1.3,0.3,setosa 44 | 4.4,3.2,1.3,0.2,setosa 45 | 5.0,3.5,1.6,0.6,setosa 46 | 5.1,3.8,1.9,0.4,setosa 47 | 4.8,3.0,1.4,0.3,setosa 48 | 5.1,3.8,1.6,0.2,setosa 49 | 4.6,3.2,1.4,0.2,setosa 50 | 5.3,3.7,1.5,0.2,setosa 51 | 5.0,3.3,1.4,0.2,setosa 52 | 7.0,3.2,4.7,1.4,versicolor 53 | 6.4,3.2,4.5,1.5,versicolor 54 | 6.9,3.1,4.9,1.5,versicolor 55 | 5.5,2.3,4.0,1.3,versicolor 56 | 6.5,2.8,4.6,1.5,versicolor 57 | 5.7,2.8,4.5,1.3,versicolor 58 | 6.3,3.3,4.7,1.6,versicolor 59 | 4.9,2.4,3.3,1.0,versicolor 60 | 6.6,2.9,4.6,1.3,versicolor 61 | 5.2,2.7,3.9,1.4,versicolor 62 | 5.0,2.0,3.5,1.0,versicolor 63 | 5.9,3.0,4.2,1.5,versicolor 64 | 6.0,2.2,4.0,1.0,versicolor 65 | 6.1,2.9,4.7,1.4,versicolor 66 | 5.6,2.9,3.6,1.3,versicolor 67 | 6.7,3.1,4.4,1.4,versicolor 68 | 5.6,3.0,4.5,1.5,versicolor 69 | 5.8,2.7,4.1,1.0,versicolor 70 | 6.2,2.2,4.5,1.5,versicolor 71 | 5.6,2.5,3.9,1.1,versicolor 72 | 5.9,3.2,4.8,1.8,versicolor 73 | 6.1,2.8,4.0,1.3,versicolor 74 | 6.3,2.5,4.9,1.5,versicolor 75 | 6.1,2.8,4.7,1.2,versicolor 76 | 6.4,2.9,4.3,1.3,versicolor 77 | 6.6,3.0,4.4,1.4,versicolor 78 | 6.8,2.8,4.8,1.4,versicolor 79 | 6.7,3.0,5.0,1.7,versicolor 80 | 
6.0,2.9,4.5,1.5,versicolor 81 | 5.7,2.6,3.5,1.0,versicolor 82 | 5.5,2.4,3.8,1.1,versicolor 83 | 5.5,2.4,3.7,1.0,versicolor 84 | 5.8,2.7,3.9,1.2,versicolor 85 | 6.0,2.7,5.1,1.6,versicolor 86 | 5.4,3.0,4.5,1.5,versicolor 87 | 6.0,3.4,4.5,1.6,versicolor 88 | 6.7,3.1,4.7,1.5,versicolor 89 | 6.3,2.3,4.4,1.3,versicolor 90 | 5.6,3.0,4.1,1.3,versicolor 91 | 5.5,2.5,4.0,1.3,versicolor 92 | 5.5,2.6,4.4,1.2,versicolor 93 | 6.1,3.0,4.6,1.4,versicolor 94 | 5.8,2.6,4.0,1.2,versicolor 95 | 5.0,2.3,3.3,1.0,versicolor 96 | 5.6,2.7,4.2,1.3,versicolor 97 | 5.7,3.0,4.2,1.2,versicolor 98 | 5.7,2.9,4.2,1.3,versicolor 99 | 6.2,2.9,4.3,1.3,versicolor 100 | 5.1,2.5,3.0,1.1,versicolor 101 | 5.7,2.8,4.1,1.3,versicolor 102 | 6.3,3.3,6.0,2.5,virginica 103 | 5.8,2.7,5.1,1.9,virginica 104 | 7.1,3.0,5.9,2.1,virginica 105 | 6.3,2.9,5.6,1.8,virginica 106 | 6.5,3.0,5.8,2.2,virginica 107 | 7.6,3.0,6.6,2.1,virginica 108 | 4.9,2.5,4.5,1.7,virginica 109 | 7.3,2.9,6.3,1.8,virginica 110 | 6.7,2.5,5.8,1.8,virginica 111 | 7.2,3.6,6.1,2.5,virginica 112 | 6.5,3.2,5.1,2.0,virginica 113 | 6.4,2.7,5.3,1.9,virginica 114 | 6.8,3.0,5.5,2.1,virginica 115 | 5.7,2.5,5.0,2.0,virginica 116 | 5.8,2.8,5.1,2.4,virginica 117 | 6.4,3.2,5.3,2.3,virginica 118 | 6.5,3.0,5.5,1.8,virginica 119 | 7.7,3.8,6.7,2.2,virginica 120 | 7.7,2.6,6.9,2.3,virginica 121 | 6.0,2.2,5.0,1.5,virginica 122 | 6.9,3.2,5.7,2.3,virginica 123 | 5.6,2.8,4.9,2.0,virginica 124 | 7.7,2.8,6.7,2.0,virginica 125 | 6.3,2.7,4.9,1.8,virginica 126 | 6.7,3.3,5.7,2.1,virginica 127 | 7.2,3.2,6.0,1.8,virginica 128 | 6.2,2.8,4.8,1.8,virginica 129 | 6.1,3.0,4.9,1.8,virginica 130 | 6.4,2.8,5.6,2.1,virginica 131 | 7.2,3.0,5.8,1.6,virginica 132 | 7.4,2.8,6.1,1.9,virginica 133 | 7.9,3.8,6.4,2.0,virginica 134 | 6.4,2.8,5.6,2.2,virginica 135 | 6.3,2.8,5.1,1.5,virginica 136 | 6.1,2.6,5.6,1.4,virginica 137 | 7.7,3.0,6.1,2.3,virginica 138 | 6.3,3.4,5.6,2.4,virginica 139 | 6.4,3.1,5.5,1.8,virginica 140 | 6.0,3.0,4.8,1.8,virginica 141 | 6.9,3.1,5.4,2.1,virginica 142 | 6.7,3.1,5.6,2.4,virginica 143 | 6.9,3.1,5.1,2.3,virginica 144 | 5.8,2.7,5.1,1.9,virginica 145 | 6.8,3.2,5.9,2.3,virginica 146 | 6.7,3.3,5.7,2.5,virginica 147 | 6.7,3.0,5.2,2.3,virginica 148 | 6.3,2.5,5.0,1.9,virginica 149 | 6.5,3.0,5.2,2.0,virginica 150 | 6.2,3.4,5.4,2.3,virginica 151 | 5.9,3.0,5.1,1.8,virginica 152 | -------------------------------------------------------------------------------- /chapter2/iris_classification_xgboost/src/main.rs: -------------------------------------------------------------------------------- 1 | extern crate serde; 2 | // This lets us write `#[derive(Deserialize)]`. 3 | #[macro_use] 4 | extern crate serde_derive; 5 | 6 | use std::io; 7 | use std::process; 8 | use std::vec::Vec; 9 | use std::error::Error; 10 | 11 | use csv; 12 | use rand; 13 | use rand::thread_rng; 14 | use rand::seq::SliceRandom; 15 | 16 | use xgboost; 17 | use xgboost::{parameters, DMatrix, Booster}; 18 | 19 | use ml_utils::datasets::Flower; 20 | 21 | fn main() { 22 | if let Err(err) = read_csv() { 23 | println!("{}", err); 24 | process::exit(1); 25 | } 26 | } 27 | 28 | fn read_csv() -> Result<(), Box<dyn Error>> { 29 | // Get all the data 30 | let mut rdr = csv::Reader::from_reader(io::stdin()); 31 | let mut data = Vec::new(); 32 | for result in rdr.deserialize() { 33 | let r: Flower = result?; 34 | data.push(r); // data contains all the records 35 | } 36 | 37 | // shuffle the data. 38 | data.shuffle(&mut thread_rng()); 39 | 40 | // separate out to train and test datasets. 
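// (The split below holds out 20% of the shuffled records for testing and an equally sized chunk for validation; the remainder is used for training.)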
41 | let test_size: f32 = 0.2; 42 | let test_size: f32 = data.len() as f32 * test_size; 43 | let test_size = test_size.round() as usize; 44 | // we are keeping the val size to be the same as test_size. 45 | // this can be changed if required 46 | let val_size = test_size.clone(); 47 | 48 | let (test_data, train_and_val_data) = data.split_at(test_size); 49 | let (val_data, train_data) = train_and_val_data.split_at(val_size); 50 | let train_size = train_data.len(); 51 | let test_size = test_data.len(); 52 | let val_size = val_data.len(); 53 | 54 | // differentiate the features and the labels. 55 | let flower_x_train: Vec<f32> = train_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 56 | let flower_y_train: Vec<f32> = train_data.iter().map(|r| r.into_labels()).collect(); 57 | 58 | let flower_x_test: Vec<f32> = test_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 59 | let flower_y_test: Vec<f32> = test_data.iter().map(|r| r.into_labels()).collect(); 60 | 61 | let flower_x_val: Vec<f32> = val_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 62 | let flower_y_val: Vec<f32> = val_data.iter().map(|r| r.into_labels()).collect(); 63 | 64 | // convert training data into XGBoost's matrix format 65 | let mut dtrain = DMatrix::from_dense(&flower_x_train, train_size).unwrap(); 66 | 67 | // set ground truth labels for the training matrix 68 | dtrain.set_labels(&flower_y_train).unwrap(); 69 | 70 | // test matrix with test_size rows 71 | let mut dtest = DMatrix::from_dense(&flower_x_test, test_size).unwrap(); 72 | dtest.set_labels(&flower_y_test).unwrap(); 73 | 74 | // validation matrix with val_size rows 75 | let mut dval = DMatrix::from_dense(&flower_x_val, val_size).unwrap(); 76 | dval.set_labels(&flower_y_val).unwrap(); 77 | 78 | // configure objectives, metrics, etc. 79 | let learning_params = parameters::learning::LearningTaskParametersBuilder::default() 80 | .objective(parameters::learning::Objective::MultiSoftmax(3)) 81 | .build().unwrap(); 82 | 83 | // configure the tree-based learning model's parameters 84 | let tree_params = parameters::tree::TreeBoosterParametersBuilder::default() 85 | .max_depth(2) 86 | .eta(1.0) 87 | .build().unwrap(); 88 | 89 | // overall configuration for Booster 90 | let booster_params = parameters::BoosterParametersBuilder::default() 91 | .booster_type(parameters::BoosterType::Tree(tree_params)) 92 | .learning_params(learning_params) 93 | .verbose(true) 94 | .build().unwrap(); 95 | 96 | // specify datasets to evaluate against during training 97 | let evaluation_sets = &[(&dtrain, "train"), (&dtest, "test")]; 98 | 99 | // overall configuration for training/evaluation 100 | let params = parameters::TrainingParametersBuilder::default() 101 | .dtrain(&dtrain) // dataset to train with 102 | .boost_rounds(2) // number of training iterations 103 | .booster_params(booster_params) // model parameters 104 | .evaluation_sets(Some(evaluation_sets)) // optional datasets to evaluate against in each iteration 105 | .build().unwrap(); 106 | 107 | // train model, and print evaluation data 108 | let booster = Booster::train(&params).unwrap(); 109 | 110 | // get predictions 111 | let preds = booster.predict(&dval).unwrap(); 112 | println!("preds: {:?}", preds); 113 | 114 | // true values 115 | let labels = dval.get_labels().unwrap(); 116 | println!("{:?}", labels); 117 | 118 | // find the accuracy 119 | let mut hits = 0; 120 | let mut correct_hits = 0; 121 | for (predicted, actual) in preds.iter().zip(labels.iter()) { 122 | if predicted == actual { 123 | correct_hits += 1; 124 | } 125 | hits += 1; 126 | } 
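// every validation prediction has been compared against a label, so correct_hits / hits below is the validation accuracy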
127 | assert_eq!(hits, preds.len()); 128 | println!("accuracy={} ({}/{} correct)", correct_hits as f32 / hits as f32, correct_hits, preds.len()); 129 | 130 | Ok(()) 131 | } -------------------------------------------------------------------------------- /chapter2/kmeans_rusty_machine/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "linear_reg_rustymachine" 3 | version = "0.1.0" 4 | authors = ["joydeep bhattacharjee "] 5 | 6 | [dependencies] 7 | rusty-machine="0.5.4" 8 | rand = "0.6.5" 9 | ml-utils = { path = "../ml-utils" } 10 | csv = "1.0.7" -------------------------------------------------------------------------------- /chapter2/kmeans_rusty_machine/src/main.rs: -------------------------------------------------------------------------------- 1 | extern crate rusty_machine; 2 | extern crate rand; 3 | 4 | use rusty_machine::linalg::{Matrix, BaseMatrix}; 5 | use rusty_machine::learning::k_means::KMeansClassifier; 6 | use rusty_machine::learning::UnSupModel; 7 | 8 | use rand::thread_rng; 9 | // use rand::distributions::IndependentSample; // only needed by the commented-out generate_data below; removed in rand 0.5+ 10 | // use rand::distributions::normal::Normal; 11 | 12 | use csv; 13 | use ml_utils; 14 | use ml_utils::datasets::Flower; 15 | 16 | // fn generate_data(centroids: &Matrix<f64>, 17 | // points_per_centroid: usize, 18 | // noise: f64) 19 | // -> Matrix<f64> { 20 | // assert!(centroids.cols() > 0, "Centroids cannot be empty."); 21 | // assert!(centroids.rows() > 0, "Centroids cannot be empty."); 22 | // assert!(noise >= 0f64, "Noise must be non-negative."); 23 | // let mut raw_cluster_data = Vec::with_capacity(centroids.rows() * points_per_centroid * 24 | // centroids.cols()); 25 | 26 | // let mut rng = thread_rng(); 27 | // let normal_rv = Normal::new(0f64, noise); 28 | 29 | // for _ in 0..points_per_centroid { 30 | // // Generate points from each centroid 31 | // for centroid in centroids.row_iter() { 32 | // // Generate a point randomly around the centroid 33 | // let mut point = Vec::with_capacity(centroids.cols()); 34 | // for feature in centroid.iter() { 35 | // point.push(feature + normal_rv.ind_sample(&mut rng)); 36 | // } 37 | 38 | // // Push point to raw_cluster_data 39 | // raw_cluster_data.extend(point); 40 | // } 41 | // } 42 | 43 | // Matrix::new(centroids.rows() * points_per_centroid, 44 | // centroids.cols(), 45 | // raw_cluster_data) 46 | // } 47 | 48 | fn main() { 49 | let data = "sepal_length,sepal_width,petal_length,petal_width,species\n5.1,3.5,1.4,0.2,setosa\n"; 50 | let mut rdr = csv::Reader::from_reader(data.as_bytes()); 51 | let mut data = Vec::new(); 52 | for result in rdr.deserialize() { 53 | let r: Flower = result.unwrap(); 54 | data.push(r); // data contains all the records 55 | } 56 | 57 | let flower_x_train: Vec<f64> = data.iter().flat_map(|r| r.into_features()).collect(); 58 | let flower_x_train = Matrix::new(flower_x_train.len() / 4, 4, flower_x_train); 59 | 60 | 61 | println!("K-Means clustering example:"); 62 | 63 | const SAMPLES_PER_CENTROID: usize = 2000; 64 | 65 | println!("Generating {0} samples from each centroid:", 66 | SAMPLES_PER_CENTROID); 67 | // Choose two four-dimensional cluster centers. 
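// Matrix::new(rows, cols, data) takes the values in row-major order, so this builds a 2x4 matrix: one row per centroid, one column per flower feature.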
68 | let centroids = Matrix::new(2, 4, vec![-0.5, -0.5, -0.5, 0.0, 0.5, 0.0, 0.0, 0.0]); 69 | println!("{}", centroids); 70 | 71 | // // Generate some data randomly around the centroids 72 | // let samples = generate_data(&centroids, SAMPLES_PER_CENTROID, 0.4); 73 | 74 | // Create a new model with 2 clusters 75 | let mut model = KMeansClassifier::new(2); 76 | 77 | // Train the model 78 | println!("Training the model..."); 79 | // Our train function returns a Result<(), E> 80 | model.train(&flower_x_train).unwrap(); 81 | 82 | let centroids = model.centroids().as_ref().unwrap(); 83 | println!("Model Centroids:\n{:.3}", centroids); 84 | 85 | // Predict the classes and partition into 86 | println!("Classifying the samples..."); 87 | let classes = model.predict(&flower_x_train).unwrap(); 88 | let (first, second): (Vec<usize>, Vec<usize>) = classes.data().iter().partition(|&x| *x == 0); 89 | 90 | println!("Samples closest to first centroid: {}", first.len()); 91 | println!("Samples closest to second centroid: {}", second.len()); 92 | } 93 | -------------------------------------------------------------------------------- /chapter2/logistic_regression_rustlearn/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "logistic_regression" 3 | version = "0.1.0" 4 | authors = ["joydeep bhattacharjee "] 5 | 6 | [dependencies] 7 | rustlearn = "0.3.0" 8 | bincode = "1.0.1" 9 | -------------------------------------------------------------------------------- /chapter2/logistic_regression_rustlearn/src/main.rs: -------------------------------------------------------------------------------- 1 | extern crate rustlearn; 2 | extern crate bincode; 3 | 4 | use std::fs::File; 5 | use std::io::prelude::*; 6 | 7 | use rustlearn::prelude::*; 8 | use rustlearn::linear_models::sgdclassifier::Hyperparameters; 9 | use rustlearn::cross_validation::CrossValidation; 10 | use rustlearn::datasets::iris; 11 | use rustlearn::metrics::accuracy_score; 12 | use bincode::{serialize, deserialize}; 13 | 14 | fn main() -> std::io::Result<()> { 15 | let (X, y) = iris::load_data(); 16 | let num_splits = 10; 17 | let num_epochs = 5; 18 | let mut accuracy = 0.0; 19 | let mut model = Hyperparameters::new(X.cols()) 20 | .learning_rate(0.5) 21 | .l2_penalty(0.0) 22 | .l1_penalty(0.0) 23 | .one_vs_rest(); 24 | 25 | for (train_idx, test_idx) in CrossValidation::new(X.rows(), num_splits) { 26 | let X_train = X.get_rows(&train_idx); 27 | let y_train = y.get_rows(&train_idx); 28 | let X_test = X.get_rows(&test_idx); 29 | let y_test = y.get_rows(&test_idx); 30 | 31 | for _ in 0..num_epochs { 32 | model.fit(&X_train, &y_train).unwrap(); 33 | } 34 | let prediction = model.predict(&X_test).unwrap(); 35 | let present_acc = accuracy_score(&y_test, &prediction); 36 | accuracy += present_acc; 37 | } 38 | println!("accuracy: {:#?}", accuracy / num_splits as f32); 39 | 40 | // serialize the model to bytes with bincode 41 | //let encoded = serialize(&model).unwrap(); 42 | println!("{:?}", model); 43 | //let mut file = File::create("foo.txt")?; 44 | //file.write_all(&encoded)?; 45 | Ok(()) 46 | } 47 | -------------------------------------------------------------------------------- /chapter2/ml-utils/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "ml-utils" 3 | version = "0.1.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | serde = "1" 9 | serde_derive = "1" 10 | rand = "0.6.5" 11 | ndarray = "0.12.1" 12 | itertools = 
"0.8.0" 13 | num-traits = "0.2.6" 14 | 15 | [dev-dependencies] 16 | csv = "1.0.6" -------------------------------------------------------------------------------- /chapter2/ml-utils/calc_prob.py: -------------------------------------------------------------------------------- 1 | from math import lgamma 2 | from numba import jit 3 | import numpy as np 4 | 5 | #defining the functions used 6 | @jit 7 | def h(a, b, c, d): 8 | num = lgamma(a + c) + lgamma(b + d) + lgamma(a + b) + lgamma(c + d) 9 | den = lgamma(a) + lgamma(b) + lgamma(c) + lgamma(d) + lgamma(a + b + c + d) 10 | return np.exp(num - den) 11 | 12 | @jit 13 | def g0(a, b, c): 14 | return np.exp(lgamma(a + b) + lgamma(a + c) - (lgamma(a + b + c) + lgamma(a))) 15 | 16 | @jit 17 | def hiter(a, b, c, d): 18 | while d > 1: 19 | d -= 1 20 | yield h(a, b, c, d) / d 21 | 22 | def g(a, b, c, d): 23 | return g0(a, b, c) + sum(hiter(a, b, c, d)) 24 | 25 | def calc_prob_between(beta1, beta2): 26 | return g(beta1.args[0], beta1.args[1], beta2.args[0], beta2.args[1]) 27 | -------------------------------------------------------------------------------- /chapter2/ml-utils/clusim stuff.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 14, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import clusim.clugen as clugen\n", 10 | "from clusim.sim import contingency_table\n", 11 | "from clusim.clustering import print_clustering\n", 12 | "from clusim.sim import count_pairwise_cooccurence\n", 13 | "from clusim.sim import jaccard_index, rand_index, expected_rand_index" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 2, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "c1 = clugen.make_random_clustering(n_elements=9, n_clusters=3, random_model=\"num\")\n", 23 | "c2 = clugen.make_random_clustering(n_elements=9, n_clusters=3, random_model=\"num\")" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 4, 29 | "metadata": {}, 30 | "outputs": [ 31 | { 32 | "data": { 33 | "text/plain": [ 34 | "[[2, 1, 1], [0, 1, 1], [1, 2, 0]]" 35 | ] 36 | }, 37 | "execution_count": 4, 38 | "metadata": {}, 39 | "output_type": "execute_result" 40 | } 41 | ], 42 | "source": [ 43 | "contingency_table(c1, c2)" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 7, 49 | "metadata": {}, 50 | "outputs": [ 51 | { 52 | "data": { 53 | "text/plain": [ 54 | "(2.0, 8.0, 8.0, 18.0)" 55 | ] 56 | }, 57 | "execution_count": 7, 58 | "metadata": {}, 59 | "output_type": "execute_result" 60 | } 61 | ], 62 | "source": [ 63 | "count_pairwise_cooccurence(c1, c2)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 8, 69 | "metadata": {}, 70 | "outputs": [ 71 | { 72 | "name": "stdout", 73 | "output_type": "stream", 74 | "text": [ 75 | "0837|15|246\n" 76 | ] 77 | } 78 | ], 79 | "source": [ 80 | "print_clustering(c1)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 9, 86 | "metadata": {}, 87 | "outputs": [ 88 | { 89 | "name": "stdout", 90 | "output_type": "stream", 91 | "text": [ 92 | "047|1236|85\n" 93 | ] 94 | } 95 | ], 96 | "source": [ 97 | "print_clustering(c2)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 11, 103 | "metadata": {}, 104 | "outputs": [ 105 | { 106 | "data": { 107 | "text/plain": [ 108 | "0.1111111111111111" 109 | ] 110 | }, 111 | "execution_count": 11, 112 | "metadata": {}, 113 | "output_type": "execute_result" 114 | 
} 115 | ], 116 | "source": [ 117 | "jaccard_index(c1, c2)" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 13, 123 | "metadata": {}, 124 | "outputs": [ 125 | { 126 | "data": { 127 | "text/plain": [ 128 | "0.5555555555555556" 129 | ] 130 | }, 131 | "execution_count": 13, 132 | "metadata": {}, 133 | "output_type": "execute_result" 134 | } 135 | ], 136 | "source": [ 137 | "rand_index(c1, c2)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 15, 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "ename": "UnboundLocalError", 147 | "evalue": "local variable 'expected' referenced before assignment", 148 | "output_type": "error", 149 | "traceback": [ 150 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 151 | "\u001b[0;31mUnboundLocalError\u001b[0m Traceback (most recent call last)", 152 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mexpected_rand_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mc1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mc2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 153 | "\u001b[0;32m~/opensource/programming-languages/rust-lang/ml-utils/venv/lib/python3.5/site-packages/clusim/sim.py\u001b[0m in \u001b[0;36mexpected_rand_index\u001b[0;34m(n_elements, random_model, n_clusters1, n_clusters2, clu_size_seq1, clu_size_seq2)\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[0;32mpass\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 322\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 323\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfloat64\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexpected\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 324\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 325\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 154 | "\u001b[0;31mUnboundLocalError\u001b[0m: local variable 'expected' referenced before assignment" 155 | ] 156 | } 157 | ], 158 | "source": [ 159 | "expected_rand_index(c1, c2)" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [] 168 | } 169 | ], 170 | "metadata": { 171 | "kernelspec": { 172 | "display_name": "Python 3", 173 | "language": "python", 174 | "name": "python3" 175 | }, 176 | "language_info": { 177 | "codemirror_mode": { 178 | "name": "ipython", 179 | "version": 3 180 | }, 181 | "file_extension": ".py", 182 | "mimetype": "text/x-python", 183 | "name": "python", 184 | "nbconvert_exporter": "python", 185 | "pygments_lexer": "ipython3", 186 | "version": "3.5.2" 187 | } 188 | }, 189 | "nbformat": 4, 190 | "nbformat_minor": 2 191 | } 192 | -------------------------------------------------------------------------------- /chapter2/ml-utils/easy_bayesian_AB.py: -------------------------------------------------------------------------------- 1 | # reference https://towardsdatascience.com/bayesian-a-b-testing-with-python-the-easy-guide-d638f89e0b8a 2 | from scipy.stats import beta 3 | import numpy as np 4 | from calc_prob import calc_prob_between 5 | 6 | #This is the known data: impressions and conversions for the Control and Test set 7 | imps_ctrl,convs_ctrl=16500, 30 8 | imps_test, convs_test=17000, 50 9 | 10 | #here we create the Beta functions for the two sets 11 | a_C, b_C = convs_ctrl+1, imps_ctrl-convs_ctrl+1 12 | beta_C = beta(a_C, b_C) 13 | a_T, b_T = convs_test+1, 
imps_test-convs_test+1 14 | beta_T = beta(a_T, b_T) 15 | 16 | #calculating the lift 17 | lift=(beta_T.mean()-beta_C.mean())/beta_C.mean() 18 | 19 | #calculating the probability for Test to be better than Control 20 | prob=calc_prob_between(beta_T, beta_C) 21 | 22 | print (f"Test option lifts Conversion Rates by {lift*100:2.2f}% with {prob*100:2.1f}% probability.") 23 | #output: Test option lifts Conversion Rates by 59.68% with 98.2% probability. 24 | -------------------------------------------------------------------------------- /chapter2/ml-utils/examples/measures.rs: -------------------------------------------------------------------------------- 1 | use std::vec::Vec; 2 | use std::collections::HashSet; 3 | 4 | use ml_utils as ml; 5 | use ml::unsup_metrics::{jaccard_index, hashset, rand_index}; 6 | // use ml::{jaccard_index, hashset, rand_index}; 7 | 8 | fn main() { 9 | // let clusters1 = vec![vec![0u8,8,3, 7], vec![1u8,5], vec![2u8, 4, 6]]; 10 | // let clusters1: Vec<HashSet<u64>> = clusters1.iter().map( 11 | // |v| hashset(&v)).collect(); 12 | // let clusters2 = vec![vec![0u8,4, 7], vec![1u8,2,3, 6], vec![8u8,5]]; 13 | // let clusters2: Vec<HashSet<u64>> = clusters2.iter().map( 14 | // |v| hashset(&v)).collect(); 15 | 16 | // let ji = jaccard_index(&clusters1, &clusters2); 17 | // println!("jaccard index: {:?}", ji); 18 | 19 | // let ri = rand_index(&clusters1, &clusters2); 20 | // println!("{:?}", ri); 21 | println!("change the example"); 22 | } -------------------------------------------------------------------------------- /chapter2/ml-utils/src/hypothesis_testing.rs: -------------------------------------------------------------------------------- 1 | use std::vec::Vec; 2 | use std::collections::HashMap; 3 | // use std::io::Error; 4 | use std::error::Error; 5 | 6 | use rand; 7 | use rand::distributions::{Bernoulli, Distribution}; 8 | 9 | #[derive(Debug, PartialEq, Eq, Hash)] 10 | enum User { 11 | Group, 12 | Converted, 13 | } 14 | 15 | fn generate_data(control_size: u32, test_size: u32, p_control: f64, p_test: f64) -> Vec<HashMap<User, bool>> { 16 | // initialize an empty container.
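// Each simulated user below gets two Bernoulli draws: a fair coin assigns
// the group (true => control, false => test), and the group's own conversion
// rate then decides whether that user converted. Group sizes therefore come
// out only approximately equal, not exactly control_size and test_size.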
17 | let mut data = vec![]; 18 | 19 | let total = control_size + test_size; 20 | 21 | let group_bern = Bernoulli::new(0.5); // we need to divide the whole population equally 22 | 23 | let control_bern = Bernoulli::new(p_control); 24 | let test_bern = Bernoulli::new(p_test); 25 | 26 | for _ in 0..total { 27 | 28 | let mut row = HashMap::new(); 29 | let v = group_bern.sample(&mut rand::thread_rng()); 30 | row.insert(User::Group, v); 31 | 32 | let converted_v = match v { 33 | // true means control and false means test 34 | true => control_bern.sample(&mut rand::thread_rng()), 35 | false => test_bern.sample(&mut rand::thread_rng()), 36 | }; 37 | row.insert(User::Converted, converted_v); 38 | data.push(row); 39 | } 40 | data 41 | } 42 | 43 | fn find_rate_difference(data: &Vec<HashMap<User, bool>>) -> Result<f64, Box<dyn Error>> { 44 | let mut total_control_groups: usize = 0; 45 | let mut converted_control_group: usize = 0; 46 | let mut converted_test_group: usize = 0; 47 | for d in data { 48 | let user_group = d.get(&User::Group) 49 | .expect("data must have group and converted"); 50 | let user_conversion = d.get(&User::Converted) 51 | .expect("data must have group and converted"); 52 | if user_group == &true { 53 | total_control_groups += 1; 54 | if user_conversion == &true { 55 | converted_control_group += 1; 56 | } 57 | } else { 58 | if user_conversion == &true { 59 | converted_test_group += 1; 60 | } 61 | } 62 | } 63 | let total_test_group = data.len() - total_control_groups; 64 | let control_rate = converted_control_group as f64/total_control_groups as f64; 65 | let test_rate = converted_test_group as f64/total_test_group as f64; 66 | Ok(test_rate - control_rate) 67 | } 68 | 69 | fn main() { 70 | // A is control and B is test 71 | let control_size = 1000; 72 | let test_size = 1000; 73 | 74 | let bcr = 0.10; // baseline conversion rate 75 | let d_hat = 0.02; // difference between the groups 76 | let data = generate_data(control_size, test_size, bcr, bcr + d_hat); // we want data that is a little better than baseline. 77 | println!("{:?}", data); 78 | 79 | let x = find_rate_difference(&data); 80 | println!("{:?}", x); 81 | } 82 | 83 | #[cfg(test)] 84 | mod tests { 85 | use super::*; 86 | 87 | #[test] 88 | fn test_generate_data() { 89 | let data = generate_data(10, 10, 0.1, 0.02); 90 | assert_eq!(data.len(), 20); 91 | assert_eq!(data[0].contains_key(&User::Group), true); 92 | } 93 | 94 | #[test] 95 | fn test_find_rate_difference() { 96 | let mut data = vec![]; 97 | let data1: HashMap<_, _> = vec![(User::Group, false), (User::Converted, false)].into_iter().collect(); 98 | data.push(data1); 99 | let data2: HashMap<_, _> = vec![(User::Group, true), (User::Converted, true)].into_iter().collect(); 100 | data.push(data2); 101 | let res = find_rate_difference(&data).unwrap(); 102 | assert_eq!(res, -1.0); 103 | } 104 | } -------------------------------------------------------------------------------- /chapter2/ml-utils/src/lib.rs: -------------------------------------------------------------------------------- 1 | extern crate serde; 2 | // This lets us write `#[derive(Deserialize)]`.
3 | #[macro_use] 4 | extern crate serde_derive; 5 | 6 | pub mod unsup_metrics; 7 | pub mod sup_metrics; 8 | pub mod datasets; -------------------------------------------------------------------------------- /chapter2/ml-utils/src/sup_metrics.rs: -------------------------------------------------------------------------------- 1 | use std::cmp::Ordering; 2 | 3 | // for regression 4 | pub fn r_squared_score(y_test: &[f64], y_preds: &[f64]) -> f64 { 5 | let model_variance: f64 = y_test.iter().zip(y_preds.iter()).fold( 6 | 0., |v, (y_i, y_i_hat)| { 7 | v + (y_i - y_i_hat).powi(2) 8 | } 9 | ); 10 | 11 | // get the mean for the actual values to be used later 12 | let y_test_mean = y_test.iter().sum::<f64>() as f64 13 | / y_test.len() as f64; 14 | 15 | // finding the variance 16 | let variance = y_test.iter().fold( 17 | 0., |v, &x| {v + (x - y_test_mean).powi(2)} 18 | ); 19 | let r2_calculated: f64 = 1.0 - (model_variance / variance); 20 | r2_calculated 21 | } 22 | 23 | // for classification 24 | pub fn accuracy(y_test: &[u32], y_preds: &[u32]) -> f32 { 25 | let mut correct_hits = 0; 26 | for (predicted, actual) in y_preds.iter().zip(y_test.iter()) { 27 | if predicted == actual { 28 | correct_hits += 1; 29 | } 30 | } 31 | let acc: f32 = correct_hits as f32 / y_test.len() as f32; 32 | acc 33 | } 34 | 35 | pub fn logloss_score(y_test: &[f32], y_preds: &[f32], eps: f32) -> f32 { 36 | // complete this http://wiki.fast.ai/index.php/Log_Loss#Log_Loss_vs_Cross-Entropy 37 | let y_preds = y_preds.iter().map(|&p| { 38 | match p.partial_cmp(&(1.0 - eps)) { 39 | Some(Ordering::Less) => p, 40 | _ => 1.0 - eps, // if equal or greater. 41 | } 42 | }); 43 | let y_preds = y_preds.map(|p| { 44 | match p.partial_cmp(&eps) { 45 | Some(Ordering::Less) => eps, 46 | _ => p, 47 | } 48 | }); 49 | 50 | // Now compute the logloss 51 | let logloss_vals = y_preds.zip(y_test.iter()).map(|(predicted, &actual)| { 52 | if actual as f32 == 1.0 { 53 | (-1.0) * predicted.ln() 54 | } else if actual as f32 == 0.0 { 55 | (-1.0) * (1.0 - predicted).ln() 56 | } else { 57 | panic!("Invalid labels: target data is not either 0.0 or 1.0"); 58 | } 59 | }); 60 | logloss_vals.sum() 61 | } -------------------------------------------------------------------------------- /chapter2/ml-utils/src/unsup_metrics.rs: -------------------------------------------------------------------------------- 1 | use std::vec::Vec; 2 | use std::collections::HashSet; 3 | // use std::io::Error; 4 | use std::error::Error; 5 | use std::cmp::Ordering; 6 | use std::iter::FromIterator; 7 | 8 | use rand; 9 | use rand::distributions::{Bernoulli, Distribution}; 10 | use itertools; 11 | use itertools::iproduct; 12 | use itertools::Itertools; 13 | use ndarray; 14 | use ndarray::{arr2, Array, ArrayBase, OwnedRepr, Dim, Axis}; 15 | use ndarray::prelude::*; 16 | 17 | fn matching_elems_count(s1: &HashSet<u64>, s2: &HashSet<u64>) -> u64 { 18 | let common: Vec<_> = s1.intersection(s2).collect(); 19 | common.len() as u64 20 | } 21 | 22 | fn contingency_table(clusters1: &[HashSet<u64>], clusters2: &[HashSet<u64>]) -> ArrayBase<OwnedRepr<u64>, Dim<[usize; 2]>> { 23 | let length = clusters1.len(); 24 | assert!(length == clusters2.len()); 25 | let product = iproduct!(clusters1, clusters2); 26 | let cont_table_vec: Vec<u64> = product.map( 27 | |(c1, c2)| matching_elems_count(c1, c2) 28 | ).collect(); 29 | // println!("{:?}", cont_table_vec); 30 | let cont_table_mat = Array::from_shape_vec((3, 3), cont_table_vec).unwrap(); 31 | cont_table_mat 32 | // let v_chunked: Vec<Vec<u64>> = cont_table_vec.chunks(length).map(|x| x.to_vec()).collect(); 33
| // v_chunked 34 | } 35 | 36 | fn cluster_size_sequence_sqsum(clusters: &[HashSet<u64>]) -> u64 { 37 | let cluster1_size_seq: Vec<u64> = clusters.iter().map( 38 | |v| v.len() as u64).collect(); 39 | let squares = cluster1_size_seq.iter().map( 40 | |num| num.pow(2) 41 | ); 42 | squares.sum() 43 | } 44 | 45 | fn elements_in_vectr(vectr: &[HashSet<u64>]) -> u64 { 46 | let flatten_array: Vec<u64> = vectr 47 | .iter() 48 | .flat_map(|array| array.iter()) 49 | .cloned() 50 | .collect(); 51 | flatten_array.len() as u64 52 | 53 | } 54 | 55 | fn count_pairwise_cooccurence(clusters1: &[HashSet<u64>], clusters2: &[HashSet<u64>]) -> (f64, f64, f64, f64) { 56 | let cont_tbl = contingency_table(&clusters1, &clusters2); 57 | // println!("{:?}", cont_tbl); 58 | 59 | let square_matrix = cont_tbl.mapv(|a| a.pow(2)); 60 | // println!("{:?}", square_matrix); 61 | let sum_of_squares1 = square_matrix.into_raw_vec(); 62 | let sum_of_squares: u64 = sum_of_squares1.iter().sum(); 63 | // println!("{:?}", sum_of_squares); 64 | let c1_sum_sq_sizes = cluster_size_sequence_sqsum(clusters1); 65 | let c2_sum_sq_sizes = cluster_size_sequence_sqsum(clusters2); 66 | // println!("{:?}", c1_sum_sq_sizes); 67 | 68 | let c1_elements_count = elements_in_vectr(clusters1); 69 | let n11 = 0.5 * (sum_of_squares - c1_elements_count) as f64; 70 | // println!("{:?}", n11); 71 | let n10 = 0.5 * (c1_sum_sq_sizes - sum_of_squares) as f64; 72 | let n01 = 0.5 * (c2_sum_sq_sizes - sum_of_squares) as f64; 73 | let n00 = 0.5 * c1_elements_count as f64 * (c1_elements_count - 1) as f64 - n11 - n10 - n01; 74 | (n11, n10, n01, n00) 75 | } 76 | 77 | pub fn hashset(data: &[u64]) -> HashSet<u64> { 78 | HashSet::from_iter(data.iter().cloned()) 79 | } 80 | 81 | pub fn jaccard_index(clusters1: &[HashSet<u64>], clusters2: &[HashSet<u64>]) -> f64 { 82 | let (n11, n10, n01, n00) = count_pairwise_cooccurence(clusters1, clusters2); 83 | // println!("{:?}", (n11, n10, n01, n00)); 84 | let denominator = n11 + n10 + n01; 85 | if denominator > 0.0 { 86 | n11 / denominator 87 | } else { 88 | 0.0 89 | } 90 | } 91 | 92 | pub fn rand_index(clusters1: &[HashSet<u64>], clusters2: &[HashSet<u64>]) -> f64 { 93 | let (n11, n10, n01, n00) = count_pairwise_cooccurence(clusters1, clusters2); 94 | (n11 + n00) / (n11 + n10 + n01 + n00) 95 | } 96 | 97 | #[cfg(test)] 98 | mod tests { 99 | use super::*; 100 | 101 | fn generate_data() -> (Vec<HashSet<u64>>, Vec<HashSet<u64>>) { 102 | let clusters1 = vec![vec![0u64,8,3, 7], vec![1u64,5], vec![2u64, 4, 6]]; 103 | let clusters1: Vec<HashSet<u64>> = clusters1.iter().map( 104 | |v| hashset(&v)).collect(); 105 | let clusters2 = vec![vec![0u64,4, 7], vec![1u64,2,3, 6], vec![8u64,5]]; 106 | let clusters2: Vec<HashSet<u64>> = clusters2.iter().map( 107 | |v| hashset(&v)).collect(); 108 | (clusters1, clusters2) 109 | } 110 | 111 | #[test] 112 | fn test_contingency_table() { 113 | let (clusters1, clusters2) = generate_data(); 114 | let table = contingency_table(&clusters1, &clusters2); 115 | println!("{:?}", table); 116 | let table2: Vec<u64> = [2, 1, 1, 0, 1, 1, 1, 2, 0].to_vec(); 117 | let table3 = Array::from_shape_vec((3,3), table2).unwrap(); 118 | assert_eq!(table, table3); 119 | } 120 | 121 | #[test] 122 | fn test_matching_elems_count() { 123 | let (clusters1, clusters2) = generate_data(); 124 | let s1 = &clusters1[0]; 125 | let s2 = &clusters2[0]; 126 | let res = matching_elems_count(&s1, &s2); 127 | assert_eq!(res, 2); 128 | } 129 | 130 | #[test] 131 | fn test_cluster_size_sequence_sqsum() { 132 | let (clusters1, _) = generate_data(); 133 | let res = cluster_size_sequence_sqsum(&clusters1); 134 | assert_eq!(res, 29); 135 | } 136 | 137
| #[test] 138 | fn test_elements_in_vector() { 139 | let (clusters1, _) = generate_data(); 140 | let res = elements_in_vectr(&clusters1); 141 | assert_eq!(res, 9); 142 | } 143 | 144 | #[test] 145 | fn test_count_pairwise_cooccurence() { 146 | let (clusters1, clusters2) = generate_data(); 147 | let res = count_pairwise_cooccurence(&clusters1, &clusters2); 148 | assert_eq!(res, (2.0, 8.0, 8.0, 18.0)); 149 | } 150 | 151 | #[test] 152 | fn test_jaccard_index() { 153 | let (clusters1, clusters2) = generate_data(); 154 | let res = jaccard_index(&clusters1, &clusters2); 155 | assert_eq!(res, 0.1111111111111111); 156 | } 157 | 158 | #[test] 159 | fn test_rand_index() { 160 | let (clusters1, clusters2) = generate_data(); 161 | let res = rand_index(&clusters1, &clusters2); 162 | assert_eq!(res, 0.5555555555555556); 163 | } 164 | } 165 | -------------------------------------------------------------------------------- /chapter2/nb_classification/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "nb_classification" 3 | version = "0.1.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | csv = "*" 9 | rusty-machine="*" 10 | hyper = "*" 11 | rulinalg = { version = "0.4.2", features = ["io"] } 12 | serde = "1" 13 | serde_derive = "1" 14 | rand = "0.6.5" -------------------------------------------------------------------------------- /chapter2/rust_and_tf/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rust_and_tf" 3 | version = "0.1.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | tensorflow = { version = "0.13.0", features = ["tensorflow_unstable"] } 9 | serde = "1" 10 | serde_derive = "1" 11 | rand = "0.6.5" 12 | transpose = "0.2.0" 13 | mnist = "0.4.0" 14 | ml-utils = { path = "../ml-utils" } 15 | random = "0.12.2" -------------------------------------------------------------------------------- /chapter2/rust_and_tf/src/conv_nets_maxpooling.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | 3 | use std::error::Error; 4 | use std::result::Result; 5 | use std::io::prelude::*; 6 | use std::io::BufReader; 7 | use std::path::Path; 8 | use std::fs::File; 9 | use std::vec::Vec; 10 | 11 | use rand; 12 | use rand::thread_rng; 13 | use rand::seq::SliceRandom; 14 | use transpose; 15 | use mnist; 16 | use mnist::{Mnist, MnistBuilder}; 17 | 18 | use tensorflow as tf; 19 | use tf::expr::{Compiler, Constant}; 20 | use tf::{Graph, Tensor, DataType, Shape}; 21 | use tf::{Session, SessionOptions, SessionRunArgs}; 22 | 23 | #[cfg_attr(feature="examples_system_alloc", global_allocator)] 24 | #[cfg(feature="examples_system_alloc")] 25 | static ALLOCATOR: std::alloc::System = std::alloc::System; 26 | 27 | pub fn run() -> Result<(), Box<dyn Error>> { 28 | // Get all the data 29 | let (trn_size, rows, cols) = (10_000, 28, 28); 30 | 31 | // Deconstruct the returned Mnist struct. 32 | let Mnist { trn_img, trn_lbl, .. } = MnistBuilder::new() 33 | .label_format_digit() 34 | .training_set_length(trn_size) 35 | .validation_set_length(10_000) 36 | .test_set_length(10_000) 37 | .finalize(); 38 | // Get the label of the first digit.
39 | let first_label = trn_lbl[0]; 40 | println!("The first digit is a {}.", first_label); 41 | println!("size of training {}.", trn_img.len()); 42 | 43 | let trn_img: Vec<f64> = trn_img.iter().map(|&x| x as f64).collect(); 44 | let trn_lbl: Vec<f64> = trn_lbl.iter().map(|&x| x as f64).collect(); 45 | 46 | 47 | // Define graph. 48 | let mut graph = Graph::new(); 49 | let X = <Tensor<f64>>::new(&[10_000, 28, 28, 1]).with_values(&trn_img[..])?; 50 | let y = <Tensor<f64>>::new(&[10_000,]).with_values(&trn_lbl[..])?; 51 | let z = <Tensor<f64>>::new(&[28, 28, 1, 32]).with_values(&vec![1.; 25088])?; 52 | 53 | let X_const = { 54 | let mut c = graph.new_operation("Placeholder", "X")?; 55 | c.set_attr_type("dtype", DataType::Double)?; // check the enums https://github.com/tensorflow/rust/blob/ddff61850be1c8044ac86350caeed5a55824ebe4/src/lib.rs#L297 56 | // c.set_attr_shape("shape", &Shape::from(Some(vec![Some(28),Some(28),Some(1),Some(32)])))?; 57 | c.set_attr_shape("shape", &Shape::from(Some(vec![Some(10_000),Some(28),Some(28),Some(1)])))?; 58 | c.finish()? 59 | }; 60 | // operation types https://github.com/malmaud/TensorFlow.jl/blob/063511525902bdf84a461035758ef9a73ba4a635/src/ops/op_names.txt 61 | let max_pool = { 62 | let mut op = graph.new_operation("MaxPool", "max_pool")?; 63 | op.add_input(X_const.clone()); 64 | op.set_attr_string("padding", "VALID")?; 65 | op.set_attr_int_list("strides", &[1,2,2,1])?; 66 | op.set_attr_int_list("ksize", &[1,2,2,1])?; 67 | op.finish()? 68 | }; 69 | 70 | // Run graph. 71 | let session = Session::new(&SessionOptions::new(), &graph)?; 72 | let mut args = SessionRunArgs::new(); 73 | args.add_feed(&X_const, 0, &X); 74 | let max_pool_token = args.request_fetch(&max_pool, 0); 75 | session.run(&mut args)?; 76 | let max_pool_token_res: Tensor<f64> = args.fetch::<f64>(max_pool_token)?; 77 | println!("Now the max_pool"); 78 | println!("{:?}", &max_pool_token_res[..]); 79 | 80 | Ok(()) 81 | } 82 | -------------------------------------------------------------------------------- /chapter2/rust_and_tf/src/graph_variables.rs: -------------------------------------------------------------------------------- 1 | use std::error::Error; 2 | use std::result::Result; 3 | use tensorflow as tf; 4 | use tf::expr::{Variable, Compiler}; 5 | use tf::{Graph, Tensor}; 6 | use tf::{Session, SessionOptions, SessionRunArgs}; 7 | 8 | #[cfg_attr(feature="examples_system_alloc", global_allocator)] 9 | #[cfg(feature="examples_system_alloc")] 10 | static ALLOCATOR: std::alloc::System = std::alloc::System; 11 | 12 | pub fn run() -> Result<(), Box<dyn Error>> { 13 | let mut g = Graph::new(); 14 | 15 | let (x_node, y_node, z_node) = { 16 | let mut compiler = Compiler::new(&mut g); 17 | let x_expr = <Variable<f32>>::new_expr(&vec![1], "x"); 18 | let y_expr = <Variable<f32>>::new_expr(&vec![1], "y"); 19 | let y_node = compiler.compile(y_expr.clone())?; 20 | let x_node = compiler.compile(x_expr.clone())?; 21 | 22 | // let z = x * x * y + y + 2; 23 | let z_node = compiler.compile( 24 | x_expr.clone() * x_expr.clone() * y_expr.clone() 25 | + y_expr.clone() + 2.0f32)?; 26 | (x_node, y_node, z_node) 27 | }; 28 | 29 | let options = SessionOptions::new(); 30 | let mut session = Session::new(&options, &g)?; 31 | 32 | // Evaluate the graph. 33 | let x = <Tensor<f32>>::new(&[1]).with_values(&[3.0_f32]).unwrap(); 34 | let y = <Tensor<f32>>::new(&[1]).with_values(&[4.0_f32]).unwrap(); 35 | let mut step = SessionRunArgs::new(); 36 | step.add_feed(&x_node, 0, &x); 37 | step.add_feed(&y_node, 0, &y); 38 | let output_token = step.request_fetch(&z_node, 0); 39 | session.run(&mut step).unwrap(); 40 | 41 | // Check our results.
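// `request_fetch` above only registered a token for z's output; the value is
// computed during `session.run`, and the token is redeemed below to read it.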
42 | let output_tensor = step.fetch::<f32>(output_token)?; 43 | println!("working with variables: z = x * x * y + y + 2 => {:?}", output_tensor[0]); 44 | session.close()?; 45 | 46 | Ok(()) 47 | } -------------------------------------------------------------------------------- /chapter2/rust_and_tf/src/graph_with_placeholder.rs: -------------------------------------------------------------------------------- 1 | use std::error::Error; 2 | use std::result::Result; 3 | use tensorflow as tf; 4 | use tf::expr::{Placeholder, Compiler}; 5 | use tf::{Graph, Tensor}; 6 | use tf::{Session, SessionOptions, SessionRunArgs}; 7 | 8 | #[cfg_attr(feature="examples_system_alloc", global_allocator)] 9 | #[cfg(feature="examples_system_alloc")] 10 | static ALLOCATOR: std::alloc::System = std::alloc::System; 11 | 12 | pub fn run() -> Result<(), Box<dyn Error>> { 13 | let mut g = Graph::new(); 14 | 15 | let (x_node, y_node, z_node) = { 16 | let mut compiler = Compiler::new(&mut g); 17 | let x_expr = <Placeholder<f32>>::new_expr(&vec![2], "x"); 18 | let y_expr = <Placeholder<f32>>::new_expr(&vec![2], "y"); 19 | let y_node = compiler.compile(y_expr.clone())?; 20 | let x_node = compiler.compile(x_expr.clone())?; 21 | 22 | // let f = x * x * y + y + 2; 23 | let z_node = compiler.compile(x_expr.clone() * x_expr.clone() * y_expr.clone() + y_expr.clone() + 2.0f32)?; 24 | (x_node, y_node, z_node) 25 | }; 26 | 27 | let options = SessionOptions::new(); 28 | let mut session = Session::new(&options, &g)?; 29 | 30 | // Evaluate the graph. 31 | let x = <Tensor<f32>>::new(&[2]).with_values(&[1.0_f32, 2.0]).unwrap(); 32 | let y = <Tensor<f32>>::new(&[2]).with_values(&[3.0_f32, 4.0]).unwrap(); 33 | let mut step = SessionRunArgs::new(); 34 | step.add_feed(&x_node, 0, &x); 35 | step.add_feed(&y_node, 0, &y); 36 | let output_token = step.request_fetch(&z_node, 0); 37 | session.run(&mut step).unwrap(); 38 | 39 | // Check our results. 40 | let output_tensor = step.fetch::<f32>(output_token)?; 41 | println!("{:?}", output_tensor[0]); 42 | println!("{:?}", output_tensor[1]); 43 | session.close()?; 44 | 45 | Ok(()) 46 | } -------------------------------------------------------------------------------- /chapter2/rust_and_tf/src/linear_regression.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | 3 | use std::error::Error; 4 | use std::result::Result; 5 | use std::vec::Vec; 6 | 7 | use rand; 8 | use rand::thread_rng; 9 | use rand::seq::SliceRandom; 10 | use transpose; 11 | 12 | use tensorflow as tf; 13 | use tf::{Graph, Tensor, DataType}; 14 | use tf::{Session, SessionOptions, SessionRunArgs}; 15 | 16 | use ml_utils; 17 | use ml_utils::datasets::get_boston_records_from_file; 18 | use ml_utils::sup_metrics::r_squared_score; 19 | 20 | #[cfg_attr(feature="examples_system_alloc", global_allocator)] 21 | #[cfg(feature="examples_system_alloc")] 22 | static ALLOCATOR: std::alloc::System = std::alloc::System; 23 | 24 | pub fn run() -> Result<(), Box<dyn Error>> { 25 | // Get all the data 26 | let filename = "data/housing.csv"; 27 | let mut data = get_boston_records_from_file(&filename); 28 | 29 | // shuffle the data. 30 | data.shuffle(&mut thread_rng()); 31 | 32 | // separate out to train and test datasets. 33 | let test_size: f64 = 0.2; 34 | let test_size: f64 = data.len() as f64 * test_size; 35 | let test_size = test_size.round() as usize; 36 | let (test_data, train_data) = data.split_at(test_size); 37 | let train_size = train_data.len(); 38 | let test_size = test_data.len(); 39 | 40 | // differentiate the features and the targets.
41 | let boston_x_train: Vec<f64> = train_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 42 | let boston_y_train: Vec<f64> = train_data.iter().map(|r| r.into_targets()).collect(); 43 | 44 | let boston_x_test: Vec<f64> = test_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 45 | let boston_y_test: Vec<f64> = test_data.iter().map(|r| r.into_targets()).collect(); 46 | 47 | // println!("{:?}", boston_y_train.len()); 48 | // println!("{:?}", boston_x_train.len()); 49 | 50 | // Define graph. 51 | let mut graph = Graph::new(); 52 | let dim = (boston_y_train.len() as u64, 13); 53 | let test_dim = (boston_y_test.len() as u64, dim.1); 54 | let X_train = <Tensor<f64>>::new(&[dim.0, dim.1]).with_values(&boston_x_train)?; 55 | let y_train = <Tensor<f64>>::new(&[dim.0, 1]).with_values(&boston_y_train)?; 56 | let X_test = <Tensor<f64>>::new(&[test_dim.0, test_dim.1]).with_values(&boston_x_test)?; 57 | // let y_test = <Tensor<f64>>::new(&[test_dim.0, 1]).with_values(&boston_y_test)?; 58 | 59 | let mut output_array = vec![0.0; (dim.0 * dim.1) as usize]; 60 | transpose::transpose(&boston_x_train, &mut output_array, dim.1 as usize, dim.0 as usize); 61 | let XT = <Tensor<f64>>::new(&[dim.1, dim.0]).with_values(&output_array[..])?; 62 | let XT_const = { 63 | let mut op = graph.new_operation("Const", "XT")?; 64 | op.set_attr_tensor("value", XT)?; 65 | op.set_attr_type("dtype", DataType::Double)?; // check the enums https://github.com/tensorflow/rust/blob/ddff61850be1c8044ac86350caeed5a55824ebe4/src/lib.rs#L297 66 | op.finish()? 67 | }; 68 | let X_const = { 69 | let mut op = graph.new_operation("Const", "X_train")?; 70 | op.set_attr_tensor("value", X_train)?; 71 | op.set_attr_type("dtype", DataType::Double)?; // check the enums https://github.com/tensorflow/rust/blob/ddff61850be1c8044ac86350caeed5a55824ebe4/src/lib.rs#L297 72 | op.finish()? 73 | }; 74 | // operation types https://github.com/malmaud/TensorFlow.jl/blob/063511525902bdf84a461035758ef9a73ba4a635/src/ops/op_names.txt 75 | let y_const = { 76 | let mut op = graph.new_operation("Const", "y_train")?; 77 | op.set_attr_tensor("value", y_train)?; 78 | op.set_attr_type("dtype", DataType::Double)?; 79 | op.finish()? 80 | }; 81 | let mul = { 82 | let mut op = graph.new_operation("MatMul", "mul")?; 83 | op.add_input(XT_const.clone()); 84 | op.add_input(X_const.clone()); 85 | op.finish()? 86 | }; 87 | let inverse = { 88 | let mut op = graph.new_operation("MatrixInverse", "mul_inv")?; 89 | op.add_input(mul); 90 | op.finish()? 91 | }; 92 | let mul2 = { 93 | let mut op = graph.new_operation("MatMul", "mul2")?; 94 | op.add_input(inverse); 95 | op.add_input(XT_const.clone()); 96 | op.finish()? 97 | }; 98 | let theta = { 99 | let mut op = graph.new_operation("MatMul", "theta")?; 100 | op.add_input(mul2); 101 | op.add_input(y_const); 102 | op.finish()? 103 | }; 104 | 105 | // running predictions 106 | // y = X_test .* theta 107 | let X_test_const = { 108 | let mut op = graph.new_operation("Const", "X_test")?; 109 | op.set_attr_tensor("value", X_test)?; 110 | op.set_attr_type("dtype", DataType::Double)?; 111 | op.finish()? 112 | }; 113 | let predictions = { 114 | let mut op = graph.new_operation("MatMul", "preds")?; 115 | op.add_input(X_test_const); 116 | op.add_input(theta); 117 | op.finish()? 118 | }; 119 | 120 | // Run graph.
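// The ops above assemble the closed-form normal equation,
// theta = (X^T X)^(-1) X^T y, out of MatMul and MatrixInverse nodes, and
// `predictions` is then X_test * theta; the session run below evaluates the
// whole chain in one pass.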
121 | let session = Session::new(&SessionOptions::new(), &graph)?; 122 | let mut args = SessionRunArgs::new(); 123 | let preds_token = args.request_fetch(&predictions, 0); 124 | session.run(&mut args)?; 125 | let preds_token_res: Tensor<f64> = args.fetch::<f64>(preds_token)?; 126 | // println!("Now the preds", ); 127 | // println!("{:?}", &preds_token_res[..]); 128 | println!("r-squared error score: {:?}", r_squared_score(&boston_y_test, &preds_token_res.to_vec())); 129 | 130 | Ok(()) 131 | } 132 | -------------------------------------------------------------------------------- /chapter2/rust_and_tf/src/linear_regression_from_model.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | 3 | use std::error::Error; 4 | use std::result::Result; 5 | use std::vec::Vec; 6 | 7 | use rand; 8 | use rand::thread_rng; 9 | use rand::seq::SliceRandom; 10 | use transpose; 11 | 12 | use tensorflow as tf; 13 | use tf::{Graph, Tensor, DataType}; 14 | use tf::{Session, SessionOptions, SessionRunArgs}; 15 | 16 | use ml_utils; 17 | use ml_utils::datasets::get_boston_records_from_file; 18 | use ml_utils::sup_metrics::r_squared_score; 19 | 20 | use random; 21 | use random::Source; 22 | use std::path::Path; 23 | use std::process::exit; 24 | use tensorflow::Code; 25 | use tensorflow::Status; 26 | 27 | #[cfg_attr(feature="examples_system_alloc", global_allocator)] 28 | #[cfg(feature="examples_system_alloc")] 29 | static ALLOCATOR: std::alloc::System = std::alloc::System; 30 | 31 | pub fn run() -> Result<(), Box<dyn Error>> { 32 | // Get all the data 33 | let filename = "data/housing.csv"; 34 | let mut data = get_boston_records_from_file(&filename); 35 | 36 | // shuffle the data. 37 | data.shuffle(&mut thread_rng()); 38 | 39 | // separate out to train and test datasets. 40 | let test_size: f64 = 0.2; 41 | let test_size: f64 = data.len() as f64 * test_size; 42 | let test_size = test_size.round() as usize; 43 | let (test_data, train_data) = data.split_at(test_size); 44 | let train_size = train_data.len(); 45 | let test_size = test_data.len(); 46 | 47 | // differentiate the features and the targets. 48 | let boston_x_train: Vec<f64> = train_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 49 | let boston_y_train: Vec<f64> = train_data.iter().map(|r| r.into_targets()).collect(); 50 | 51 | let boston_x_test: Vec<f64> = test_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 52 | let boston_y_test: Vec<f64> = test_data.iter().map(|r| r.into_targets()).collect(); 53 | 54 | // println!("{:?}", boston_y_train.len()); 55 | println!("{:?}", boston_x_train.len()); 56 | 57 | // Define graph. 58 | let mut graph = Graph::new(); 59 | let dim = (boston_y_train.len() as u64, 13); 60 | let test_dim = (boston_y_test.len() as u64, dim.1); 61 | let X_train = <Tensor<f64>>::new(&[dim.0, dim.1]).with_values(&boston_x_train)?; 62 | let y_train = <Tensor<f64>>::new(&[dim.0, 1]).with_values(&boston_y_train)?; 63 | let X_test = <Tensor<f64>>::new(&[test_dim.0, test_dim.1]).with_values(&boston_x_test)?; 64 | // let y_test = <Tensor<f64>>::new(&[test_dim.0, 1]).with_values(&boston_y_test)?; 65 | 66 | let export_dir = "boston_regression/"; // y = w * x + b 67 | if !Path::new(export_dir).exists() { 68 | return Err(Box::new(Status::new_set(Code::NotFound, 69 | &format!("Run the code in the tensorflow notebook in \ 70 | {} and try again.", 71 | export_dir)) 72 | .unwrap())); 73 | } 74 | 75 | // Load the saved model exported by regression_savedmodel.py.
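// The tags ("train", "serve") and operation names ("x", "x_test", "y",
// "train", "w", "y_preds") looked up below must match the ones written at
// export time (see the `tensorflow create model.ipynb` notebook in this
// crate); otherwise the lookups fail at runtime.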
76 | let mut graph = Graph::new(); 77 | let session = Session::from_saved_model(&SessionOptions::new(), 78 | &["train", "serve"], 79 | &mut graph, 80 | export_dir)?; 81 | let op_x = graph.operation_by_name_required("x")?; 82 | let op_x_test = graph.operation_by_name_required("x_test")?; 83 | let op_y = graph.operation_by_name_required("y")?; 84 | let op_train = graph.operation_by_name_required("train")?; 85 | let op_w = graph.operation_by_name_required("w")?; 86 | let op_y_preds = graph.operation_by_name_required("y_preds")?; 87 | 88 | Session::new(&SessionOptions::new(), &graph)?; 89 | let mut args = SessionRunArgs::new(); 90 | args.add_feed(&op_x, 0, &X_train); 91 | args.add_feed(&op_x_test, 0, &X_test); 92 | args.add_feed(&op_y, 0, &y_train); 93 | args.add_target(&op_train); 94 | let preds_token = args.request_fetch(&op_y_preds, 0); 95 | for _ in 0..10 { 96 | session.run(&mut args)?; 97 | }; 98 | let preds_token_res: Tensor<f64> = args.fetch::<f64>(preds_token)?; 99 | println!("Now the preds"); 100 | println!("{:?}", &preds_token_res[..]); 101 | println!("{:?}", &boston_y_test); 102 | println!("{:?}", r_squared_score(&boston_y_test, &preds_token_res[..])); 103 | 104 | Ok(()) 105 | } 106 | -------------------------------------------------------------------------------- /chapter2/rust_and_tf/src/main.rs: -------------------------------------------------------------------------------- 1 | /// This package is about the different ways you can use tensorflow in rust. 2 | /// Current possible arguments. 3 | /// # Arguments 4 | /// * `` - Will run the placeholders example. 5 | /// * `seq` - Will run the sequence of nodes example 6 | /// * `vars` - Will run the example with graph variables 7 | /// 8 | /// # Example 9 | /// ``` 10 | /// $ cargo run seq 11 | /// ➜ rust_and_tf git:(master) ✗ cargo run seq 12 | /// Finished dev [unoptimized + debuginfo] target(s) in 0.15s 13 | /// Running `target/debug/rust_and_tf seq` 14 | /// 2019-04-07 12:55:41.781908: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX 15 | /// 2019-04-07 12:55:41.814069: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 1996260000 Hz 16 | /// 2019-04-07 12:55:41.814902: I tensorflow/compiler/xla/service/service.cc:150] XLA service 0x56514fdfded0 executing computations on platform Host. Devices: 17 | /// 2019-04-07 12:55:41.814966: I tensorflow/compiler/xla/service/service.cc:158] StreamExecutor device (0): , 18 | /// constant evaluation: w = 3; x = w + 2; y = x + 5; z = x * 3 19 | /// y => 10.0 20 | /// z => 15.0 21 | /// ``` 22 | 23 | // reference: https://github.com/danieldk/dpar 24 | // https://www.tensorflow.org/tutorials/estimators/cnn 25 | 26 | extern crate serde; 27 | // This lets us write `#[derive(Deserialize)]`.
28 | #[macro_use] 29 | extern crate serde_derive; 30 | 31 | use std::process::exit; 32 | use std::env::args; 33 | 34 | mod graph_with_placeholder; 35 | mod seq_nodes; 36 | mod graph_variables; 37 | mod linear_regression; 38 | mod conv_nets; 39 | mod linear_regression_from_model; 40 | mod conv_nets_maxpooling; 41 | 42 | 43 | #[cfg_attr(feature="examples_system_alloc", global_allocator)] 44 | #[cfg(feature="examples_system_alloc")] 45 | static ALLOCATOR: std::alloc::System = std::alloc::System; 46 | 47 | fn main() { 48 | let args: Vec<String> = args().collect(); 49 | let model = if args.len() < 2 { 50 | None 51 | } else { 52 | Some(args[1].as_str()) 53 | }; 54 | let res = match model { 55 | None => graph_with_placeholder::run(), 56 | Some("seq") => seq_nodes::run(), 57 | Some("vars") => graph_variables::run(), 58 | Some("lr") => linear_regression::run(), 59 | Some("lr_py") => linear_regression_from_model::run(), 60 | Some("conv") => conv_nets::run(), 61 | Some("conv_mp") => conv_nets_maxpooling::run(), 62 | Some(_) => graph_with_placeholder::run(), 63 | }; 64 | // Putting the main code in another function serves two purposes: 65 | // 1. We can use the `?` operator. 66 | // 2. We can call exit safely, which does not run any destructors. 67 | exit(match res { 68 | Ok(_) => 0, 69 | Err(e) => { 70 | println!("{}", e); 71 | 1 72 | } 73 | }) 74 | } -------------------------------------------------------------------------------- /chapter2/rust_and_tf/src/seq_nodes.rs: -------------------------------------------------------------------------------- 1 | use std::error::Error; 2 | use std::result::Result; 3 | 4 | use tensorflow as tf; 5 | use tf::expr::{Compiler, Constant}; 6 | use tf::{Graph, Tensor}; 7 | use tf::{Session, SessionOptions, SessionRunArgs}; 8 | 9 | #[cfg_attr(feature="examples_system_alloc", global_allocator)] 10 | #[cfg(feature="examples_system_alloc")] 11 | static ALLOCATOR: std::alloc::System = std::alloc::System; 12 | 13 | pub fn run() -> Result<(), Box<dyn Error>> { 14 | let mut g = Graph::new(); 15 | 16 | let (y_node, z_node) = { 17 | let mut compiler = Compiler::new(&mut g); 18 | let w = <Tensor<f32>>::new(&[1]).with_values(&[3.0_f32]).unwrap(); 19 | let w_expr = <Constant<f32>>::new_expr(w); 20 | let x_expr = w_expr.clone() + 2.0f32; 21 | let y_expr = x_expr.clone() + 5.0f32; 22 | let z_expr = x_expr.clone() * 3.0f32; 23 | 24 | let y_node = compiler.compile(y_expr.clone())?; 25 | let z_node = compiler.compile(z_expr.clone())?; 26 | (y_node, z_node) 27 | }; 28 | 29 | let options = SessionOptions::new(); 30 | let mut session = Session::new(&options, &g)?; 31 | 32 | // Evaluate the graph. 33 | let mut step = SessionRunArgs::new(); 34 | let output_token_y = step.request_fetch(&y_node, 0); 35 | let output_token_z = step.request_fetch(&z_node, 0); 36 | session.run(&mut step).unwrap(); 37 | 38 | // Check our results.
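// y and z were both requested from the same `session.run`, so the shared
// subexpression x = w + 2 is evaluated as part of a single graph execution.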
39 | let output_tensor_y = step.fetch::<f32>(output_token_y)?; 40 | let output_tensor_z = step.fetch::<f32>(output_token_z)?; 41 | println!("constant evaluation: w = 3; x = w + 2; y = x + 5; z = x * 3"); 42 | println!("y => {:?}", output_tensor_y[0]); 43 | println!("z => {:?}", output_tensor_z[0]); 44 | session.close()?; 45 | 46 | Ok(()) 47 | } -------------------------------------------------------------------------------- /chapter2/rust_and_tf/tensorflow create model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 15, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import matplotlib.pyplot as plt\n", 10 | "%matplotlib inline\n", 11 | "import tensorflow as tf\n", 12 | "import numpy as np\n", 13 | "\n", 14 | "from numpy import genfromtxt\n", 15 | "from sklearn.datasets import load_boston\n", 16 | "from tensorflow.python.saved_model.builder import SavedModelBuilder\n", 17 | "from tensorflow.python.saved_model.signature_def_utils import build_signature_def\n", 18 | "from tensorflow.python.saved_model.signature_constants import REGRESS_METHOD_NAME\n", 19 | "from tensorflow.python.saved_model.tag_constants import TRAINING, SERVING\n", 20 | "from tensorflow.python.saved_model.utils import build_tensor_info" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 19, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "learning_rate = 0.01\n", 30 | "cost_history = np.empty(shape=[1],dtype=float)\n", 31 | "\n", 32 | "X = tf.placeholder(tf.float64,[None,n_dim], name=\"x\")\n", 33 | "X_test = tf.placeholder(tf.float64,[None,n_dim], name=\"x_test\")\n", 34 | "Y = tf.placeholder(tf.float64,[None,1], name=\"y\")\n", 35 | "W = tf.Variable(tf.ones([n_dim,1],dtype=tf.float64), name=\"w\")\n", 36 | "\n", 37 | "init = tf.variables_initializer(tf.global_variables(), name=\"init\")" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 20, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "y_ = tf.matmul(X, W, name=\"y_hat\")\n", 47 | "y_preds = tf.matmul(X_test, W, name=\"y_preds\")\n", 48 | "cost = tf.reduce_mean(tf.square(y_ - Y))\n", 49 | "training_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost, name=\"train\")" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "http://aqibsaeed.github.io/2016-07-07-TensorflowLR/" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 28, 62 | "metadata": {}, 63 | "outputs": [ 64 | { 65 | "name": "stdout", 66 | "output_type": "stream", 67 | "text": [ 68 | "INFO:tensorflow:Assets added to graph.\n", 69 | "INFO:tensorflow:No assets to write.\n", 70 | "INFO:tensorflow:SavedModel written to: boston_regression/saved_model.pb\n" 71 | ] 72 | } 73 | ], 74 | "source": [ 75 | "directory = 'boston_regression'\n", 76 | "builder = SavedModelBuilder(directory)\n", 77 | "\n", 78 | "with tf.Session(graph=tf.get_default_graph()) as sess:\n", 79 | " sess.run(init)\n", 80 | "\n", 81 | " signature_inputs = {\n", 82 | " \"x\": build_tensor_info(X),\n", 83 | " \"x_test\": build_tensor_info(X_test),\n", 84 | " \"y\": build_tensor_info(Y)\n", 85 | " }\n", 86 | " signature_outputs = {\n", 87 | " \"out\": build_tensor_info(y_preds)\n", 88 | " }\n", 89 | " signature_def = build_signature_def(\n", 90 | " signature_inputs, signature_outputs,\n", 91 | " REGRESS_METHOD_NAME)\n", 92 | " builder.add_meta_graph_and_variables(\n", 93 | " sess,
[TRAINING, SERVING],\n", 94 | "        signature_def_map={\n", 95 | "            REGRESS_METHOD_NAME: signature_def\n", 96 | "        },\n", 97 | "        assets_collection=tf.get_collection(tf.GraphKeys.ASSET_FILEPATHS))\n", 98 | "    builder.save(as_text=False)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [] 107 | } 108 | ], 109 | "metadata": { 110 | "kernelspec": { 111 | "display_name": "Python 3", 112 | "language": "python", 113 | "name": "python3" 114 | }, 115 | "language_info": { 116 | "codemirror_mode": { 117 | "name": "ipython", 118 | "version": 3 119 | }, 120 | "file_extension": ".py", 121 | "mimetype": "text/x-python", 122 | "name": "python", 123 | "nbconvert_exporter": "python", 124 | "pygments_lexer": "ipython3", 125 | "version": "3.5.2" 126 | } 127 | }, 128 | "nbformat": 4, 129 | "nbformat_minor": 2 130 | } 131 | -------------------------------------------------------------------------------- /chapter2/rustlearn_classification_tasks/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rustlearn_classification_tasks" 3 | version = "0.1.0" 4 | authors = ["joydeep bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | rustlearn = "0.5.0" 9 | csv = "1.0.5" 10 | serde = "1.0.89" 11 | serde_derive = "1.0.89" 12 | rand = "0.6" 13 | ml-utils = { path = "../ml-utils" } -------------------------------------------------------------------------------- /chapter2/rustlearn_classification_tasks/src/binary_class_scores.rs: -------------------------------------------------------------------------------- 1 | use std::error::Error; 2 | 3 | use rustlearn::prelude::*; 4 | use rustlearn::metrics::{accuracy_score, roc_auc_score}; 5 | 6 | use ml_utils; 7 | use ml_utils::sup_metrics::{accuracy, logloss_score}; 8 | 9 | pub fn run() -> Result<(), Box<dyn Error>> { 10 | let preds = vec![1., 0.0001, 0.908047338626, 0.0199900075962, 0.904058545833, 0.321508119045, 0.657086320195]; 11 | let actuals = vec![1., 0., 0., 1., 1., 0., 0.]; 12 | println!("logloss score: {:?}", logloss_score(&actuals, &preds, 1e-15)); 13 | println!("roc auc scores: {:?}", roc_auc_score(&Array::from(actuals), &Array::from(preds))?); 14 | 15 | Ok(()) 16 | } -------------------------------------------------------------------------------- /chapter2/rustlearn_classification_tasks/src/logistic_reg.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | use std::vec::Vec; 3 | use std::error::Error; 4 | 5 | use csv; 6 | use rand; 7 | use rand::thread_rng; 8 | use rand::seq::SliceRandom; 9 | 10 | use rustlearn::prelude::*; 11 | use rustlearn::linear_models::sgdclassifier::Hyperparameters as logistic_regression; 12 | use rustlearn::metrics::{accuracy_score, roc_auc_score}; 13 | 14 | use ml_utils; 15 | use ml_utils::sup_metrics::{accuracy, logloss_score}; 16 | use ml_utils::datasets::Flower; 17 | 18 | pub fn run() -> Result<(), Box<dyn Error>> { 19 | // Get all the data 20 | let mut rdr = csv::Reader::from_reader(io::stdin()); 21 | let mut data = Vec::new(); 22 | for result in rdr.deserialize() { 23 | let r: Flower = result?; 24 | data.push(r); // data contains all the records 25 | } 26 | 27 | // shuffle the data. 28 | data.shuffle(&mut thread_rng()); 29 | 30 | // separate out to train and test datasets.
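// The records were shuffled above, so taking the first 20% (rounded to a
// whole count) via `split_at` gives a random test set; the remainder is
// used for training.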
31 | let test_size: f32 = 0.2; 32 | let test_size: f32 = data.len() as f32 * test_size; 33 | let test_size = test_size.round() as usize; 34 | let (test_data, train_data) = data.split_at(test_size); 35 | let train_size = train_data.len(); 36 | let test_size = test_data.len(); 37 | 38 | // differentiate the features and the labels. 39 | let flower_x_train: Vec<f32> = train_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 40 | let flower_y_train: Vec<f32> = train_data.iter().map(|r| r.into_labels()).collect(); 41 | 42 | let flower_x_test: Vec<f32> = test_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 43 | let flower_y_test: Vec<f32> = test_data.iter().map(|r| r.into_labels()).collect(); 44 | 45 | // Since rustlearn works with arrays we need to convert the vectors to a dense matrix or a sparse matrix 46 | let mut flower_x_train = Array::from(flower_x_train); // as opposed to rusty machine, all floats here are f32 reference : https://github.com/maciejkula/rustlearn/blob/7daf692fe504966aa84d920321b884afe19caa79/src/array/dense.rs#L129 47 | flower_x_train.reshape(train_size, 4); 48 | 49 | let flower_y_train = Array::from(flower_y_train); 50 | 51 | let mut flower_x_test = Array::from(flower_x_test); 52 | flower_x_test.reshape(test_size, 4); 53 | 54 | let flower_y_test = Array::from(flower_y_test); 55 | 56 | // working with stochastic gradient descent. 57 | // uses the Adagrad adaptive per-parameter learning rate 58 | let mut model = logistic_regression::new(4) 59 | .learning_rate(1.0) 60 | .l2_penalty(0.5) 61 | .l1_penalty(0.0) 62 | .one_vs_rest(); 63 | let num_epochs = 100; 64 | 65 | for _ in 0..num_epochs { 66 | model.fit(&flower_x_train, &flower_y_train).unwrap(); 67 | } 68 | 69 | let prediction = model.predict(&flower_x_test).unwrap(); 70 | let acc1 = accuracy_score(&flower_y_test, &prediction); 71 | // let acc2 = accuracy(flower_y_test.data()[..], prediction.data()[..]); 72 | println!("Logistic Regression: accuracy: {:?}", acc1); 73 | // println!("Logistic Regression: accuracy: {:?}", acc2); 74 | 75 | Ok(()) 76 | } -------------------------------------------------------------------------------- /chapter2/rustlearn_classification_tasks/src/main.rs: -------------------------------------------------------------------------------- 1 | use std::vec::Vec; 2 | use std::process::exit; 3 | use std::env::args; 4 | 5 | mod trees; 6 | mod logistic_reg; 7 | mod svm; 8 | mod binary_class_scores; 9 | 10 | fn main() { 11 | let args: Vec<String> = args().collect(); 12 | let model = if args.len() < 2 { 13 | None 14 | } else { 15 | Some(args[1].as_str()) 16 | }; 17 | let res = match model { 18 | None => {println!("nothing", ); Ok(())}, 19 | Some("lr") => logistic_reg::run(), 20 | Some("svm") => svm::run(), 21 | Some("bs") => binary_class_scores::run(), 22 | Some(_) => trees::run(), 23 | }; 24 | // Putting the main code in another function serves two purposes: 25 | // 1. We can use the `?` operator. 26 | // 2. We can call exit safely, which does not run any destructors.
27 | exit(match res { 28 | Ok(_) => 0, 29 | Err(e) => { 30 | println!("{}", e); 31 | 1 32 | } 33 | }) 34 | } -------------------------------------------------------------------------------- /chapter2/rustlearn_classification_tasks/src/svm.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | use std::vec::Vec; 3 | use std::error::Error; 4 | 5 | use csv; 6 | use rand; 7 | use rand::thread_rng; 8 | use rand::seq::SliceRandom; 9 | 10 | use rustlearn::prelude::*; 11 | use rustlearn::svm::libsvm::svc::{Hyperparameters as libsvm_svc, KernelType}; 12 | use rustlearn::metrics::{accuracy_score, roc_auc_score}; 13 | 14 | use ml_utils; 15 | use ml_utils::sup_metrics::{accuracy, logloss_score}; 16 | use ml_utils::datasets::Flower; 17 | 18 | pub fn run() -> Result<(), Box<dyn Error>> { 19 | // Get all the data 20 | let mut rdr = csv::Reader::from_reader(io::stdin()); 21 | let mut data = Vec::new(); 22 | for result in rdr.deserialize() { 23 | let r: Flower = result?; 24 | data.push(r); // data contains all the records 25 | } 26 | 27 | // shuffle the data. 28 | data.shuffle(&mut thread_rng()); 29 | 30 | // separate out to train and test datasets. 31 | let test_size: f32 = 0.2; 32 | let test_size: f32 = data.len() as f32 * test_size; 33 | let test_size = test_size.round() as usize; 34 | let (test_data, train_data) = data.split_at(test_size); 35 | let train_size = train_data.len(); 36 | let test_size = test_data.len(); 37 | 38 | // differentiate the features and the labels. 39 | let flower_x_train: Vec<f32> = train_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 40 | let flower_y_train: Vec<f32> = train_data.iter().map(|r| r.into_labels()).collect(); 41 | 42 | let flower_x_test: Vec<f32> = test_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 43 | let flower_y_test: Vec<f32> = test_data.iter().map(|r| r.into_labels()).collect(); 44 | 45 | // Since rustlearn works with arrays we need to convert the vectors to a dense matrix or a sparse matrix 46 | let mut flower_x_train = Array::from(flower_x_train); // as opposed to rusty machine, all floats here are f32 reference : https://github.com/maciejkula/rustlearn/blob/7daf692fe504966aa84d920321b884afe19caa79/src/array/dense.rs#L129 47 | flower_x_train.reshape(train_size, 4); 48 | 49 | let flower_y_train = Array::from(flower_y_train); 50 | 51 | let mut flower_x_test = Array::from(flower_x_test); 52 | flower_x_test.reshape(test_size, 4); 53 | 54 | let flower_y_test = Array::from(flower_y_test); 55 | 56 | // Working with svms 57 | let svm_linear_model = libsvm_svc::new(4, KernelType::Linear, 3) 58 | .C(0.3) 59 | .build(); 60 | let svm_poly_model = libsvm_svc::new(4, KernelType::Polynomial, 3) 61 | .C(0.3) 62 | .build(); 63 | let svm_rbf_model = libsvm_svc::new(4, KernelType::RBF, 3) 64 | .C(0.3) 65 | .build(); 66 | let svm_sigmoid_model = libsvm_svc::new(4, KernelType::Sigmoid, 3) 67 | .C(0.3) 68 | .build(); 69 | let svm_kernel_types = ["linear", "polynomial", "rbf", "sigmoid"]; 70 | let mut svm_model_types = [svm_linear_model, svm_poly_model, svm_rbf_model, svm_sigmoid_model]; 71 | for (kernel_type, svm_model) in svm_kernel_types.iter().zip(svm_model_types.iter_mut()) { 72 | svm_model.fit(&flower_x_train, &flower_y_train).unwrap(); 73 | 74 | let prediction = svm_model.predict(&flower_x_test).unwrap(); 75 | let acc = accuracy_score(&flower_y_test, &prediction); 76 | println!("Lib svm {kernel}: accuracy: {accuracy}", accuracy=acc, kernel=kernel_type); 77 | }; 78 | 79 | let preds = vec![1., 0.0001, 0.908047338626,
0.0199900075962, 0.904058545833, 0.321508119045, 0.657086320195]; 80 | let actuals = vec![1., 0., 0., 1., 1., 0., 0.]; 81 | println!("logloss score: {:?}", logloss_score(&actuals, &preds, 1e-15)); 82 | println!("roc auc scores: {:?}", roc_auc_score(&Array::from(actuals), &Array::from(preds))?); 83 | 84 | Ok(()) 85 | } -------------------------------------------------------------------------------- /chapter2/rustlearn_classification_tasks/src/trees.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | use std::vec::Vec; 3 | use std::error::Error; 4 | 5 | use csv; 6 | use rand; 7 | use rand::thread_rng; 8 | use rand::seq::SliceRandom; 9 | 10 | use rustlearn::prelude::*; 11 | use rustlearn::ensemble::random_forest::Hyperparameters as randomforest; 12 | use rustlearn::trees::decision_tree; 13 | use rustlearn::metrics::{accuracy_score, roc_auc_score}; 14 | 15 | use ml_utils; 16 | use ml_utils::sup_metrics::{accuracy, logloss_score}; 17 | use ml_utils::datasets::Flower; 18 | 19 | pub fn run() -> Result<(), Box<dyn Error>> { 20 | // Get all the data 21 | let mut rdr = csv::Reader::from_reader(io::stdin()); 22 | let mut data = Vec::new(); 23 | for result in rdr.deserialize() { 24 | let r: Flower = result?; 25 | data.push(r); // data contains all the records 26 | } 27 | 28 | // shuffle the data. 29 | data.shuffle(&mut thread_rng()); 30 | 31 | // separate out to train and test datasets. 32 | let test_size: f32 = 0.2; 33 | let test_size: f32 = data.len() as f32 * test_size; 34 | let test_size = test_size.round() as usize; 35 | let (test_data, train_data) = data.split_at(test_size); 36 | let train_size = train_data.len(); 37 | let test_size = test_data.len(); 38 | 39 | // differentiate the features and the labels. 40 | let flower_x_train: Vec<f32> = train_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 41 | let flower_y_train: Vec<f32> = train_data.iter().map(|r| r.into_labels()).collect(); 42 | 43 | let flower_x_test: Vec<f32> = test_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 44 | let flower_y_test: Vec<f32> = test_data.iter().map(|r| r.into_labels()).collect(); 45 | 46 | // Since rustlearn works with arrays we need to convert the vectors to a dense matrix or a sparse matrix 47 | let mut flower_x_train = Array::from(flower_x_train); // as opposed to rusty machine, all floats here are f32 reference : https://github.com/maciejkula/rustlearn/blob/7daf692fe504966aa84d920321b884afe19caa79/src/array/dense.rs#L129 48 | flower_x_train.reshape(train_size, 4); 49 | 50 | let flower_y_train = Array::from(flower_y_train); 51 | 52 | let mut flower_x_test = Array::from(flower_x_test); 53 | flower_x_test.reshape(test_size, 4); 54 | 55 | let flower_y_test = Array::from(flower_y_test); 56 | 57 | // create a decision tree model 58 | let mut decision_tree_model = decision_tree::Hyperparameters::new(flower_x_train.cols()) 59 | .one_vs_rest(); 60 | decision_tree_model.fit(&flower_x_train, &flower_y_train).unwrap(); 61 | 62 | let prediction = decision_tree_model.predict(&flower_x_test).unwrap(); 63 | let acc = accuracy_score(&flower_y_test, &prediction); 64 | println!("DecisionTree model accuracy: {:?}", acc); 65 | 66 | 67 | 68 | // create a random forest model 69 | let mut tree_params = decision_tree::Hyperparameters::new(flower_x_train.cols()); 70 | tree_params.min_samples_split(10) 71 | .max_features(4); 72 | 73 | let mut random_forest_model = randomforest::new(tree_params, 10).one_vs_rest(); 74 | 75 | random_forest_model.fit(&flower_x_train, &flower_y_train).unwrap();
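// The forest just fitted uses 10 trees (min_samples_split = 10,
// max_features = 4), wrapped in one-vs-rest because the iris target has
// three classes.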
76 | 77 | // Optionally serialize and deserialize the model 78 | 79 | // let encoded = bincode::rustc_serialize::encode(&model, 80 | // bincode::SizeLimit::Infinite).unwrap(); 81 | // let decoded: OneVsRestWrapper<DecisionTree> = bincode::rustc_serialize::decode(&encoded).unwrap(); 82 | 83 | let prediction = random_forest_model.predict(&flower_x_test).unwrap(); 84 | let acc = accuracy_score(&flower_y_test, &prediction); 85 | println!("Random Forest: accuracy: {:?}", acc); 86 | 87 | Ok(()) 88 | } -------------------------------------------------------------------------------- /chapter2/rusty_machine_classification/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rusty_machine_classification" 3 | version = "0.1.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | rusty-machine = { path = "../rusty-machine" } 9 | ml-utils = { path = "../ml-utils" } 10 | rand = "0.6.5" 11 | csv = "1.0.7" -------------------------------------------------------------------------------- /chapter2/rusty_machine_classification/src/main.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | use std::vec::Vec; 3 | use std::error::Error; 4 | 5 | use rusty_machine as rm; 6 | use rm::linalg::Matrix; 7 | use rm::linalg::Vector; 8 | use rm::learning::knn::KNNClassifier; 9 | use rusty_machine::learning::knn::{KDTree, BallTree, BruteForce}; 10 | use rm::learning::SupModel; 11 | use csv; 12 | use rand; 13 | use rand::thread_rng; 14 | use rand::seq::SliceRandom; 15 | 16 | use ml_utils; 17 | use ml_utils::datasets::Flower; 18 | use ml_utils::sup_metrics::accuracy; 19 | 20 | fn main() -> Result<(), Box<dyn Error>> { 21 | // Get all the data 22 | let mut rdr = csv::Reader::from_reader(io::stdin()); 23 | let mut data = Vec::new(); 24 | for result in rdr.deserialize() { 25 | let r: Flower = result?; 26 | data.push(r); // data contains all the records 27 | } 28 | 29 | // shuffle the data. 30 | data.shuffle(&mut thread_rng()); 31 | 32 | // separate out to train and test datasets. 33 | let test_size: f64 = 0.2; 34 | let test_size: f64 = data.len() as f64 * test_size; 35 | let test_size = test_size.round() as usize; 36 | let (test_data, train_data) = data.split_at(test_size); 37 | let train_size = train_data.len(); 38 | let test_size = test_data.len(); 39 | 40 | // differentiate the features and the labels. 41 | let flower_x_train: Vec<f64> = train_data.iter().flat_map(|r| { 42 | let features = r.into_feature_vector(); 43 | let features: Vec<f64> = features.iter().map(|&x| x as f64).collect(); 44 | features 45 | }).collect(); 46 | let flower_y_train: Vec<usize> = train_data.iter().map( 47 | |r| r.into_int_labels() as usize).collect(); 48 | 49 | let flower_x_test: Vec<f64> = test_data.iter().flat_map(|r| { 50 | let features = r.into_feature_vector(); 51 | let features: Vec<f64> = features.iter().map(|&x| x as f64).collect(); 52 | features 53 | }).collect(); 54 | let flower_y_test: Vec<u32> = test_data.iter().map(|r| r.into_int_labels() as u32).collect(); 55 | 56 | // Convert the data into matrices for rusty machine 57 | let flower_x_train = Matrix::new(train_size, 4, flower_x_train); 58 | let flower_y_train = Vector::new(flower_y_train); 59 | let flower_x_test = Matrix::new(test_size, 4, flower_x_test); 60 | 61 | // train the classifier to search 2 nearest.
this is the same as kdtree 62 | let mut knn = KNNClassifier::new(2); 63 | println!("{:?}", knn); 64 | 65 | // train the classifier 66 | knn.train(&flower_x_train, &flower_y_train).unwrap(); 67 | 68 | // predict new points 69 | let preds = knn.predict(&flower_x_test).unwrap(); 70 | let preds: Vec<u32> = preds.data().iter().map(|&x| x as u32).collect(); 71 | println!("default is binary tree"); 72 | println!("accuracy {:?}", accuracy(preds.as_slice(), &flower_y_test)); 73 | 74 | // Ball tree is good when the number of dimensions is huge. 75 | let mut knn = KNNClassifier::new_specified(2, BallTree::new(30)); 76 | println!("{:?}", knn); 77 | 78 | // train the classifier 79 | knn.train(&flower_x_train, &flower_y_train).unwrap(); 80 | 81 | // predict new points 82 | let preds = knn.predict(&flower_x_test).unwrap(); 83 | let preds: Vec<u32> = preds.data().iter().map(|&x| x as u32).collect(); 84 | println!("accuracy for ball tree {:?}", accuracy(preds.as_slice(), &flower_y_test)); 85 | 86 | // The k-d tree is a binary tree in which every leaf node is a k-dimensional point 87 | let mut knn = KNNClassifier::new_specified(2, KDTree::default()); 88 | println!("{:?}", knn); 89 | 90 | // train the classifier 91 | knn.train(&flower_x_train, &flower_y_train).unwrap(); 92 | 93 | // predict new points 94 | let preds = knn.predict(&flower_x_test).unwrap(); 95 | let preds: Vec<u32> = preds.data().iter().map(|&x| x as u32).collect(); 96 | println!("accuracy for kdtree tree {:?}", accuracy(preds.as_slice(), &flower_y_test)); 97 | 98 | // A repeat run of the k-d tree classifier 99 | let mut knn = KNNClassifier::new_specified(2, KDTree::default()); 100 | println!("{:?}", knn); 101 | 102 | // train the classifier 103 | knn.train(&flower_x_train, &flower_y_train).unwrap(); 104 | 105 | // predict new points 106 | let preds = knn.predict(&flower_x_test).unwrap(); 107 | let preds: Vec<u32> = preds.data().iter().map(|&x| x as u32).collect(); 108 | println!("accuracy for kdtree tree (repeat run) {:?}", accuracy(preds.as_slice(), &flower_y_test)); 109 | 110 | // Brute force means all the nearest neighbors are looked into 111 | let mut knn = KNNClassifier::new_specified(2, BruteForce::default()); 112 | println!("{:?}", knn); 113 | 114 | // train the classifier 115 | knn.train(&flower_x_train, &flower_y_train).unwrap(); 116 | 117 | // predict new points 118 | let preds = knn.predict(&flower_x_test).unwrap(); 119 | let preds: Vec<u32> = preds.data().iter().map(|&x| x as u32).collect(); 120 | println!("accuracy for brute force {:?}", accuracy(preds.as_slice(), &flower_y_test)); 121 | 122 | 123 | Ok(()) 124 | } 125 | -------------------------------------------------------------------------------- /chapter2/rusty_machine_supervised_algos/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rusty_machine_supervised_algos" 3 | version = "0.1.0" 4 | authors = ["joydeep bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | csv = "*" 9 | rusty-machine="*" 10 | serde = "1" 11 | serde_derive = "1" 12 | rand = "0.6.5" -------------------------------------------------------------------------------- /chapter2/rustymachine_regression/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rustymachine_regression" 3 | version = "0.1.0" 4 | authors = ["joydeep bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | rusty-machine = "0.5.4" 9 | serde = "1" 10 | serde_derive = "1" 11 | rand =
"0.6.5" 12 | ml-utils = { path = "../ml-utils" } -------------------------------------------------------------------------------- /chapter2/rustymachine_regression/src/gaussian_process_reg.rs: -------------------------------------------------------------------------------- 1 | /// Data gathered from https://www.kaggle.com/vikrishnan/boston-house-prices 2 | /// Boston dataset: https://www.cs.toronto.edu/~delve/data/boston/bostonDetail.html 3 | /// This module shows how to run regression models 4 | use std::io::prelude::*; 5 | use std::io::BufReader; 6 | use std::path::Path; 7 | use std::fs::File; 8 | use std::vec::Vec; 9 | use std::error::Error; 10 | use std::process::exit; 11 | use std::env::args; 12 | 13 | use rusty_machine; 14 | use rusty_machine::linalg::Matrix; 15 | // use rusty_machine::linalg::BaseMatrix; 16 | use rusty_machine::linalg::Vector; 17 | use rusty_machine::learning::gp::GaussianProcess; 18 | use rusty_machine::learning::gp::ConstMean; 19 | use rusty_machine::learning::toolkit::kernel; 20 | use rusty_machine::analysis::score::neg_mean_squared_error; 21 | use rusty_machine::learning::SupModel; 22 | 23 | // use ndarray::{Array, arr1}; 24 | use rand; 25 | use rand::thread_rng; 26 | use rand::seq::SliceRandom; 27 | 28 | use ml_utils::datasets::get_boston_records_from_file; 29 | use ml_utils::sup_metrics::r_squared_score; 30 | 31 | pub fn run() -> Result<(), Box> { 32 | // Get all the data 33 | let filename = "data/housing.csv"; 34 | let mut data = get_boston_records_from_file(&filename); // file must be in the folder data 35 | 36 | // shuffle the data. 37 | data.shuffle(&mut thread_rng()); 38 | 39 | // separate out to train and test datasets. 40 | let test_size: f64 = 0.2; 41 | let test_size: f64 = data.len() as f64 * test_size; 42 | let test_size = test_size.round() as usize; 43 | let (test_data, train_data) = data.split_at(test_size); 44 | let train_size = train_data.len(); 45 | let test_size = test_data.len(); 46 | 47 | // differentiate the features and the targets. 
48 | let boston_x_train: Vec = train_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 49 | let boston_y_train: Vec = train_data.iter().map(|r| r.into_targets()).collect(); 50 | 51 | let boston_x_test: Vec = test_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 52 | let boston_y_test: Vec = test_data.iter().map(|r| r.into_targets()).collect(); 53 | 54 | // using ndarray 55 | // let boston_x_train = Array::from_shape_vec((train_size, 13), boston_x_train).unwrap(); 56 | // let boston_y_train = Array::from_vec(boston_y_train); 57 | // let boston_x_test = Array::from_shape_vec((test_size, 13), boston_x_test).unwrap(); 58 | // let boston_y_test = Array::from_vec(boston_y_test); 59 | 60 | // COnvert the data into matrices for rusty machine 61 | let boston_x_train = Matrix::new(train_size, 13, boston_x_train); 62 | let boston_y_train = Vector::new(boston_y_train); 63 | let boston_x_test = Matrix::new(test_size, 13, boston_x_test); 64 | // let boston_y_test = Vector::new(boston_y_test); 65 | let boston_y_test = Matrix::new(test_size, 1, boston_y_test); 66 | 67 | // Create a gaussian process regression 68 | // A squared exponential kernel with lengthscale 2 and amplitude 1 69 | let ker = kernel::SquaredExp::new(2., 1.); 70 | 71 | // zero function as mean function 72 | let zero_mean = ConstMean::default(); 73 | 74 | // defining the model with noise 10 75 | let mut gaus_model = GaussianProcess::new(ker, zero_mean, 10f64); 76 | 77 | gaus_model.train(&boston_x_train, &boston_y_train)?; 78 | 79 | let predictions = gaus_model.predict(&boston_x_test).unwrap(); 80 | let predictions = Matrix::new(test_size, 1, predictions); 81 | let acc = neg_mean_squared_error(&predictions, &boston_y_test); 82 | println!("gaussian process regression error: {:?}", acc); 83 | println!("gaussian process regression R2 score: {:?}", r_squared_score( 84 | &boston_y_test.data(), &predictions.data())); 85 | 86 | Ok(()) 87 | } 88 | -------------------------------------------------------------------------------- /chapter2/rustymachine_regression/src/glms.rs: -------------------------------------------------------------------------------- 1 | /// Data gathered from https://www.kaggle.com/vikrishnan/boston-house-prices 2 | /// Boston dataset: https://www.cs.toronto.edu/~delve/data/boston/bostonDetail.html 3 | /// This module shows how to run regression models 4 | use std::io::prelude::*; 5 | use std::io::BufReader; 6 | use std::path::Path; 7 | use std::fs::File; 8 | use std::vec::Vec; 9 | use std::error::Error; 10 | use std::process::exit; 11 | use std::env::args; 12 | 13 | use rusty_machine; 14 | use rusty_machine::linalg::Matrix; 15 | use rusty_machine::linalg::Vector; 16 | use rusty_machine::learning::glm::{GenLinearModel, Normal}; 17 | use rusty_machine::analysis::score::neg_mean_squared_error; 18 | use rusty_machine::learning::SupModel; 19 | 20 | // use ndarray::{Array, arr1}; 21 | use rand; 22 | use rand::thread_rng; 23 | use rand::seq::SliceRandom; 24 | 25 | use ml_utils::datasets::get_boston_records_from_file; 26 | use ml_utils::sup_metrics::r_squared_score; 27 | 28 | pub fn run() -> Result<(), Box> { 29 | // Get all the data 30 | let filename = "data/housing.csv"; 31 | let mut data = get_boston_records_from_file(&filename); // file must be in the folder data 32 | 33 | // shuffle the data. 34 | data.shuffle(&mut thread_rng()); 35 | 36 | // separate out to train and test datasets. 
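// Note on the arithmetic below: the 0.2 fraction is turned into a row count
// before split_at. Assuming the usual 506-row Boston housing file,
// 506 * 0.2 = 101.2, which rounds to 101 test rows and leaves 405 rows for
// training (the data was already shuffled above, so the split is random).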
37 | let test_size: f64 = 0.2; 38 | let test_size: f64 = data.len() as f64 * test_size; 39 | let test_size = test_size.round() as usize; 40 | let (test_data, train_data) = data.split_at(test_size); 41 | let train_size = train_data.len(); 42 | let test_size = test_data.len(); 43 | 44 | // differentiate the features and the targets. 45 | let boston_x_train: Vec = train_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 46 | let boston_y_train: Vec = train_data.iter().map(|r| r.into_targets()).collect(); 47 | 48 | let boston_x_test: Vec = test_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 49 | let boston_y_test: Vec = test_data.iter().map(|r| r.into_targets()).collect(); 50 | 51 | // using ndarray 52 | // let boston_x_train = Array::from_shape_vec((train_size, 13), boston_x_train).unwrap(); 53 | // let boston_y_train = Array::from_vec(boston_y_train); 54 | // let boston_x_test = Array::from_shape_vec((test_size, 13), boston_x_test).unwrap(); 55 | // let boston_y_test = Array::from_vec(boston_y_test); 56 | 57 | // COnvert the data into matrices for rusty machine 58 | let boston_x_train = Matrix::new(train_size, 13, boston_x_train); 59 | let boston_y_train = Vector::new(boston_y_train); 60 | let boston_x_test = Matrix::new(test_size, 13, boston_x_test); 61 | // let boston_y_test = Vector::new(boston_y_test); 62 | let boston_y_test = Matrix::new(test_size, 1, boston_y_test); 63 | 64 | // Create a normal generalised linear model 65 | let mut normal_model = GenLinearModel::new(Normal); 66 | normal_model.train(&boston_x_train, &boston_y_train)?; 67 | 68 | let predictions = normal_model.predict(&boston_x_test).unwrap(); 69 | let predictions = Matrix::new(test_size, 1, predictions); 70 | let acc = neg_mean_squared_error(&predictions, &boston_y_test); 71 | println!("glm poisson accuracy: {:?}", acc); 72 | println!("glm poisson R2 score: {:?}", r_squared_score( 73 | &boston_y_test.data(), &predictions.data())); 74 | 75 | Ok(()) 76 | } 77 | -------------------------------------------------------------------------------- /chapter2/rustymachine_regression/src/lin_reg.rs: -------------------------------------------------------------------------------- 1 | /// Data gathered from https://www.kaggle.com/vikrishnan/boston-house-prices 2 | /// Boston dataset: https://www.cs.toronto.edu/~delve/data/boston/bostonDetail.html 3 | /// This module shows how to run regression models 4 | use std::io::prelude::*; 5 | use std::io::BufReader; 6 | use std::path::Path; 7 | use std::fs::File; 8 | use std::vec::Vec; 9 | use std::error::Error; 10 | use std::process::exit; 11 | use std::env::args; 12 | 13 | use rusty_machine; 14 | use rusty_machine::linalg::Matrix; 15 | // use rusty_machine::linalg::BaseMatrix; 16 | use rusty_machine::linalg::Vector; 17 | use rusty_machine::learning::lin_reg::LinRegressor; 18 | use rusty_machine::learning::gp::GaussianProcess; 19 | use rusty_machine::learning::gp::ConstMean; 20 | use rusty_machine::learning::toolkit::kernel; 21 | use rusty_machine::learning::glm::{GenLinearModel, Normal}; 22 | use rusty_machine::analysis::score::neg_mean_squared_error; 23 | use rusty_machine::learning::SupModel; 24 | 25 | // use ndarray::{Array, arr1}; 26 | use rand; 27 | use rand::thread_rng; 28 | use rand::seq::SliceRandom; 29 | 30 | use ml_utils::datasets::get_boston_records_from_file; 31 | use ml_utils::sup_metrics::r_squared_score; 32 | 33 | pub fn run() -> Result<(), Box> { 34 | // Get all the data 35 | let filename = "data/housing.csv"; 36 | let mut data = 
get_boston_records_from_file(&filename); // file must be in the folder data 37 | 38 | // shuffle the data. 39 | data.shuffle(&mut thread_rng()); 40 | 41 | // separate out to train and test datasets. 42 | let test_size: f64 = 0.2; 43 | let test_size: f64 = data.len() as f64 * test_size; 44 | let test_size = test_size.round() as usize; 45 | let (test_data, train_data) = data.split_at(test_size); 46 | let train_size = train_data.len(); 47 | let test_size = test_data.len(); 48 | 49 | // differentiate the features and the targets. 50 | let boston_x_train: Vec = train_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 51 | let boston_y_train: Vec = train_data.iter().map(|r| r.into_targets()).collect(); 52 | 53 | let boston_x_test: Vec = test_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 54 | let boston_y_test: Vec = test_data.iter().map(|r| r.into_targets()).collect(); 55 | 56 | // using ndarray 57 | // let boston_x_train = Array::from_shape_vec((train_size, 13), boston_x_train).unwrap(); 58 | // let boston_y_train = Array::from_vec(boston_y_train); 59 | // let boston_x_test = Array::from_shape_vec((test_size, 13), boston_x_test).unwrap(); 60 | // let boston_y_test = Array::from_vec(boston_y_test); 61 | 62 | // COnvert the data into matrices for rusty machine 63 | let boston_x_train = Matrix::new(train_size, 13, boston_x_train); 64 | let boston_y_train = Vector::new(boston_y_train); 65 | let boston_x_test = Matrix::new(test_size, 13, boston_x_test); 66 | // let boston_y_test = Vector::new(boston_y_test); 67 | let boston_y_test = Matrix::new(test_size, 1, boston_y_test); 68 | 69 | // Create a linear regression model 70 | let mut lin_model = LinRegressor::default(); 71 | println!("{:?}", lin_model); 72 | 73 | // Train the model 74 | lin_model.train(&boston_x_train, &boston_y_train); 75 | 76 | // Now we will predict 77 | let predictions = lin_model.predict(&boston_x_test).unwrap(); 78 | let predictions = Matrix::new(test_size, 1, predictions); 79 | let acc = neg_mean_squared_error(&predictions, &boston_y_test); 80 | println!("linear regression error: {:?}", acc); 81 | println!("linear regression R2 score: {:?}", r_squared_score( 82 | &boston_y_test.data(), &predictions.data())); 83 | 84 | Ok(()) 85 | } 86 | -------------------------------------------------------------------------------- /chapter2/rustymachine_regression/src/main.rs: -------------------------------------------------------------------------------- 1 | /// Data gathered from https://www.kaggle.com/vikrishnan/boston-house-prices 2 | /// Boston dataset: https://www.cs.toronto.edu/~delve/data/boston/bostonDetail.html 3 | /// This module shows how to run regression models 4 | extern crate serde; 5 | // This lets us write `#[derive(Deserialize)]`. 6 | #[macro_use] 7 | extern crate serde_derive; 8 | 9 | use std::vec::Vec; 10 | use std::process::exit; 11 | use std::env::args; 12 | 13 | mod lin_reg; 14 | mod gaussian_process_reg; 15 | mod glms; 16 | 17 | fn main() { 18 | let args: Vec = args().collect(); 19 | let model = if args.len() < 2 { 20 | None 21 | } else { 22 | Some(args[1].as_str()) 23 | }; 24 | let res = match model { 25 | None => {println!("nothing", ); Ok(())}, 26 | Some("lr") => lin_reg::run(), 27 | Some("gp") => gaussian_process_reg::run(), 28 | Some("glms") => glms::run(), 29 | Some(_) => lin_reg::run(), 30 | }; 31 | // Putting the main code in another function serves two purposes: 32 | // 1. We can use the `?` operator. 33 | // 2. We can call exit safely, which does not run any destructors. 
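// Example invocations, matching the match arms above:
//   cargo run lr     -> linear regression
//   cargo run gp     -> Gaussian process regression
//   cargo run glms   -> generalised linear model
// Any other argument falls through to linear regression; no argument
// prints "nothing".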
34 | exit(match res { 35 | Ok(_) => 0, 36 | Err(e) => { 37 | println!("{}", e); 38 | 1 39 | } 40 | }) 41 | } -------------------------------------------------------------------------------- /chapter3/reinforcement-learning-frozenlake/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "reinforcement-learning-frozenlake" 3 | version = "0.1.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | rsrl = { git = "https://github.com/Ploppz/rsrl" } 9 | slog = "2.4.1" 10 | ndarray = "0.12.1" 11 | ndarray-linalg = "0.10" -------------------------------------------------------------------------------- /chapter3/reinforcement-learning-frozenlake/src/main.rs: -------------------------------------------------------------------------------- 1 | extern crate rsrl; 2 | #[macro_use] 3 | extern crate slog; 4 | 5 | use rsrl::{ 6 | control::gtd::GreedyGQ, 7 | core::{make_shared, run, Evaluation, Parameter, SerialExperiment}, 8 | domains::{Domain, MountainCar}, 9 | fa::{basis::{Composable, fixed::Fourier}, LFA}, 10 | geometry::Space, 11 | logging, 12 | policies::fixed::{EpsilonGreedy, Greedy, Random}, 13 | }; 14 | 15 | fn main() { 16 | let logger = logging::root(logging::stdout()); 17 | 18 | let domain = MountainCar::default(); 19 | let mut agent = { 20 | let n_actions = domain.action_space().card().into(); 21 | 22 | // Build the linear value functions using a fourier basis projection. 23 | let bases = Fourier::from_space(3, domain.state_space()); 24 | let v_func = LFA::scalar(bases.clone()); 25 | let q_func = make_shared(LFA::vector(bases, n_actions)); 26 | 27 | // Build a stochastic behaviour policy with exponential epsilon. 28 | let policy = EpsilonGreedy::new( 29 | Greedy::new(q_func.clone()), 30 | Random::new(n_actions), 31 | Parameter::exponential(0.2, 0.0001, 0.99), 32 | ); 33 | 34 | GreedyGQ::new(q_func, v_func, policy, 0.01, 0.001, 0.99) 35 | }; 36 | 37 | let domain_builder = Box::new(MountainCar::default); 38 | 39 | // Training phase: 40 | let _training_result = { 41 | // Start a serial learning experiment up to 1000 steps per episode. 42 | let e = SerialExperiment::new(&mut agent, domain_builder.clone(), 1000); 43 | 44 | // Realise 1000 episodes of the experiment generator. 
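// (Note: the call below passes 2000, so 2000 episodes are realised rather
// than the 1000 mentioned above; rsrl's run(experiment, n_episodes, logger)
// drives the experiment for that many episodes and returns the result.)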
45 | run(e, 2000, Some(logger.clone())) 46 | }; 47 | 48 | // Testing phase: 49 | let testing_result = Evaluation::new(&mut agent, domain_builder).next().unwrap(); 50 | 51 | info!(logger, "solution"; testing_result); 52 | } -------------------------------------------------------------------------------- /chapter3/rsrl_custom/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rsrl_custom" 3 | version = "0.1.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | rsrl = { git = "https://github.com/infinite-Joy/rsrl", branch = "mymodel" } 9 | slog = "2.4.1" 10 | ndarray = "0.12.0" -------------------------------------------------------------------------------- /chapter3/rusty_machine_unsupervised/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rusty_machine_unsupervised" 3 | version = "0.1.0" 4 | authors = ["joydeep bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | csv = "1.1.1" 9 | #rusty-machine="0.5.4" 10 | rusty-machine= { path = "../rusty-machine" } 11 | serde = "1" 12 | serde_derive = "1" 13 | rand = "0.7.0" 14 | ml-utils = { path = "../../chapter2/ml-utils" } -------------------------------------------------------------------------------- /chapter3/rusty_machine_unsupervised/data/iris.csv: -------------------------------------------------------------------------------- 1 | sepal_length,sepal_width,petal_length,petal_width,species 2 | 5.1,3.5,1.4,0.2,setosa 3 | 4.9,3.0,1.4,0.2,setosa 4 | 4.7,3.2,1.3,0.2,setosa 5 | 4.6,3.1,1.5,0.2,setosa 6 | 5.0,3.6,1.4,0.2,setosa 7 | 5.4,3.9,1.7,0.4,setosa 8 | 4.6,3.4,1.4,0.3,setosa 9 | 5.0,3.4,1.5,0.2,setosa 10 | 4.4,2.9,1.4,0.2,setosa 11 | 4.9,3.1,1.5,0.1,setosa 12 | 5.4,3.7,1.5,0.2,setosa 13 | 4.8,3.4,1.6,0.2,setosa 14 | 4.8,3.0,1.4,0.1,setosa 15 | 4.3,3.0,1.1,0.1,setosa 16 | 5.8,4.0,1.2,0.2,setosa 17 | 5.7,4.4,1.5,0.4,setosa 18 | 5.4,3.9,1.3,0.4,setosa 19 | 5.1,3.5,1.4,0.3,setosa 20 | 5.7,3.8,1.7,0.3,setosa 21 | 5.1,3.8,1.5,0.3,setosa 22 | 5.4,3.4,1.7,0.2,setosa 23 | 5.1,3.7,1.5,0.4,setosa 24 | 4.6,3.6,1.0,0.2,setosa 25 | 5.1,3.3,1.7,0.5,setosa 26 | 4.8,3.4,1.9,0.2,setosa 27 | 5.0,3.0,1.6,0.2,setosa 28 | 5.0,3.4,1.6,0.4,setosa 29 | 5.2,3.5,1.5,0.2,setosa 30 | 5.2,3.4,1.4,0.2,setosa 31 | 4.7,3.2,1.6,0.2,setosa 32 | 4.8,3.1,1.6,0.2,setosa 33 | 5.4,3.4,1.5,0.4,setosa 34 | 5.2,4.1,1.5,0.1,setosa 35 | 5.5,4.2,1.4,0.2,setosa 36 | 4.9,3.1,1.5,0.1,setosa 37 | 5.0,3.2,1.2,0.2,setosa 38 | 5.5,3.5,1.3,0.2,setosa 39 | 4.9,3.1,1.5,0.1,setosa 40 | 4.4,3.0,1.3,0.2,setosa 41 | 5.1,3.4,1.5,0.2,setosa 42 | 5.0,3.5,1.3,0.3,setosa 43 | 4.5,2.3,1.3,0.3,setosa 44 | 4.4,3.2,1.3,0.2,setosa 45 | 5.0,3.5,1.6,0.6,setosa 46 | 5.1,3.8,1.9,0.4,setosa 47 | 4.8,3.0,1.4,0.3,setosa 48 | 5.1,3.8,1.6,0.2,setosa 49 | 4.6,3.2,1.4,0.2,setosa 50 | 5.3,3.7,1.5,0.2,setosa 51 | 5.0,3.3,1.4,0.2,setosa 52 | 7.0,3.2,4.7,1.4,versicolor 53 | 6.4,3.2,4.5,1.5,versicolor 54 | 6.9,3.1,4.9,1.5,versicolor 55 | 5.5,2.3,4.0,1.3,versicolor 56 | 6.5,2.8,4.6,1.5,versicolor 57 | 5.7,2.8,4.5,1.3,versicolor 58 | 6.3,3.3,4.7,1.6,versicolor 59 | 4.9,2.4,3.3,1.0,versicolor 60 | 6.6,2.9,4.6,1.3,versicolor 61 | 5.2,2.7,3.9,1.4,versicolor 62 | 5.0,2.0,3.5,1.0,versicolor 63 | 5.9,3.0,4.2,1.5,versicolor 64 | 6.0,2.2,4.0,1.0,versicolor 65 | 6.1,2.9,4.7,1.4,versicolor 66 | 5.6,2.9,3.6,1.3,versicolor 67 | 6.7,3.1,4.4,1.4,versicolor 68 | 5.6,3.0,4.5,1.5,versicolor 69 | 5.8,2.7,4.1,1.0,versicolor 70 | 6.2,2.2,4.5,1.5,versicolor 71 | 
5.6,2.5,3.9,1.1,versicolor 72 | 5.9,3.2,4.8,1.8,versicolor 73 | 6.1,2.8,4.0,1.3,versicolor 74 | 6.3,2.5,4.9,1.5,versicolor 75 | 6.1,2.8,4.7,1.2,versicolor 76 | 6.4,2.9,4.3,1.3,versicolor 77 | 6.6,3.0,4.4,1.4,versicolor 78 | 6.8,2.8,4.8,1.4,versicolor 79 | 6.7,3.0,5.0,1.7,versicolor 80 | 6.0,2.9,4.5,1.5,versicolor 81 | 5.7,2.6,3.5,1.0,versicolor 82 | 5.5,2.4,3.8,1.1,versicolor 83 | 5.5,2.4,3.7,1.0,versicolor 84 | 5.8,2.7,3.9,1.2,versicolor 85 | 6.0,2.7,5.1,1.6,versicolor 86 | 5.4,3.0,4.5,1.5,versicolor 87 | 6.0,3.4,4.5,1.6,versicolor 88 | 6.7,3.1,4.7,1.5,versicolor 89 | 6.3,2.3,4.4,1.3,versicolor 90 | 5.6,3.0,4.1,1.3,versicolor 91 | 5.5,2.5,4.0,1.3,versicolor 92 | 5.5,2.6,4.4,1.2,versicolor 93 | 6.1,3.0,4.6,1.4,versicolor 94 | 5.8,2.6,4.0,1.2,versicolor 95 | 5.0,2.3,3.3,1.0,versicolor 96 | 5.6,2.7,4.2,1.3,versicolor 97 | 5.7,3.0,4.2,1.2,versicolor 98 | 5.7,2.9,4.2,1.3,versicolor 99 | 6.2,2.9,4.3,1.3,versicolor 100 | 5.1,2.5,3.0,1.1,versicolor 101 | 5.7,2.8,4.1,1.3,versicolor 102 | 6.3,3.3,6.0,2.5,virginica 103 | 5.8,2.7,5.1,1.9,virginica 104 | 7.1,3.0,5.9,2.1,virginica 105 | 6.3,2.9,5.6,1.8,virginica 106 | 6.5,3.0,5.8,2.2,virginica 107 | 7.6,3.0,6.6,2.1,virginica 108 | 4.9,2.5,4.5,1.7,virginica 109 | 7.3,2.9,6.3,1.8,virginica 110 | 6.7,2.5,5.8,1.8,virginica 111 | 7.2,3.6,6.1,2.5,virginica 112 | 6.5,3.2,5.1,2.0,virginica 113 | 6.4,2.7,5.3,1.9,virginica 114 | 6.8,3.0,5.5,2.1,virginica 115 | 5.7,2.5,5.0,2.0,virginica 116 | 5.8,2.8,5.1,2.4,virginica 117 | 6.4,3.2,5.3,2.3,virginica 118 | 6.5,3.0,5.5,1.8,virginica 119 | 7.7,3.8,6.7,2.2,virginica 120 | 7.7,2.6,6.9,2.3,virginica 121 | 6.0,2.2,5.0,1.5,virginica 122 | 6.9,3.2,5.7,2.3,virginica 123 | 5.6,2.8,4.9,2.0,virginica 124 | 7.7,2.8,6.7,2.0,virginica 125 | 6.3,2.7,4.9,1.8,virginica 126 | 6.7,3.3,5.7,2.1,virginica 127 | 7.2,3.2,6.0,1.8,virginica 128 | 6.2,2.8,4.8,1.8,virginica 129 | 6.1,3.0,4.9,1.8,virginica 130 | 6.4,2.8,5.6,2.1,virginica 131 | 7.2,3.0,5.8,1.6,virginica 132 | 7.4,2.8,6.1,1.9,virginica 133 | 7.9,3.8,6.4,2.0,virginica 134 | 6.4,2.8,5.6,2.2,virginica 135 | 6.3,2.8,5.1,1.5,virginica 136 | 6.1,2.6,5.6,1.4,virginica 137 | 7.7,3.0,6.1,2.3,virginica 138 | 6.3,3.4,5.6,2.4,virginica 139 | 6.4,3.1,5.5,1.8,virginica 140 | 6.0,3.0,4.8,1.8,virginica 141 | 6.9,3.1,5.4,2.1,virginica 142 | 6.7,3.1,5.6,2.4,virginica 143 | 6.9,3.1,5.1,2.3,virginica 144 | 5.8,2.7,5.1,1.9,virginica 145 | 6.8,3.2,5.9,2.3,virginica 146 | 6.7,3.3,5.7,2.5,virginica 147 | 6.7,3.0,5.2,2.3,virginica 148 | 6.3,2.5,5.0,1.9,virginica 149 | 6.5,3.0,5.2,2.0,virginica 150 | 6.2,3.4,5.4,2.3,virginica 151 | 5.9,3.0,5.1,1.8,virginica 152 | -------------------------------------------------------------------------------- /chapter4/SQL_db/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "SQL_db" 3 | version = "0.1.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | postgres = "0.15.2" 9 | -------------------------------------------------------------------------------- /chapter4/SQL_db/src/main.rs: -------------------------------------------------------------------------------- 1 | use postgres; 2 | use postgres::{Connection, TlsMode, Error}; 3 | 4 | #[derive(Debug)] 5 | struct Weather { 6 | id: i32, 7 | month: String, 8 | normal: f64, 9 | warmest: f64, 10 | coldest: f64 11 | } 12 | 13 | fn main() -> Result<(), Box> { 14 | let conn = Connection::connect("postgresql://postgres:postgres@localhost:5432/postgres", 15 | TlsMode::None)?; 16 | 17 | conn.execute("CREATE TABLE IF NOT 
EXISTS weather ( 18 | id SERIAL PRIMARY KEY, 19 | month VARCHAR NOT NULL, 20 | normal DOUBLE PRECISION NOT NULL, 21 | warmest DOUBLE PRECISION NOT NULL, 22 | coldest DOUBLE PRECISION NOT NULL 23 | )", &[])?; 24 | let weathers = vec![ 25 | ("January", 21.3, 27.3, 15.1), 26 | ("February", 23.6, 30.1, 17.0), 27 | ("March", 26.1, 32.7, 19.5), 28 | ("April", 28.0, 34.2, 21.8), 29 | ("May", 27.4, 33.2, 21.4), 30 | ("June", 24.6, 29.2, 20.1), 31 | ("July", 23.9, 28.1, 19.7), 32 | ("August", 23.5, 27.4, 19.5), 33 | ("September", 23.9, 28.2, 19.6), 34 | ("October", 23.7, 28.0, 19.3), 35 | ("November", 22.2, 27.0, 17.5), 36 | ("December", 21.1, 26.2, 16.0) 37 | ]; 38 | 39 | for weather in &weathers { 40 | conn.execute("INSERT INTO weather (month, normal, warmest, coldest) VALUES ($1, $2, $3, $4)", 41 | &[&weather.0, &weather.1, &weather.2, &weather.3])?; 42 | } 43 | 44 | for row in &conn.query("SELECT id, month, normal, warmest, coldest FROM weather", &[])? { 45 | let weather = Weather { 46 | id: row.get(0), 47 | month: row.get(1), 48 | normal: row.get(2), 49 | warmest: row.get(3), 50 | coldest: row.get(4) 51 | }; 52 | println!("{:?}", weather); 53 | } 54 | 55 | // get the average value 56 | for row in &conn.query("SELECT AVG(warmest) FROM weather;", &[])? { 57 | let x: f64 = row.get(0); 58 | println!("{:?}", x); 59 | } 60 | 61 | Ok(()) 62 | } -------------------------------------------------------------------------------- /chapter4/data_formats/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "data_formats" 3 | version = "0.1.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | serde = "1.0.90" 9 | serde_derive = "1.0.90" 10 | serde_json = "1.0" 11 | serde-xml-rs = "0.3.1" 12 | csv = "1.0.7" -------------------------------------------------------------------------------- /chapter4/data_formats/data/sample_2.xml: -------------------------------------------------------------------------------- <project name="...">
    <libraries>
        <library groupId="..." artifactId="..." version="..." />
    </libraries>
    <module>
        <files>
            <file name="..." type="...">Some &lt;java&gt; class</file>
            <file name="..." type="...">Another "java" class</file>
            <file name="..." type="...">Weird 'XML' config</file>
        </files>
    </module>
    <module>
        <files>
            <file name="..." type="...">JavaScript &amp; program</file>
            <file name="..." type="...">Cascading style sheet: © - ҉</file>
        </files>
    </module>
</project> -------------------------------------------------------------------------------- /chapter4/data_formats/src/csvreading.rs: -------------------------------------------------------------------------------- 1 | use std::result::Result; 2 | use std::vec::Vec; 3 | use std::error::Error; 4 | use std::fs::File; 5 | use std::env; 6 | use std::ffi::OsString; 7 | 8 | use csv; 9 | use csv::ReaderBuilder; 10 | 11 | #[derive(Debug)] 12 | #[derive(Deserialize)] 13 | struct IrisRecord { 14 | sepal_length: f64, 15 | sepal_width: f64, 16 | petal_length: f64, 17 | petal_width: f64, 18 | species: String, 19 | } 20 | 21 | /// Returns the second positional argument sent to this process (the first
/// selects the reader in main.rs). If it is missing, this returns an error.
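// Example invocation (hypothetical file path):
//   cargo run csv data/iris.csv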
23 | fn get_first_arg() -> Result> { 24 | match env::args_os().nth(2) { 25 | None => Err(From::from("expected 2 arguments, but got none")), 26 | Some(file_path) => Ok(file_path), 27 | } 28 | } 29 | pub fn run() -> Result<(), Box> { 30 | let file_path = get_first_arg()?; 31 | let file = File::open(file_path)?; 32 | let mut rdr = ReaderBuilder::new() 33 | .has_headers(true) 34 | .from_reader(file); 35 | let mut iris_matrix: Vec = vec![]; 36 | 37 | for result in rdr.deserialize() { 38 | let record: IrisRecord = result?; 39 | iris_matrix.push(record); 40 | } 41 | println!("{:#?}", iris_matrix); 42 | 43 | Ok(()) 44 | } -------------------------------------------------------------------------------- /chapter4/data_formats/src/jsonreading.rs: -------------------------------------------------------------------------------- 1 | use std::str::FromStr; 2 | use serde::{de, Deserialize, Deserializer}; 3 | use std::result::Result; 4 | use std::vec::Vec; 5 | use std::error::Error; 6 | use std::fs::File; 7 | 8 | #[derive(Debug, Serialize, Deserialize)] 9 | struct Prizes { 10 | prizes: Vec, 11 | } 12 | 13 | #[derive(Debug, Serialize, Deserialize)] 14 | #[allow(non_snake_case)] 15 | struct Prize { 16 | category: String, 17 | #[serde(default)] 18 | overallMotivation: Option, 19 | laureates: Vec, 20 | #[serde(deserialize_with = "de_u16_from_str")] 21 | year: u16, 22 | } 23 | 24 | #[derive(Debug, Serialize, Deserialize)] 25 | struct NobelLaureate { 26 | share: String, 27 | #[serde(default)] 28 | motivation: Option, 29 | surname: String, 30 | #[serde(deserialize_with = "de_u16_from_str")] 31 | id: u16, 32 | firstname: String, 33 | } 34 | 35 | fn de_u16_from_str<'a, D>(deserializer: D) -> Result 36 | where D: Deserializer<'a> 37 | { 38 | let s = String::deserialize(deserializer)?; 39 | u16::from_str(&s).map_err(de::Error::custom) 40 | } 41 | 42 | pub fn run() -> Result<(), Box> { 43 | let the_file = r#"{ 44 | "FirstName": "John", 45 | "LastName": "Doe", 46 | "Age": 43, 47 | "Address": { 48 | "Street": "Downing Street 10", 49 | "City": "London", 50 | "Country": "Great Britain" 51 | }, 52 | "PhoneNumbers": [ 53 | "+44 1234567", 54 | "+44 2345678" 55 | ] 56 | }"#; 57 | // random json string 58 | let person: serde_json::Value = serde_json::from_str(the_file).expect("JSON was not well-formatted"); 59 | let address = person.get("Address").unwrap(); 60 | println!("{:?}", address.get("City").unwrap()); 61 | 62 | println!("from prizes json file", ); 63 | let file = File::open("data/prize.json") 64 | .expect("file should open read only"); 65 | let prizes_data: Prizes = serde_json::from_reader(file) 66 | .expect("file should be proper JSON"); 67 | let prizes_0 = &prizes_data.prizes[0]; 68 | println!("category: {:?}", prizes_0.category); 69 | println!("laureates: {:?}", prizes_0.laureates); 70 | println!("overall motivation: {:?}", prizes_0.overallMotivation); 71 | println!("year: {:?}", prizes_0.year); 72 | 73 | Ok(()) 74 | } 75 | -------------------------------------------------------------------------------- /chapter4/data_formats/src/main.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate serde_derive; 3 | 4 | extern crate serde; 5 | extern crate serde_json; 6 | extern crate serde_xml_rs; 7 | 8 | use std::vec::Vec; 9 | use std::process::exit; 10 | use std::env::args; 11 | 12 | mod jsonreading; 13 | mod xmlreading; 14 | mod csvreading; 15 | 16 | fn main() { 17 | let args: Vec = args().collect(); 18 | let model = if args.len() < 2 { 19 | None 20 | } else 
{ 21 | Some(args[1].as_str()) 22 | }; 23 | let res = match model { 24 | None => {println!("nothing", ); Ok(())}, 25 | Some("json") => jsonreading::run(), 26 | Some("xml") => xmlreading::run(), 27 | Some(_) => csvreading::run(), 28 | }; 29 | // Putting the main code in another function serves two purposes: 30 | // 1. We can use the `?` operator. 31 | // 2. We can call exit safely, which does not run any destructors. 32 | exit(match res { 33 | Ok(_) => 0, 34 | Err(e) => { 35 | println!("{}", e); 36 | 1 37 | } 38 | }) 39 | } -------------------------------------------------------------------------------- /chapter4/data_formats/src/xmlreading.rs: -------------------------------------------------------------------------------- 1 | use std::fs::File; 2 | use std::result::Result; 3 | use std::error::Error; 4 | 5 | use serde_xml_rs; 6 | use serde_xml_rs::from_reader; 7 | 8 | #[derive(Deserialize, Debug)] 9 | struct Project { 10 | name: String, 11 | libraries: Vec, 12 | module: Vec, 13 | } 14 | 15 | #[derive(Deserialize, Debug)] 16 | struct Module { 17 | files: Vec, 18 | #[serde(default)] 19 | libraries: Vec, 20 | } 21 | 22 | #[derive(Deserialize, Debug)] 23 | struct Files { 24 | file: Vec, 25 | } 26 | 27 | #[derive(Deserialize, Debug)] 28 | struct FileName { 29 | name: String, 30 | #[serde(rename = "type")] 31 | lang: String, 32 | #[serde(rename = "$value")] 33 | body: String, 34 | } 35 | 36 | #[derive(Deserialize, Debug)] 37 | struct Libraries { 38 | library: Vec, 39 | } 40 | 41 | #[derive(Deserialize, Debug)] 42 | struct Library { 43 | #[serde(rename = "groupId")] 44 | group_id: String, 45 | #[serde(rename = "artifactId")] 46 | artifact_id: String, 47 | version: String, 48 | } 49 | 50 | pub fn run() -> Result<(), Box> { 51 | let file = File::open("data/sample_2.xml").unwrap(); 52 | let project: Project = from_reader(file).unwrap(); 53 | println!("{:#?}", project.libraries[0].library[0]); 54 | println!("{:#?}", project); 55 | Ok(()) 56 | } 57 | -------------------------------------------------------------------------------- /chapter4/data_transformations_datafusion/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "data_transformations_datafusion" 3 | version = "0.1.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | datafusion = "0.13.0" 9 | arrow = "0.13.0" -------------------------------------------------------------------------------- /chapter4/data_transformations_datafusion/src/main.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use arrow; 4 | use datafusion; 5 | use arrow::array::{BinaryArray, Float64Array, UInt16Array, ListArray}; 6 | use arrow::datatypes::{DataType, Field, Schema}; 7 | 8 | use datafusion::execution::context::ExecutionContext; 9 | 10 | fn main() { 11 | // create local execution context 12 | let mut ctx = ExecutionContext::new(); 13 | 14 | // define schema for data source (csv file) 15 | let schema = Arc::new(Schema::new(vec![ 16 | Field::new("PassengerId", DataType::Int32, false), 17 | Field::new("Survived", DataType::Int32, false), 18 | Field::new("Pclass", DataType::Int32, false), 19 | Field::new("Name", DataType::Utf8, false), 20 | Field::new("Sex", DataType::Utf8, false), 21 | Field::new("Age", DataType::Int32, true), 22 | Field::new("SibSp", DataType::Int32, false), 23 | Field::new("Parch", DataType::Int32, false), 24 | Field::new("Ticket", DataType::Utf8, false), 25 | Field::new("Fare", 
DataType::Float64, false), 26 | Field::new("Cabin", DataType::Utf8, true), 27 | Field::new("Embarked", DataType::Utf8, false), 28 | ])); 29 | 30 | // register csv file with the execution context 31 | ctx.register_csv( 32 | "titanic", 33 | "titanic/train.csv", 34 | &schema, 35 | true, 36 | ); 37 | 38 | // simple projection and selection 39 | let sql = "SELECT Name, Sex FROM titanic WHERE Fare > 8"; 40 | let sql1 = "SELECT MAX(Fare) FROM titanic WHERE Survived = 1"; 41 | 42 | // execute the query 43 | let relation = ctx.sql(&sql, 1024 * 1024).unwrap(); 44 | let relation1 = ctx.sql(&sql1, 1024 * 1024).unwrap(); 45 | 46 | // display the relation 47 | let mut results = relation.borrow_mut(); 48 | let mut results1 = relation1.borrow_mut(); 49 | 50 | while let Some(batch) = results.next().unwrap() { 51 | println!( 52 | "RecordBatch has {} rows and {} columns", 53 | batch.num_rows(), 54 | batch.num_columns() 55 | ); 56 | 57 | let name = batch 58 | .column(0) 59 | .as_any() 60 | .downcast_ref::() 61 | .unwrap(); 62 | 63 | let sex = batch 64 | .column(1) 65 | .as_any() 66 | // .downcast_ref::() 67 | .downcast_ref::() 68 | .unwrap(); 69 | 70 | for i in 0..batch.num_rows() { 71 | let name_value: String = String::from_utf8(name.value(i).to_vec()).unwrap(); 72 | let sex_value: String = String::from_utf8(sex.value(i).to_vec()).unwrap(); 73 | 74 | println!("name: {}, sex: {}", name_value, sex_value,); 75 | } 76 | } 77 | while let Some(batch) = results1.next().unwrap() { 78 | println!( 79 | "RecordBatch has {} rows and {} columns", 80 | batch.num_rows(), 81 | batch.num_columns() 82 | ); 83 | 84 | let name = batch 85 | .column(0) 86 | .as_any() 87 | .downcast_ref::() 88 | .unwrap(); 89 | 90 | for i in 0..batch.num_rows() { 91 | let name_value: f64 = name.value(i); 92 | 93 | println!("name: {}", name_value,); 94 | } 95 | } 96 | println!("Hello, world!"); 97 | } -------------------------------------------------------------------------------- /chapter4/databases/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "databases" 3 | version = "0.1.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | postgres = "0.15.2" 9 | rusted_cypher = "1.1.0" -------------------------------------------------------------------------------- /chapter4/databases/src/main.rs: -------------------------------------------------------------------------------- 1 | use std::vec::Vec; 2 | use std::process::exit; 3 | use std::env::args; 4 | 5 | mod neo4j_db; 6 | mod postgres_db; 7 | 8 | fn main() { 9 | let args: Vec = args().collect(); 10 | let model = if args.len() < 2 { 11 | None 12 | } else { 13 | Some(args[1].as_str()) 14 | }; 15 | let res = match model { 16 | None => {println!("nothing", ); Ok(())}, 17 | Some("postgres") => postgres_db::run(), 18 | Some("neo4j") => neo4j_db::run(), 19 | Some(_) => {println!("nothing", ); Ok(())}, 20 | }; 21 | // Putting the main code in another function serves two purposes: 22 | // 1. We can use the `?` operator. 23 | // 2. We can call exit safely, which does not run any destructors. 
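// Example invocations, matching the match arms above:
//   cargo run postgres
//   cargo run neo4j
// Any other argument (or none) just prints "nothing".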
24 | exit(match res { 25 | Ok(_) => 0, 26 | Err(e) => { 27 | println!("{}", e); 28 | 1 29 | } 30 | }) 31 | } -------------------------------------------------------------------------------- /chapter4/databases/src/neo4j_db.rs: -------------------------------------------------------------------------------- 1 | use std::error::Error; 2 | 3 | use rusted_cypher; 4 | use rusted_cypher::{GraphClient, Statement}; 5 | use std::iter::repeat; 6 | 7 | pub fn run() -> Result<(), Box> { 8 | // let graph = GraphClient::connect( 9 | // "http://neo4j:neo4j@localhost:7474/db/data"); 10 | let graph = GraphClient::connect( 11 | "http://localhost:7474/db/data")?; 12 | 13 | let mut query = graph.query(); 14 | 15 | // create index 16 | let statement1 = Statement::new( 17 | "CREATE CONSTRAINT ON (m:Movie) ASSERT m.id IS UNIQUE;"); 18 | let statement2 = Statement::new( 19 | " CREATE CONSTRAINT ON (u:User) ASSERT u.id IS UNIQUE;" 20 | ); 21 | let statement3 = Statement::new( 22 | " CREATE CONSTRAINT ON (g:Genre) ASSERT g.name IS UNIQUE;" 23 | ); 24 | 25 | query.add_statement(statement1); 26 | query.add_statement(statement2); 27 | query.add_statement(statement3); 28 | 29 | query.send()?; 30 | 31 | // import movies.csv 32 | graph.exec( 33 | "USING PERIODIC COMMIT LOAD CSV WITH HEADERS \ 34 | FROM \"http://10.0.1.43:8000/movies.csv\" AS line \ 35 | WITH line, SPLIT(line.genres, \"|\") AS Genres \ 36 | CREATE (m:Movie { id: TOINTEGER(line.`movieId`), title: line.`title` }) \ 37 | WITH Genres \ 38 | UNWIND RANGE(0, SIZE(Genres)-1) as i \ 39 | MERGE (g:Genre {name: UPPER(Genres[i])}) \ 40 | CREATE (m)-[r:GENRE {position:i+1}]->(g);" 41 | )?; 42 | 43 | // import ratings.csv 44 | graph.exec( 45 | " USING PERIODIC COMMIT LOAD CSV WITH HEADERS \ 46 | FROM \"http://10.0.1.43:8000/ratings.csv\" AS line \ 47 | WITH line \ 48 | MATCH (m:Movie { id: TOINTEGER(line.`movieId`) }) \ 49 | MATCH (u:User { id: TOINTEGER(line.`userId`) }) \ 50 | CREATE (u)-[r:RATING {rating: TOFLOAT(line.`rating`)}]->(m);" 51 | )?; 52 | 53 | // import tags 54 | graph.exec( 55 | " USING PERIODIC COMMIT LOAD CSV WITH HEADERS \ 56 | FROM \"http://10.0.1.43:8000/tags.csv\" AS line \ 57 | WITH line \ 58 | MATCH (m:Movie { id: TOINTEGER(line.`movieId`) }) \ 59 | MERGE (u:User { id: TOINTEGER(line.`userId`) }) \ 60 | CREATE (u)-[r:TAG {tag: line.`tag`}]->(m);" 61 | )?; 62 | 63 | let result = graph.exec( 64 | "MATCH (u:User {id: 119}) RETURN u.id")?; 65 | 66 | assert_eq!(result.data.len(), 1); 67 | 68 | for row in result.rows() { 69 | let id: u16 = row.get("u.id")?; 70 | println!("user id: {}", id); 71 | } 72 | 73 | // understand the shortest paths between all 74 | 75 | let all_users = graph.exec( 76 | "MATCH (u:User) RETURN u.id")?; 77 | let all_users: Vec = all_users.rows().map(|x| x.get("u.id").unwrap()).collect(); 78 | 79 | let mut length_of_paths = vec![]; 80 | for (u1, u2) in all_users.iter() 81 | .enumerate() 82 | .flat_map(|(i, val)| repeat(val).zip(all_users.iter().skip(i + 1))) { 83 | let statement = format!( 84 | "MATCH (n:User) where n.id IN [{user1}, {user2}] 85 | WITH collect(n) as nodes 86 | UNWIND nodes as n 87 | UNWIND nodes as m 88 | WITH * WHERE id(n) < id(m) 89 | MATCH path = allShortestPaths( (n)-[*..4]-(m) ) 90 | RETURN min(length(path))", user1=u1, user2=u2); 91 | let min_paths = graph.exec(statement)?; 92 | let min_paths: Vec> = min_paths.rows().map(|x| x.get("min(length(path))").unwrap()).collect(); 93 | match min_paths[0] { 94 | Some(mp) => {length_of_paths.push((u1, u2, mp)); ()}, 95 | _ => (), 96 | }; 97 | } 98 | println!("{:?}", 
length_of_paths); 99 | 100 | Ok(()) 101 | } -------------------------------------------------------------------------------- /chapter4/databases/src/postgres_db.rs: -------------------------------------------------------------------------------- 1 | use std::error::Error; 2 | 3 | use postgres; 4 | use postgres::{Connection, TlsMode}; 5 | 6 | #[derive(Debug)] 7 | struct Weather { 8 | id: i32, 9 | month: String, 10 | normal: f64, 11 | warmest: f64, 12 | coldest: f64 13 | } 14 | pub fn run() -> Result<(), Box> { 15 | let conn = Connection::connect("postgresql://postgres:postgres@localhost:5432/postgres", 16 | TlsMode::None)?; 17 | 18 | conn.execute("CREATE TABLE IF NOT EXISTS weather ( 19 | id SERIAL PRIMARY KEY, 20 | month VARCHAR NOT NULL, 21 | normal DOUBLE PRECISION NOT NULL, 22 | warmest DOUBLE PRECISION NOT NULL, 23 | coldest DOUBLE PRECISION NOT NULL 24 | )", &[])?; 25 | let weathers = vec![ 26 | ("January", 21.3, 27.3, 15.1), 27 | ("February", 23.6, 30.1, 17.0), 28 | ("March", 26.1, 32.7, 19.5), 29 | ("April", 28.0, 34.2, 21.8), 30 | ("May", 27.4, 33.2, 21.4), 31 | ("June", 24.6, 29.2, 20.1), 32 | ("July", 23.9, 28.1, 19.7), 33 | ("August", 23.5, 27.4, 19.5), 34 | ("September", 23.9, 28.2, 19.6), 35 | ("October", 23.7, 28.0, 19.3), 36 | ("November", 22.2, 27.0, 17.5), 37 | ("December", 21.1, 26.2, 16.0) 38 | ]; 39 | 40 | for weather in &weathers { 41 | conn.execute("INSERT INTO weather (month, normal, warmest, coldest) VALUES ($1, $2, $3, $4)", 42 | &[&weather.0, &weather.1, &weather.2, &weather.3])?; 43 | } 44 | 45 | for row in &conn.query("SELECT id, month, normal, warmest, coldest FROM weather", &[])? { 46 | let weather = Weather { 47 | id: row.get(0), 48 | month: row.get(1), 49 | normal: row.get(2), 50 | warmest: row.get(3), 51 | coldest: row.get(4) 52 | }; 53 | println!("{:?}", weather); 54 | } 55 | 56 | // get the average value 57 | for row in &conn.query("SELECT AVG(warmest) FROM weather;", &[])? 
{ 58 | let x: f64 = row.get(0); 59 | println!("{:?}", x); 60 | } 61 | 62 | Ok(()) 63 | } -------------------------------------------------------------------------------- /chapter4/graph_db/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "graph_db" 3 | version = "0.1.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | rusted_cypher = "1.1.0" 9 | -------------------------------------------------------------------------------- /chapter4/graph_db/src/main.rs: -------------------------------------------------------------------------------- 1 | use rusted_cypher; 2 | use rusted_cypher::{GraphClient, Statement, GraphError}; 3 | use std::iter::repeat; 4 | 5 | fn main() -> Result<(), Box> { 6 | // let graph = GraphClient::connect( 7 | // "http://neo4j:neo4j@localhost:7474/db/data"); 8 | let graph = GraphClient::connect( 9 | "http://localhost:7474/db/data")?; 10 | 11 | let mut query = graph.query(); 12 | 13 | // create index 14 | let statement1 = Statement::new( 15 | "CREATE CONSTRAINT ON (m:Movie) ASSERT m.id IS UNIQUE;"); 16 | let statement2 = Statement::new( 17 | " CREATE CONSTRAINT ON (u:User) ASSERT u.id IS UNIQUE;" 18 | ); 19 | let statement3 = Statement::new( 20 | " CREATE CONSTRAINT ON (g:Genre) ASSERT g.name IS UNIQUE;" 21 | ); 22 | 23 | query.add_statement(statement1); 24 | query.add_statement(statement2); 25 | query.add_statement(statement3); 26 | 27 | query.send()?; 28 | 29 | // import movies.csv 30 | graph.exec( 31 | "USING PERIODIC COMMIT LOAD CSV WITH HEADERS \ 32 | FROM \"http://10.0.1.43:8000/movies.csv\" AS line \ 33 | WITH line, SPLIT(line.genres, \"|\") AS Genres \ 34 | CREATE (m:Movie { id: TOINTEGER(line.`movieId`), title: line.`title` }) \ 35 | WITH Genres \ 36 | UNWIND RANGE(0, SIZE(Genres)-1) as i \ 37 | MERGE (g:Genre {name: UPPER(Genres[i])}) \ 38 | CREATE (m)-[r:GENRE {position:i+1}]->(g);" 39 | )?; 40 | 41 | // import ratings.csv 42 | graph.exec( 43 | " USING PERIODIC COMMIT LOAD CSV WITH HEADERS \ 44 | FROM \"http://10.0.1.43:8000/ratings.csv\" AS line \ 45 | WITH line \ 46 | MATCH (m:Movie { id: TOINTEGER(line.`movieId`) }) \ 47 | MATCH (u:User { id: TOINTEGER(line.`userId`) }) \ 48 | CREATE (u)-[r:RATING {rating: TOFLOAT(line.`rating`)}]->(m);" 49 | )?; 50 | 51 | // import tags 52 | graph.exec( 53 | " USING PERIODIC COMMIT LOAD CSV WITH HEADERS \ 54 | FROM \"http://10.0.1.43:8000/tags.csv\" AS line \ 55 | WITH line \ 56 | MATCH (m:Movie { id: TOINTEGER(line.`movieId`) }) \ 57 | MERGE (u:User { id: TOINTEGER(line.`userId`) }) \ 58 | CREATE (u)-[r:TAG {tag: line.`tag`}]->(m);" 59 | )?; 60 | 61 | let result = graph.exec( 62 | "MATCH (u:User {id: 119}) RETURN u.id")?; 63 | 64 | assert_eq!(result.data.len(), 1); 65 | 66 | for row in result.rows() { 67 | let id: u16 = row.get("u.id")?; 68 | println!("user id: {}", id); 69 | } 70 | 71 | // understand the shortest paths between all 72 | 73 | let all_users = graph.exec( 74 | "MATCH (u:User) RETURN u.id")?; 75 | let all_users: Vec = all_users.rows().map(|x| x.get("u.id").unwrap()).collect(); 76 | 77 | let mut length_of_paths = vec![]; 78 | for (u1, u2) in all_users.iter() 79 | .enumerate() 80 | .flat_map(|(i, val)| repeat(val).zip(all_users.iter().skip(i + 1))) { 81 | let statement = format!( 82 | "MATCH (n:User) where n.id IN [{user1}, {user2}] 83 | WITH collect(n) as nodes 84 | UNWIND nodes as n 85 | UNWIND nodes as m 86 | WITH * WHERE id(n) < id(m) 87 | MATCH path = allShortestPaths( (n)-[*..4]-(m) ) 88 | RETURN 
min(length(path))", user1=u1, user2=u2); 89 | let min_paths = graph.exec(statement)?; 90 | let min_paths: Vec> = min_paths.rows().map(|x| x.get("min(length(path))").unwrap()).collect(); 91 | match min_paths[0] { 92 | Some(mp) => {length_of_paths.push((u1, u2, mp)); ()}, 93 | _ => (), 94 | }; 95 | } 96 | println!("{:?}", length_of_paths); 97 | 98 | Ok(()) 99 | } -------------------------------------------------------------------------------- /chapter4/matrix_transformations/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "matrix_transformations" 3 | version = "0.1.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | ndarray-rand = "0.9.0" 9 | ndarray = "0.12.1" -------------------------------------------------------------------------------- /chapter4/matrix_transformations/src/main.rs: -------------------------------------------------------------------------------- 1 | use ndarray; 2 | use ndarray::prelude::*; 3 | use ndarray::stack; 4 | 5 | fn main() { 6 | let a1 = arr2(&[[0., 1., 2.], 7 | [3., 4., 5.]]); 8 | println!("{:?}", a1); 9 | println!("------------------------", ); 10 | 11 | let a2 = Array::from_shape_vec((2, 3).strides((3, 1)), 12 | vec![0., 1., 2., 3., 4., 5.]).unwrap(); 13 | assert!(a1 == a2); 14 | 15 | let a_T = a1.t(); 16 | println!("transposed matrix:"); 17 | println!("{:?}", a_T); 18 | println!("------------------------", ); 19 | 20 | let a_mm = a1.dot(&a_T); 21 | println!("dot product:"); 22 | println!("{:?}", a_mm); 23 | println!("{:?}", a_mm.shape()); // output [2, 2] 24 | println!("------------------------", ); 25 | } 26 | -------------------------------------------------------------------------------- /chapter4/s3_files/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "s3_files" 3 | version = "0.1.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | rusoto_s3 = "0.38.0" 9 | rusoto_core = "0.38.0" 10 | env_logger = "0.6.1" 11 | futures = "0.1.26" 12 | futures-fs = "0.0.5" 13 | rand = "0.6.5" 14 | csv = "1.0.7" 15 | ml-utils = { path = "../../ml-utils" } 16 | rustlearn = "0.5.0" -------------------------------------------------------------------------------- /chapter4/scraping/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "scraping" 3 | version = "0.1.0" 4 | authors = ["joydeep bhattacharjee"] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | reqwest = "0.9.15" 9 | scraper = "0.10.0" -------------------------------------------------------------------------------- /chapter4/scraping/src/main.rs: -------------------------------------------------------------------------------- 1 | use std::time::{SystemTime, UNIX_EPOCH}; 2 | use reqwest; 3 | use scraper::{Selector, Html}; 4 | 5 | 6 | fn main() -> Result<(), Box> { 7 | let start = SystemTime::now(); 8 | let since_the_epoch = start.duration_since(UNIX_EPOCH) 9 | .expect("Time went backwards"); 10 | 11 | let mut resp = reqwest::get( 12 | "https://www.moneycontrol.com/india/stockpricequote/power-generation-distribution/ntpc/NTP")?; 13 | assert!(resp.status().is_success()); 14 | 15 | let body = resp.text().unwrap(); 16 | let fragment = Html::parse_document(&body); 17 | // let stories = Selector::parse("#Bse_Prc_tick > strong:nth-child(1)").unwrap(); 18 | let stories = Selector::parse("#mktdet_2 > div:nth-child(2) > div:nth-child(3) > 
div.FR.gD_12").unwrap(); 19 | 20 | for price in fragment.select(&stories) { 21 | let price_txt = price.text().collect::>(); 22 | if price_txt.len() == 1 { 23 | println!("{:?}", (since_the_epoch, price_txt[0])); 24 | } 25 | } 26 | 27 | Ok(()) 28 | } 29 | 30 | -------------------------------------------------------------------------------- /chapter5/crfsuite-model/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "crfsuite-model" 3 | version = "0.1.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | # data reading and organisation 9 | csv = "1.0.7" 10 | serde = "1" 11 | serde_derive = "1" 12 | rand = "0.6.5" 13 | 14 | # machine learning 15 | crfsuite = "0.2.6" -------------------------------------------------------------------------------- /chapter5/crfsuite-model/src/main.rs: -------------------------------------------------------------------------------- 1 | extern crate serde; 2 | #[macro_use] 3 | extern crate serde_derive; 4 | 5 | use std::io; 6 | use std::vec::Vec; 7 | use std::error::Error; 8 | 9 | use csv; 10 | use rand; 11 | use rand::thread_rng; 12 | use rand::seq::SliceRandom; 13 | 14 | use crfsuite::{Model, Attribute, CrfError}; 15 | use crfsuite::{Trainer, Algorithm, GraphicalModel}; 16 | 17 | #[derive(Debug, Deserialize, Clone)] 18 | pub struct NER { 19 | // #[serde(rename = "")] 20 | // id: String, 21 | lemma: String, 22 | #[serde(rename = "next-lemma")] 23 | next_lemma: String, 24 | // next-next-lemma: String, 25 | // next-next-pos: String, 26 | // next-next-shape: String, 27 | // next-next-word: String, 28 | // next-pos: String, 29 | // next-shape: String, 30 | // next-word: String, 31 | // pos: String, 32 | // prev-iob: String, 33 | // prev-lemma: String, 34 | // prev-pos: String, 35 | // prev-prev-iob: String, 36 | // prev-prev-lemma: String, 37 | // prev-prev-pos: String, 38 | // prev-prev-shape: String, 39 | // prev-prev-word: String, 40 | // prev-shape: String, 41 | // prev-word: String, 42 | // sentence_idx: String, 43 | // shape: String, 44 | word: String, 45 | tag: String 46 | } 47 | 48 | fn get_data() -> Result, Box> { 49 | let mut rdr = csv::Reader::from_reader(io::stdin()); 50 | let mut data = Vec::new(); 51 | for result in rdr.deserialize() { 52 | let r: NER = result?; 53 | data.push(r); 54 | } 55 | // println!("{:?}", data.len()); 56 | data.shuffle(&mut thread_rng()); 57 | Ok(data) 58 | } 59 | 60 | fn split_test_train(data: &[NER], test_size: f32) -> (Vec, Vec) { 61 | let test_size: f32 = data.len() as f32 * test_size; 62 | let test_size = test_size.round() as usize; 63 | 64 | let (test_data, train_data) = data.split_at(test_size); 65 | (test_data.to_vec(), train_data.to_vec()) 66 | } 67 | 68 | fn create_xseq_yseq(data: &[NER]) 69 | -> (Vec>, Vec) { 70 | let mut xseq = vec![]; 71 | let mut yseq = vec![]; 72 | for item in data { 73 | let seq = vec![Attribute::new(item.lemma.clone(), 1.0), 74 | Attribute::new(item.next_lemma.clone(), 0.5)]; // higher weightage for the mainword. 75 | xseq.push(seq); 76 | yseq.push(item.tag.clone()); 77 | } 78 | (xseq, yseq) 79 | } 80 | 81 | fn check_accuracy(preds: &[String], actual: &[String]) { 82 | let mut hits = 0; 83 | let mut correct_hits = 0; 84 | for (predicted, actual) in preds.iter().zip(actual) { 85 | if actual != "O" { // will not consider the other category as it bloats the accuracy. 
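// (Note: the `actual != "O"` re-check on the next line is redundant, since
// this branch has already excluded the "O" tag; hits and correct_hits
// therefore count only the non-"O" labels.)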
86 | if predicted == actual && actual != "O" { 87 | correct_hits += 1; 88 | } 89 | hits += 1; 90 | } 91 | } 92 | println!("accuracy={} ({}/{} correct)", 93 | correct_hits as f32 / hits as f32, 94 | correct_hits, 95 | hits); 96 | } 97 | 98 | fn crfmodel_training(xseq: Vec>, 99 | yseq: Vec, 100 | model_name: &str) -> Result<(), Box> { 101 | let mut trainer = Trainer::new(true); 102 | trainer.select(Algorithm::AROW, GraphicalModel::CRF1D)?; 103 | trainer.append(&xseq, &yseq, 0i32)?; 104 | trainer.train(model_name, -1i32)?; // using all instances for training. 105 | Ok(()) 106 | } 107 | 108 | fn model_prediction(xtest: Vec>, 109 | model_name: &str) 110 | -> Result, Box>{ 111 | let model = Model::from_file(model_name)?; 112 | let mut tagger = model.tagger()?; 113 | let preds = tagger.tag(&xtest)?; 114 | Ok(preds) 115 | } 116 | 117 | fn main() { 118 | let data = get_data().unwrap(); 119 | let (test_data, train_data) = split_test_train(&data, 0.2); 120 | let (xseq_train, yseq_train) = create_xseq_yseq(&train_data); 121 | let (xseq_test, yseq_test) = create_xseq_yseq(&test_data); 122 | crfmodel_training(xseq_train, yseq_train, "rustml.crfsuite").unwrap(); 123 | let preds = model_prediction(xseq_test, "rustml.crfsuite").unwrap(); 124 | check_accuracy(&preds, &yseq_test); 125 | } 126 | -------------------------------------------------------------------------------- /chapter5/fasttext-model/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "fasttext-model" 3 | version = "0.1.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | # data reading and organisation 9 | csv = "1.0.7" 10 | serde = "1" 11 | serde_derive = "1" 12 | rand = "0.6.5" 13 | 14 | # machine learning 15 | fasttext = "0.4.1" 16 | 17 | # text normalisation 18 | stopwords = "0.1.0" 19 | vtext = "0.1.0-alpha.1" 20 | rust-stemmers = "1.1.0" 21 | 22 | # others 23 | itertools = "0.8.0" -------------------------------------------------------------------------------- /chapter5/fasttext-model/src/main.rs: -------------------------------------------------------------------------------- 1 | extern crate serde; 2 | // This lets us write `#[derive(Deserialize)]`. 
3 | #[macro_use] 4 | extern crate serde_derive; 5 | 6 | use std::io; 7 | use std::vec::Vec; 8 | use std::error::Error; 9 | use std::io::Write; 10 | use std::fs::File; 11 | 12 | use csv; 13 | use rand; 14 | use rand::thread_rng; 15 | use rand::seq::SliceRandom; 16 | 17 | use fasttext::{FastText, Args, ModelName, LossName}; 18 | use stopwords; 19 | use std::collections::HashSet; 20 | use stopwords::{Spark, Language, Stopwords}; 21 | use itertools::Itertools; 22 | use vtext::tokenize::VTextTokenizer; 23 | use rust_stemmers::{Algorithm, Stemmer}; 24 | 25 | const TRAIN_FILE: &str = "data.train"; 26 | const TEST_FILE: &str = "data.test"; 27 | const MODEL: &str = "model.bin"; 28 | 29 | #[derive(Debug, Deserialize)] 30 | pub struct SpookyAuthor { 31 | id: String, 32 | text: String, 33 | author: String 34 | } 35 | 36 | impl SpookyAuthor { 37 | pub fn into_tokens(&self) -> String { 38 | // convert all to lowercase 39 | let lc_text = self.text.to_lowercase(); 40 | 41 | // tokenise the words 42 | let tok = VTextTokenizer::new("en"); 43 | let tokens: Vec<&str> = tok.tokenize(lc_text.as_str()).collect(); 44 | 45 | // stem the words 46 | let en_stemmer = Stemmer::create(Algorithm::English); 47 | let tokens: Vec = tokens.iter().map(|x| en_stemmer.stem(x).into_owned()).collect(); 48 | let mut tokens: Vec<&str> = tokens.iter().map(|x| x.as_str()).collect(); 49 | 50 | // remove the stopwords 51 | let stops: HashSet<_> = Spark::stopwords(Language::English) 52 | .unwrap().iter().collect(); 53 | tokens.retain(|s| !stops.contains(s)); 54 | 55 | // join the tokens and return 56 | tokens.iter().join(" ") 57 | } 58 | 59 | fn into_labels(&self) -> String { 60 | match self.author.as_str() { 61 | "EAP" => String::from("__label__EAP"), 62 | "HPL" => String::from("__label__HPL"), 63 | "MWS" => String::from("__label__MWS"), 64 | l => panic!("Not able to parse the target. Some other target got passed. {:?}", l), 65 | } 66 | } 67 | } 68 | 69 | fn push_training_data_to_file(train_data: &[SpookyAuthor], filename: &str) -> Result<(), Box> { 70 | let mut f = File::create(filename)?; 71 | for item in train_data { 72 | writeln!(f, "{} {}", item.into_labels(), item.into_tokens())?; 73 | } 74 | Ok(()) 75 | } 76 | 77 | fn push_test_data_to_file(test_data: &[SpookyAuthor], filename: &str) -> Result<(), Box> { 78 | let mut f = File::create(filename)?; 79 | for item in test_data { 80 | writeln!(f, "{}", item.into_tokens())?; 81 | } 82 | Ok(()) 83 | } 84 | 85 | fn main() -> Result<(), Box> { 86 | let mut rdr = csv::Reader::from_reader(io::stdin()); 87 | let mut data = Vec::new(); 88 | for result in rdr.deserialize() { 89 | let r: SpookyAuthor = result?; 90 | data.push(r); 91 | } 92 | data.shuffle(&mut thread_rng()); 93 | 94 | // separate out to train and test datasets. 
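// Note: fastText's supervised trainer reads one example per line, with the
// label marked by a `__label__` prefix, e.g.
//   __label__EAP some stemmed tokens of the sentence ...
// which is the format push_training_data_to_file writes to data.train above;
// the test split below is written token-only to data.test.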
95 | let test_size: f32 = 0.2; 96 | let test_size: f32 = data.len() as f32 * test_size; 97 | let test_size = test_size.round() as usize; 98 | 99 | let (test_data, train_data) = data.split_at(test_size); 100 | push_training_data_to_file(train_data.to_owned(), TRAIN_FILE)?; 101 | push_test_data_to_file(test_data.to_owned(), TEST_FILE)?; 102 | 103 | // model initiation and training 104 | let mut args = Args::new(); 105 | args.set_input(TRAIN_FILE); 106 | args.set_model(ModelName::SUP); 107 | args.set_loss(LossName::SOFTMAX); 108 | let mut ft_model = FastText::new(); 109 | ft_model.train(&args).unwrap(); 110 | 111 | // accuracy 112 | let preds = test_data.iter().map(|x| ft_model.predict(x.text.as_str(), 1, 0.0)); 113 | let test_labels = test_data.iter().map(|x| x.into_labels()); 114 | let mut hits = 0; 115 | let mut correct_hits = 0; 116 | let preds_clone = preds.clone(); 117 | for (predicted, actual) in preds.zip(test_labels) { 118 | let predicted = predicted?; 119 | let predicted = &predicted[0]; // only taking the first value. 120 | if predicted.clone().label == actual { 121 | correct_hits += 1; 122 | } 123 | hits += 1; 124 | } 125 | assert_eq!(hits, preds_clone.len()); 126 | println!("accuracy={} ({}/{} correct)", correct_hits as f32 / hits as f32, correct_hits, preds_clone.len()); 127 | ft_model.save_model(MODEL)?; 128 | 129 | Ok(()) 130 | } 131 | 132 | #[cfg(test)] 133 | mod tests { 134 | use super::*; 135 | use csv; 136 | 137 | #[test] 138 | fn test_spooky_author() { 139 | let data = "\"id\",\"text\",\"author\"\n\"id26305\",\"This process, however, afforded me no means of ascertaining the dimensions of my dungeon; as I might make its circuit, and return to the point whence I set out, without being aware of the fact; so perfectly uniform seemed the wall.\",\"EAP\"\n\"id17569\",\"It never once occurred to me that the fumbling might be a mere mistake.\",\"HPL\""; 140 | let mut rdr = csv::Reader::from_reader(data.as_bytes()); 141 | let mut data = Vec::new(); 142 | for result in rdr.deserialize() { 143 | let r: SpookyAuthor = result.unwrap(); 144 | data.push(r); 145 | } 146 | assert_eq!(data[0].into_training_string(), "__label__EAP This process, however, afforded me no means of ascertaining the dimensions of my dungeon; as I might make its circuit, and return to the point whence I set out, without being aware of the fact; so perfectly uniform seemed the wall."); 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /chapter5/jigsaw/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "jigsaw" 3 | version = "0.1.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | # nlp related 9 | fasttext = "0.4.1" 10 | vtext = "0.1.0-alpha.1" 11 | stopwords = "0.1.0" 12 | 13 | # matrices 14 | sprs = "0.6.4" 15 | ndarray = "0.12.1" 16 | 17 | # data reading and organisation 18 | csv = "1.0.7" 19 | serde = "1" 20 | serde_derive = "1" 21 | rand = "0.6.5" 22 | 23 | # model building 24 | rustlearn = "0.5.0" -------------------------------------------------------------------------------- /chapter5/jigsaw/references.txt: -------------------------------------------------------------------------------- 1 | https://www.kaggle.com/sandeepkumar121995/keras-bi-gru-lstm-attention-fasttext 2 | https://www.kaggle.com/abhishek/approaching-almost-any-nlp-problem-on-kaggle 3 | 4 | // now we can probably load them to different vectors. 
5 | 
6 | // check out the rust repl https://github.com/google/evcxr
-------------------------------------------------------------------------------- /chapter5/snips-model/Cargo.toml: --------------------------------------------------------------------------------
1 | [package]
2 | name = "snips-model"
3 | version = "0.1.0"
4 | authors = ["Joydeep Bhattacharjee "]
5 | edition = "2018"
6 | 
7 | [dependencies]
8 | snips-nlu-lib = { git = "https://github.com/snipsco/snips-nlu-rs", branch = "master" }
9 | rocket = "0.4.0"
10 | rocket_contrib = "0.4.0"
11 | serde = "1.0"
12 | serde_json = "1.0"
13 | serde_derive = "1.0"
14 | 
-------------------------------------------------------------------------------- /chapter5/snips-model/snips_training.md: --------------------------------------------------------------------------------
1 | python3 -m venv venv
2 | source venv/bin/activate
3 | pip install snips-nlu
4 | python -m snips_nlu download en
5 | snips-nlu download-all-languages
6 | snips-nlu train dataset.json snips.model -v
7 | 
-------------------------------------------------------------------------------- /chapter5/snips-model/src/main.rs: --------------------------------------------------------------------------------
1 | #![feature(proc_macro_hygiene, decl_macro)]
2 | 
3 | #[macro_use] extern crate rocket;
4 | #[macro_use] extern crate rocket_contrib;
5 | #[macro_use] extern crate serde_derive;
6 | extern crate snips_nlu_lib;
7 | 
8 | #[cfg(test)] mod tests;
9 | 
10 | use std::sync::Mutex;
11 | 
12 | use snips_nlu_lib::SnipsNluEngine;
13 | use rocket::{Rocket, State};
14 | use rocket_contrib::json::Json;
15 | 
16 | type Engine = Mutex<SnipsNluEngine>;
17 | 
18 | #[derive(Serialize, Deserialize)]
19 | struct Message {
20 |     contents: String
21 | }
22 | 
23 | fn init_engine() -> SnipsNluEngine {
24 |     let engine_dir = "/home/saionee/opensource/programming-languages/rust-lang/chapter5/snips-nlu-rs/snips.model";
25 |     println!("\nLoading the nlu engine...");
26 |     let engine = SnipsNluEngine::from_path(engine_dir).unwrap();
27 |     engine
28 | }
29 | 
30 | #[get("/")]
31 | fn hello() -> &'static str {
32 |     "Hello, from snips model inference!"
33 | }
34 | 
35 | #[post("/infer", format = "json", data = "<message>")]
36 | fn infer(message: Json<Message>, engine: State<Engine>) -> String {
37 |     let query = message.0.contents;
38 |     let engine = engine.lock().unwrap();
39 |     let result = engine.get_intents(query.trim()).unwrap();
40 |     let result_json = serde_json::to_string_pretty(&result).unwrap();
41 |     result_json
42 | }
43 | 
44 | 
45 | fn rocket() -> Rocket {
46 |     // load the snips inference engine.
47 |     let engine = init_engine();
48 | 
49 |     // Have Rocket manage the engine to be passed to the functions.
50 |     rocket::ignite()
51 |         .manage(Mutex::new(engine))
52 |         .mount("/", routes![hello, infer])
53 | }
54 | 
55 | fn main() {
56 |     rocket().launch();
57 | }
58 | 
-------------------------------------------------------------------------------- /chapter6/adversarial/Cargo.toml: --------------------------------------------------------------------------------
1 | [package]
2 | name = "adversarial"
3 | version = "0.1.0"
4 | authors = ["Joydeep Bhattacharjee "]
5 | edition = "2018"
6 | 
7 | [dependencies]
8 | tch = { git = "https://github.com/LaurentMazare/tch-rs.git", rev = "e92fadc" }
9 | failure = "0.1.5"
-------------------------------------------------------------------------------- /chapter6/adversarial/src/main.rs: --------------------------------------------------------------------------------
1 | // CNN model. This should reach 99.1% accuracy.
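2 | //
3 | // FGSM (the fast gradient sign method) nudges every input pixel by
4 | // epsilon in the direction that increases the loss: x_adv = x + epsilon * sign(grad_x loss).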
5 | 
6 | use tch::{nn, nn::ModuleT, nn::OptimizerConfig, Device, Tensor};
7 | 
8 | #[derive(Debug)]
9 | struct Net {
10 |     conv1: nn::Conv2D,
11 |     conv2: nn::Conv2D,
12 |     fc1: nn::Linear,
13 |     fc2: nn::Linear,
14 | }
15 | 
16 | impl Net {
17 |     fn new(vs: &nn::Path) -> Net {
18 |         let conv1 = nn::conv2d(vs, 1, 32, 5, Default::default());
19 |         let conv2 = nn::conv2d(vs, 32, 64, 5, Default::default());
20 |         let fc1 = nn::linear(vs, 1024, 1024, Default::default());
21 |         let fc2 = nn::linear(vs, 1024, 10, Default::default());
22 |         Net {
23 |             conv1,
24 |             conv2,
25 |             fc1,
26 |             fc2,
27 |         }
28 |     }
29 | }
30 | 
31 | impl nn::ModuleT for Net {
32 |     fn forward_t(&self, xs: &Tensor, train: bool) -> Tensor {
33 |         xs.view(&[-1, 1, 28, 28])
34 |             .apply(&self.conv1)
35 |             .max_pool2d_default(2)
36 |             .apply(&self.conv2)
37 |             .max_pool2d_default(2)
38 |             .view(&[-1, 1024])
39 |             .apply(&self.fc1)
40 |             .relu()
41 |             .dropout_(0.5, train)
42 |             .apply(&self.fc2)
43 |     }
44 | }
45 | 
46 | // FGSM attack code
47 | fn fgsm_attack(image: &Tensor, epsilon: f64, data_grad: &Tensor) -> Tensor {
48 |     // collect the element-wise sign of the data gradient
49 |     let sign_data_grad = data_grad.sign();
50 |     // create the perturbed image by adjusting each pixel of the input image:
51 |     // perturbed_image = image + epsilon * sign_data_grad
52 |     let change = sign_data_grad * epsilon;
53 |     let mut perturbed_image = image + change;
54 |     // add clipping to maintain the [0, 1] pixel range
55 |     let perturbed_image = perturbed_image.clamp_(0., 1.);
56 |     // return the perturbed image
57 |     perturbed_image
58 | }
59 | 
60 | pub fn main() -> failure::Fallible<()> {
61 |     let m = tch::vision::mnist::load_dir("data")?;
62 |     let vs = nn::VarStore::new(Device::cuda_if_available());
63 |     let net = Net::new(&vs.root());
64 |     let opt = nn::Adam::default().build(&vs, 1e-4)?;
65 |     for epoch in 1..100 {
66 |         for (bimages, blabels) in m.train_iter(256).shuffle().to_device(vs.device()) {
67 |             let bimages = bimages.set_requires_grad(true);
68 |             println!("{:?}", bimages.requires_grad());
69 | 
70 |             let data_grad = bimages.grad();
71 |             // println!("{:?}", data_grad.sign());
72 | 
73 |             // call the FGSM attack
74 |             let epsilon = 0.5;
75 |             let perturbed_data = fgsm_attack(&bimages, epsilon, &data_grad);
76 | 
77 |             let loss = net
78 |                 .forward_t(&perturbed_data, true)
79 |                 .cross_entropy_for_logits(&blabels);
80 |             opt.backward_step(&loss);
81 |         }
82 |         let test_accuracy =
83 |             net.batch_accuracy_for_logits(&m.test_images, &m.test_labels, vs.device(), 1024);
84 |         println!("epoch: {:4} test acc: {:5.2}%", epoch, 100. * test_accuracy,);
85 |     }
86 |     Ok(())
87 | }
-------------------------------------------------------------------------------- /chapter6/face-detection-tf/Cargo.toml: --------------------------------------------------------------------------------
1 | [package]
2 | name = "face-detection-tf"
3 | version = "0.1.0"
4 | authors = ["Joydeep Bhattacharjee "]
5 | edition = "2018"
6 | 
7 | [dependencies]
8 | tensorflow = "0.13.0"
9 | structopt = "0.2.15"
10 | image = "0.21.1"
11 | imageproc = "0.18.0"
-------------------------------------------------------------------------------- /chapter6/face-detection-tf/mtcnn.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/practical-machine-learning-w-rust/b0fd379ee4f0f7bcd9276ae6d31576aa655b08d7/chapter6/face-detection-tf/mtcnn.pb -------------------------------------------------------------------------------- /chapter6/face-detection-tf/src/main.rs: --------------------------------------------------------------------------------
1 | use std::path::PathBuf;
2 | use std::error::Error;
3 | 
4 | use tensorflow::Graph;
5 | use tensorflow::ImportGraphDefOptions;
6 | use tensorflow::{Session, SessionOptions, SessionRunArgs, Tensor};
7 | use structopt::StructOpt;
8 | use image;
9 | use image::GenericImageView;
10 | use image::Rgba;
11 | use imageproc;
12 | use imageproc::rect::Rect;
13 | use imageproc::drawing::draw_hollow_rect_mut;
14 | 
15 | const LINE_COLOUR: Rgba<u8> = Rgba {
16 |     data: [0, 255, 0, 0],
17 | };
18 | 
19 | #[derive(Debug, StructOpt)]
20 | #[structopt(name = "face-detection-tf", about = "Face Identification")]
21 | struct Opt {
22 |     #[structopt(short = "i", long = "input", parse(from_os_str))]
23 |     input: PathBuf,
24 | 
25 |     #[structopt(short = "o", long = "output", parse(from_os_str))]
26 |     output: PathBuf
27 | }
28 | 
29 | #[derive(Copy, Clone, Debug)]
30 | pub struct BBox {
31 |     pub x1: f32,
32 |     pub y1: f32,
33 |     pub x2: f32,
34 |     pub y2: f32,
35 |     pub prob: f32,
36 | }
37 | 
38 | /// read the image, flatten the pixel values in BGR channel order (note the
39 | /// reversed rgb indexing below), and load them into a tensor for face prediction.
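40 | /// The resulting tensor has shape [height, width, 3], so the graph takes
41 | /// the whole image in a single feed, with no batch dimension.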
42 | fn get_input_image_tensor(opt: &Opt) -> Result<Tensor<f32>, Box<dyn Error>> {
43 |     let input_image = image::open(&opt.input)?;
44 | 
45 |     let mut flattened: Vec<f32> = Vec::new();
46 |     for (_x, _y, rgb) in input_image.pixels() {
47 |         flattened.push(rgb[2] as f32);
48 |         flattened.push(rgb[1] as f32);
49 |         flattened.push(rgb[0] as f32);
50 |     }
51 |     let input = Tensor::new(
52 |         &[input_image.height() as u64, input_image.width() as u64, 3])
53 |         .with_values(&flattened)?;
54 |     Ok(input)
55 | }
56 | 
57 | fn main() -> Result<(), Box<dyn Error>> {
58 |     let opt = Opt::from_args();
59 |     println!("{:?}", (opt.input.to_owned(), opt.output.to_owned()));
60 |     let input = get_input_image_tensor(&opt)?;
61 | 
62 |     // First, we load up the graph as a byte array
63 |     let model = include_bytes!("../mtcnn.pb");
64 | 
65 |     // Then we create a tensorflow graph from the model
66 |     let mut graph = Graph::new();
67 |     graph.import_graph_def(&*model, &ImportGraphDefOptions::new())?;
68 | 
69 |     let session = Session::new(&SessionOptions::new(), &graph)?;
70 |     let min_size = Tensor::new(&[]).with_values(&[40f32])?;
71 |     let thresholds = Tensor::new(&[3]).with_values(&[0.6f32, 0.7f32, 0.7f32])?;
72 |     let factor = Tensor::new(&[]).with_values(&[0.709f32])?;
73 | 
74 |     let mut args = SessionRunArgs::new();
75 | 
76 |     // Load our parameters for the model
77 |     args.add_feed(&graph.operation_by_name_required("min_size")?, 0, &min_size);
78 |     args.add_feed(&graph.operation_by_name_required("thresholds")?, 0, &thresholds);
79 |     args.add_feed(&graph.operation_by_name_required("factor")?, 0, &factor);
80 | 
81 |     // Load our input image
82 |     args.add_feed(&graph.operation_by_name_required("input")?, 0, &input);
83 | 
84 |     let bbox = args.request_fetch(&graph.operation_by_name_required("box")?, 0);
85 |     let prob = args.request_fetch(&graph.operation_by_name_required("prob")?, 0);
86 | 
87 |     session.run(&mut args)?;
88 | 
89 |     let bbox_res: Tensor<f32> = args.fetch(bbox)?;
90 |     let prob_res: Tensor<f32> = args.fetch(prob)?;
91 | 
92 |     println!("{:?}", bbox_res.dims()); // [120, 4]
93 |     println!("{:?}", prob_res.dims()); // [120]
94 | 
95 |     // Let's store the results as a Vec<BBox>
96 |     let bboxes: Vec<_> = bbox_res
97 |         .chunks_exact(4) // Split into chunks of 4
98 |         .zip(prob_res.iter()) // Combine it with prob_res
99 |         .map(|(bbox, &prob)| BBox {
100 |             y1: bbox[0],
101 |             x1: bbox[1],
102 |             y2: bbox[2],
103 |             x2: bbox[3],
104 |             prob,
105 |         }).collect();
106 |     println!("BBox Length: {}, Bboxes:{:#?}", bboxes.len(), bboxes);
107 | 
108 |     let mut output_image = image::open(&opt.input)?;
109 | 
110 |     for bbox in bboxes {
111 |         let rect = Rect::at(bbox.x1 as i32, bbox.y1 as i32)
112 |             .of_size((bbox.x2 - bbox.x1) as u32, (bbox.y2 - bbox.y1) as u32);
113 |         draw_hollow_rect_mut(&mut output_image, rect, LINE_COLOUR);
114 |     }
115 |     output_image.save(&opt.output)?;
116 | 
117 |     Ok(())
118 | }
-------------------------------------------------------------------------------- /chapter6/finetuning_pytorch_image_models/Cargo.toml: --------------------------------------------------------------------------------
1 | [package]
2 | name = "finetuning_pytorch_image_models"
3 | version = "0.1.0"
4 | authors = ["Joydeep Bhattacharjee "]
5 | edition = "2018"
6 | 
7 | [dependencies]
8 | # tch = "0.0.9"
9 | tch = { git = "https://github.com/LaurentMazare/tch-rs.git", rev = "e92fadc" }
10 | failure = "0.1.5"
-------------------------------------------------------------------------------- /chapter6/finetuning_pytorch_image_models/README.md: --------------------------------------------------------------------------------
1 | # Usage
2 | 
3 | cargo run resnet18.ot hymenoptera_data
4 | 
-------------------------------------------------------------------------------- /chapter6/finetuning_pytorch_image_models/resnet.py: --------------------------------------------------------------------------------
1 | import torch
2 | import torchvision
3 | 
4 | model = torchvision.models.resnet18(pretrained=True)
5 | # model = torch.load("model-best.pth", map_location='cpu')
6 | example = torch.rand(1, 3, 224, 224)
7 | traced_script_module = torch.jit.trace(model, example)
8 | traced_script_module.save("resnet18_model.pt")
-------------------------------------------------------------------------------- /chapter6/model_inference/Cargo.toml: --------------------------------------------------------------------------------
1 | [package]
2 | name = "model_inference"
3 | version = "0.1.0"
4 | authors = ["Joydeep Bhattacharjee "]
5 | edition = "2018"
6 | 
7 | [dependencies]
8 | tch = "0.0.9"
9 | failure = "0.1.5"
-------------------------------------------------------------------------------- /chapter6/model_inference/src/main.rs: --------------------------------------------------------------------------------
1 | #[macro_use]
2 | extern crate failure;
3 | extern crate tch;
4 | use tch::nn::ModuleT;
5 | use tch::vision::{alexnet, densenet, imagenet, inception, resnet, squeezenet, vgg};
6 | 
7 | pub fn main() -> failure::Fallible<()> {
8 |     let args: Vec<_> = std::env::args().collect();
9 |     let (weights, image) = match args.as_slice() {
10 |         [_, w, i] => (std::path::Path::new(w), i.to_owned()),
11 |         _ => bail!("usage: main resnet18.ot image.jpg"),
12 |     };
13 |     // Load the image file and resize it to the usual imagenet dimension of 224x224.
14 |     let image = imagenet::load_image_and_resize224(image)?;
15 | 
16 |     // Create the model and load the weights from the file.
17 |     let mut vs = tch::nn::VarStore::new(tch::Device::Cpu);
18 |     let net: Box<dyn ModuleT> = match weights.file_name().unwrap().to_str().unwrap() {
19 |         "resnet18.ot" => Box::new(resnet::resnet18(&vs.root(), imagenet::CLASS_COUNT)),
20 |         "resnet34.ot" => Box::new(resnet::resnet34(&vs.root(), imagenet::CLASS_COUNT)),
21 |         "densenet121.ot" => Box::new(densenet::densenet121(&vs.root(), imagenet::CLASS_COUNT)),
22 |         "vgg16.ot" => Box::new(vgg::vgg16(&vs.root(), imagenet::CLASS_COUNT)),
23 |         "squeezenet1_0.ot" => Box::new(squeezenet::v1_0(&vs.root(), imagenet::CLASS_COUNT)),
24 |         "squeezenet1_1.ot" => Box::new(squeezenet::v1_1(&vs.root(), imagenet::CLASS_COUNT)),
25 |         "alexnet.ot" => Box::new(alexnet::alexnet(&vs.root(), imagenet::CLASS_COUNT)),
26 |         "inception-v3.ot" => Box::new(inception::v3(&vs.root(), imagenet::CLASS_COUNT)),
27 |         _ => bail!("unknown model, use a weight file named e.g. resnet18.ot"),
28 |     };
29 |     vs.load(weights)?;
30 | 
31 |     // Apply the forward pass of the model to get the logits.
32 |     let output = net
33 |         .forward_t(&image.unsqueeze(0), /*train=*/ false)
34 |         .softmax(-1); // Convert to probability.
35 | 
36 |     // Print the top 5 categories for this image.
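37 |     // The output is one line per class, e.g. "Samoyed  87.21%"; that label
38 |     // and score are purely illustrative and depend on the image and weights.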
39 |     for (probability, class) in imagenet::top(&output, 5).iter() {
40 |         println!("{:50} {:5.2}%", class, 100.0 * probability)
41 |     }
42 |     Ok(())
43 | }
-------------------------------------------------------------------------------- /chapter6/pytorch-image-classification/Cargo.toml: --------------------------------------------------------------------------------
1 | [package]
2 | name = "pytorch-image-classification"
3 | version = "0.1.0"
4 | authors = ["Joydeep Bhattacharjee "]
5 | edition = "2018"
6 | 
7 | [dependencies]
8 | tch = { path = "../tch-rs" }
9 | # tch = { git = "https://github.com/LaurentMazare/tch-rs.git", rev="8110ee6" }
10 | # tch = { git = "https://github.com/LaurentMazare/tch-rs.git", rev="2ebebb808065de13495db8ff7b0ba18cb1a6fe92" }
11 | failure = "0.1.5"
-------------------------------------------------------------------------------- /chapter7/goodbooks-recommender/Cargo.toml: --------------------------------------------------------------------------------
1 | [package]
2 | name = "goodbooks-recommender"
3 | version = "0.1.0"
4 | authors = ["Joydeep Bhattacharjee "]
5 | edition = "2018"
6 | 
7 | [dependencies]
8 | reqwest = "0.9.17"
9 | failure = "0.1.5"
10 | serde = "1"
11 | serde_derive = "1"
12 | serde_json = "1"
13 | csv = "1"
14 | sbr = "0.4.0"
15 | rand = "0.6.5"
16 | structopt = "0.2.15"
-------------------------------------------------------------------------------- /chapter7/high-performance-computing/Cargo.toml: --------------------------------------------------------------------------------
1 | [package]
2 | name = "high-performance-computing"
3 | version = "0.1.0"
4 | authors = ["Joydeep Bhattacharjee "]
5 | edition = "2018"
6 | 
7 | [dependencies]
8 | # faster = "0.5.0"
9 | faster = { path = "../faster" }
10 | rblas = "0.0.13"
11 | # lapack = "0.16.0"
12 | # lapack = { git = "https://github.com/blas-lapack-rs/lapack.git", rev = "67554e6"}
-------------------------------------------------------------------------------- /chapter7/high-performance-computing/src/main.rs: --------------------------------------------------------------------------------
1 | use faster::*;
2 | use rblas::Dot;
3 | // use lapack::*;
4 | 
5 | fn main() {
6 |     let lots_of_3s = (&[-123.456f32; 128][..]).simd_iter(f32s(0.0))
7 |         .simd_map(|v| {
8 |             f32s(9.0) * v.abs().sqrt().rsqrt().ceil().sqrt() - f32s(4.0) - f32s(2.0)
9 |         })
10 |         .scalar_collect();
11 |     println!("{:?}", lots_of_3s);
12 | 
13 |     // making a parallel operation
14 |     let my_vector: Vec<f32> = (0..10).map(|v| v as f32).collect();
15 |     let power_of_3 = (&my_vector[..]).simd_iter(f32s(0.0))
16 |         .simd_map(|v| {
17 |             v * v * v
18 |         })
19 |         .scalar_collect();
20 |     println!("{:?}", power_of_3);
21 | 
22 |     // taking the sum
23 |     let reduced = (&power_of_3[..]).simd_iter(f32s(0.0))
24 |         .simd_reduce(f32s(0.0), |a, v| a + v ).sum();
25 |     println!("{:?}", reduced);
26 | 
27 |     let x = vec![1.0, -2.0, 3.0, 4.0];
28 |     let y = [1.0, 1.0, 1.0, 1.0, 7.0];
29 | 
30 |     let d = Dot::dot(&x, &y[..x.len()]);
31 |     println!("dot product {:?}", d);
32 | 
33 |     // let n = 3;
34 |     // let mut a = vec![3.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 1.0, 3.0];
35 |     // let mut w = vec![0.0; n as usize];
36 |     // let mut work = vec![0.0; 4 * n as usize];
37 |     // let lwork = 4 * n;
38 |     // let mut info = 0;
39 | 
40 |     // unsafe {
41 |     //     dsyev(b'V', b'U', n, &mut a, n, &mut w, &mut work, lwork, &mut info);
42 |     // }
43 | 
44 |     // assert!(info == 0);
45 |     // for (one, another) in w.iter().zip(&[2.0, 2.0, 5.0]) {
46 |     //     assert!((one - another).abs() < 1e-14);
47 |     // }
48 | 
49 | }
50 | 
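51 | // Working the first map through by hand: sqrt(|-123.456|) ~= 11.11, rsqrt(11.11) ~= 0.3,
52 | // ceil -> 1.0, sqrt -> 1.0, so 9.0 * 1.0 - 4.0 - 2.0 = 3.0 in every one of the 128 lanes;
53 | // the cubes of 0..10 sum to 2025.0; and the dot product is 1 - 2 + 3 + 4 = 6.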
-------------------------------------------------------------------------------- /chapter7/statistics/Cargo.toml: --------------------------------------------------------------------------------
1 | [package]
2 | name = "statistics"
3 | version = "0.1.0"
4 | authors = ["Joydeep Bhattacharjee "]
5 | edition = "2018"
6 | 
7 | [dependencies]
8 | ndarray = "0.12.1"
-------------------------------------------------------------------------------- /chapter8/cpp_demangle/Cargo.toml: --------------------------------------------------------------------------------
1 | [package]
2 | name = "rust2py"
3 | version = "0.1.0"
4 | edition = "2018"
5 | 
6 | [lib]
7 | name = "rust2py"
8 | crate-type = ["cdylib"]
9 | 
10 | [dependencies.pyo3]
11 | version = "0.7.0"
12 | features = ["extension-module"]
-------------------------------------------------------------------------------- /chapter8/cpp_demangle/mangle_ex.py: --------------------------------------------------------------------------------
1 | # import our rust library, no need for cffi
2 | from cpp_demangle import demangle
3 | 
4 | # run the demangle function, prints 'mangled::foo(double)'
5 | print(demangle("_ZN7mangled3fooEd"))
6 | 
-------------------------------------------------------------------------------- /chapter8/cpp_demangle/setup.py: --------------------------------------------------------------------------------
1 | from setuptools import setup
2 | from setuptools_rust import Binding, RustExtension
3 | 
4 | setup(name='cpp-demangle',
5 |       version="0.0.1",
6 |       rust_extensions=[RustExtension('cpp_demangle', 'Cargo.toml', binding=Binding.PyO3)],
7 |       test_suite="tests",
8 |       zip_safe=False)
9 | 
-------------------------------------------------------------------------------- /chapter8/cpp_demangle/src/lib.rs: --------------------------------------------------------------------------------
1 | use pyo3::prelude::*;
2 | use pyo3::wrap_pyfunction;
3 | 
4 | #[pymodule]
5 | fn rust2py(py: Python, m: &PyModule) -> PyResult<()> {
6 | 
7 |     // Note that the `#[pyfn()]` annotation automatically converts the arguments from
8 |     // Python objects to Rust values; and the Rust return value back into a Python object.
9 |     #[pyfn(m, "sum_as_string")]
10 |     fn sum_as_string_py(_py: Python, a: i64, b: i64) -> PyResult<String> {
11 |         Ok(format!("{}", a + b))
12 |     }
13 | 
14 |     Ok(())
15 | }
16 | 
17 | #[pyfunction]
18 | fn double(x: usize) -> usize {
19 |     x * 2
20 | }
21 | 
22 | #[pymodule]
23 | fn module_with_functions(py: Python, m: &PyModule) -> PyResult<()> {
24 |     m.add_wrapped(wrap_pyfunction!(double)).unwrap();
25 | 
26 |     Ok(())
27 | }
28 | 
29 | 
30 | /// add(a, b, /)
31 | /// --
32 | /// 
33 | /// This function adds two unsigned 64-bit integers.
34 | #[pyfunction] 35 | fn add(a: u64, b: u64) -> u64 { 36 | a + b 37 | } -------------------------------------------------------------------------------- /chapter8/crfsuite-model/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "crfsuite-model" 3 | version = "0.2.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | # data reading and organisation 9 | csv = "1.0.7" 10 | serde = "1" 11 | serde_derive = "1" 12 | rand = "0.6.5" 13 | 14 | # machine learning 15 | crfsuite = "0.2.6" 16 | 17 | # to call from python 18 | pyo3 = { git = "https://github.com/PyO3/pyo3.git", rev = "99fdafbb880c181f4bce16bbbac03888b3cf85c8", features = ["extension-module"]} 19 | 20 | [lib] 21 | name = "crfsuite_model" 22 | crate-type = ["cdylib"] -------------------------------------------------------------------------------- /chapter8/crfsuite-model/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include pyproject.toml Cargo.toml 2 | recursive-include src * -------------------------------------------------------------------------------- /chapter8/crfsuite-model/crfsuite_model/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .crfsuite_model import CRFSuiteModel 3 | 4 | __all__ = ["CRFSuiteModel",] -------------------------------------------------------------------------------- /chapter8/crfsuite-model/crfsuite_model_prediction.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from crfsuite_model import CRFSuiteModel 3 | model = CRFSuiteModel("model.crfsuite") 4 | res = model.predict("data/ner_predict.csv") 5 | print(res) 6 | -------------------------------------------------------------------------------- /chapter8/crfsuite-model/crfsuite_model_training.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from crfsuite_model import CRFSuiteModel 3 | model = CRFSuiteModel("model.crfsuite") 4 | res = model.fit("data/ner.csv") 5 | print(res) 6 | -------------------------------------------------------------------------------- /chapter8/crfsuite-model/data/ner_predict.csv: -------------------------------------------------------------------------------- 1 | ,lemma,next-lemma,next-next-lemma,next-next-pos,next-next-shape,next-next-word,next-pos,next-shape,next-word,pos,prev-iob,prev-lemma,prev-pos,prev-prev-iob,prev-prev-lemma,prev-prev-pos,prev-prev-shape,prev-prev-word,prev-shape,prev-word,sentence_idx,shape,word 2 | 0,thousand,of,demonstr,NNS,lowercase,demonstrators,IN,lowercase,of,NNS,__START1__,__start1__,__START1__,__START2__,__start2__,__START2__,wildcard,__START2__,wildcard,__START1__,1,capitalized,Thousands 3 | 1,of,demonstr,have,VBP,lowercase,have,NNS,lowercase,demonstrators,IN,O,thousand,NNS,__START1__,__start1__,__START1__,wildcard,__START1__,capitalized,Thousands,1,lowercase,of 4 | 2,demonstr,have,march,VBN,lowercase,marched,VBP,lowercase,have,NNS,O,of,IN,O,thousand,NNS,capitalized,Thousands,lowercase,of,1,lowercase,demonstrators 5 | 3,have,march,through,IN,lowercase,through,VBN,lowercase,marched,VBP,O,demonstr,NNS,O,of,IN,lowercase,of,lowercase,demonstrators,1,lowercase,have 6 | 4,march,through,london,NNP,capitalized,London,IN,lowercase,through,VBN,O,have,VBP,O,demonstr,NNS,lowercase,demonstrators,lowercase,have,1,lowercase,marched 7 | 
5,through,london,to,TO,lowercase,to,NNP,capitalized,London,IN,O,march,VBN,O,have,VBP,lowercase,have,lowercase,marched,1,lowercase,through 8 | 6,london,to,protest,VB,lowercase,protest,TO,lowercase,to,NNP,O,through,IN,O,march,VBN,lowercase,marched,lowercase,through,1,capitalized,London 9 | 7,to,protest,the,DT,lowercase,the,VB,lowercase,protest,TO,B-geo,london,NNP,O,through,IN,lowercase,through,capitalized,London,1,lowercase,to 10 | 8,protest,the,war,NN,lowercase,war,DT,lowercase,the,VB,O,to,TO,B-geo,london,NNP,capitalized,London,lowercase,to,1,lowercase,protest 11 | -------------------------------------------------------------------------------- /chapter8/crfsuite-model/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=41.0.0", "wheel", "setuptools_rust>=0.10.2", "toml"] 3 | build-backend = "setuptools.build_meta" 4 | -------------------------------------------------------------------------------- /chapter8/crfsuite-model/requirements-dev.txt: -------------------------------------------------------------------------------- 1 | pip>=19.1 2 | pytest>=3.5.0 3 | setuptools-rust>=0.10.2 4 | pytest-benchmark>=3.1.1 5 | -------------------------------------------------------------------------------- /chapter8/crfsuite-model/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | from setuptools import setup 5 | from setuptools.command.test import test as TestCommand 6 | from setuptools.command.sdist import sdist as SdistCommand 7 | 8 | try: 9 | from setuptools_rust import RustExtension 10 | except ImportError: 11 | import subprocess 12 | 13 | errno = subprocess.call([sys.executable, "-m", "pip", "install", "setuptools-rust"]) 14 | if errno: 15 | print("Please install setuptools-rust package") 16 | raise SystemExit(errno) 17 | else: 18 | from setuptools_rust import RustExtension 19 | 20 | 21 | class CargoModifiedSdist(SdistCommand): 22 | """Modifies Cargo.toml to use an absolute rather than a relative path 23 | 24 | The current implementation of PEP 517 in pip always does builds in an 25 | isolated temporary directory. This causes problems with the build, because 26 | Cargo.toml necessarily refers to the current version of pyo3 by a relative 27 | path. 28 | 29 | Since these sdists are never meant to be used for anything other than 30 | tox / pip installs, at sdist build time, we will modify the Cargo.toml 31 | in the sdist archive to include an *absolute* path to pyo3. 
32 |     """
33 | 
34 |     def make_release_tree(self, base_dir, files):
35 |         """Stages the files to be included in archives"""
36 |         super().make_release_tree(base_dir, files)
37 | 
38 |         import toml
39 |         # Cargo.toml is now staged and ready to be modified
40 |         cargo_loc = os.path.join(base_dir, 'Cargo.toml')
41 |         assert os.path.exists(cargo_loc)
42 | 
43 |         with open(cargo_loc, 'r') as f:
44 |             cargo_toml = toml.load(f)
45 | 
46 |         rel_pyo3_path = cargo_toml['dependencies']['pyo3']['path']
47 |         base_path = os.path.dirname(__file__)
48 |         abs_pyo3_path = os.path.abspath(os.path.join(base_path, rel_pyo3_path))
49 | 
50 |         cargo_toml['dependencies']['pyo3']['path'] = abs_pyo3_path
51 | 
52 |         with open(cargo_loc, 'w') as f:
53 |             toml.dump(cargo_toml, f)
54 | 
55 | 
56 | class PyTest(TestCommand):
57 |     user_options = []
58 | 
59 |     def run(self):
60 |         self.run_command("test_rust")
61 | 
62 |         import subprocess
63 | 
64 |         subprocess.check_call(["pytest", "tests"])
65 | 
66 | 
67 | setup_requires = ["setuptools-rust>=0.10.1", "wheel"]
68 | install_requires = []
69 | tests_require = install_requires + ["pytest", "pytest-benchmark"]
70 | 
71 | setup(
72 |     name="crfsuite-model",
73 |     version="0.1.0",
74 |     classifiers=[
75 |         "License :: OSI Approved :: MIT License",
76 |         "Development Status :: 3 - Alpha",
77 |         "Intended Audience :: Developers",
78 |         "Programming Language :: Python",
79 |         "Programming Language :: Rust",
80 |         "Operating System :: POSIX",
81 |         "Operating System :: MacOS :: MacOS X",
82 |     ],
83 |     packages=["crfsuite_model"],
84 |     rust_extensions=[RustExtension("crfsuite_model.crfsuite_model", "Cargo.toml")],
85 |     install_requires=install_requires,
86 |     tests_require=tests_require,
87 |     setup_requires=setup_requires,
88 |     include_package_data=True,
89 |     zip_safe=False,
90 |     cmdclass={
91 |         'test': PyTest,
92 |         'sdist': CargoModifiedSdist,
93 |     },
94 | )
95 | 
-------------------------------------------------------------------------------- /chapter8/crfsuite-model/src/lib.rs: --------------------------------------------------------------------------------
1 | extern crate serde;
2 | #[macro_use]
3 | extern crate serde_derive;
4 | 
5 | use pyo3::prelude::*;
6 | use pyo3::wrap_pyfunction;
7 | use std::fs;
8 | use std::path::PathBuf;
9 | 
10 | use std::vec::Vec;
11 | use std::error::Error;
12 | 
13 | use csv;
14 | use rand;
15 | use rand::thread_rng;
16 | use rand::seq::SliceRandom;
17 | 
18 | use crfsuite::{Model, Attribute, CrfError};
19 | use crfsuite::{Trainer, Algorithm, GraphicalModel};
20 | 
21 | #[pyclass(module = "crfsuite_model")]
22 | pub struct CRFSuiteModel {
23 |     model_name: String,
24 | }
25 | 
26 | #[pymethods]
27 | impl CRFSuiteModel {
28 |     #[new]
29 |     fn new(obj: &PyRawObject, path: String) {
30 |         obj.init(CRFSuiteModel {
31 |             model_name: path,
32 |         });
33 |     }
34 | 
35 |     fn fit(&self, py: Python<'_>, path: String) -> PyResult<String> {
36 |         let data_file = PathBuf::from(&path[..]);
37 |         let data = get_data(&data_file).unwrap();
38 |         let (test_data, train_data) = split_test_train(&data, 0.2);
39 |         let (xseq_train, yseq_train) = create_xseq_yseq(&train_data);
40 |         let (xseq_test, yseq_test) = create_xseq_yseq(&test_data);
41 |         crfmodel_training(xseq_train, yseq_train, self.model_name.as_ref()).unwrap();
42 |         let preds = model_prediction(xseq_test, self.model_name.as_ref()).unwrap();
43 |         check_accuracy(&preds, &yseq_test);
44 |         Ok("model fit done".to_string())
45 |     }
46 | 
47 |     fn predict(&self, predict_filename: String) -> PyResult<Vec<String>> {
48 |         let predict_data_file = PathBuf::from(predict_filename);
49 |         let data = get_data_no_y(&predict_data_file).unwrap();
50 |         let xseq_test = create_xseq_for_predict(&data[..]);
51 |         let preds = model_prediction(xseq_test, self.model_name.as_ref()).unwrap();
52 |         Ok(preds)
53 |     }
54 | }
55 | 
56 | #[derive(Debug, Deserialize, Clone)]
57 | pub struct NER {
58 |     lemma: String,
59 |     #[serde(rename = "next-lemma")]
60 |     next_lemma: String,
61 |     word: String,
62 |     tag: String
63 | }
64 | 
65 | #[derive(Debug, Deserialize, Clone)]
66 | pub struct NER_Only_X {
67 |     lemma: String,
68 |     #[serde(rename = "next-lemma")]
69 |     next_lemma: String,
70 |     word: String,
71 | }
72 | 
73 | fn get_data_no_y(path: &PathBuf) -> Result<Vec<NER_Only_X>, Box<dyn Error>> {
74 |     let csvfile = fs::File::open(path)?;
75 |     let mut rdr = csv::Reader::from_reader(csvfile);
76 |     let mut data = Vec::new();
77 |     for result in rdr.deserialize() {
78 |         let r: NER_Only_X = result?;
79 |         data.push(r);
80 |     }
81 |     Ok(data)
82 | }
83 | 
84 | fn get_data(path: &PathBuf) -> Result<Vec<NER>, Box<dyn Error>> {
85 |     let csvfile = fs::File::open(path)?;
86 |     let mut rdr = csv::Reader::from_reader(csvfile);
87 |     let mut data = Vec::new();
88 |     for result in rdr.deserialize() {
89 |         let r: NER = result?;
90 |         data.push(r);
91 |     }
92 |     data.shuffle(&mut thread_rng());
93 |     Ok(data)
94 | }
95 | 
96 | fn split_test_train(data: &[NER], test_size: f32) -> (Vec<NER>, Vec<NER>) {
97 |     let test_size: f32 = data.len() as f32 * test_size;
98 |     let test_size = test_size.round() as usize;
99 | 
100 |     let (test_data, train_data) = data.split_at(test_size);
101 |     (test_data.to_vec(), train_data.to_vec())
102 | }
103 | 
104 | fn create_xseq_yseq(data: &[NER])
105 |     -> (Vec<Vec<Attribute>>, Vec<String>) {
106 |     let mut xseq = vec![];
107 |     let mut yseq = vec![];
108 |     for item in data {
109 |         let seq = vec![Attribute::new(item.lemma.clone(), 1.0),
110 |             Attribute::new(item.next_lemma.clone(), 0.5)]; // higher weightage for the main word.
111 |         xseq.push(seq);
112 |         yseq.push(item.tag.clone());
113 |     }
114 |     (xseq, yseq)
115 | }
116 | 
117 | fn create_xseq_for_predict(data: &[NER_Only_X])
118 |     -> Vec<Vec<Attribute>> {
119 |     let mut xseq = vec![];
120 |     for item in data {
121 |         let seq = vec![Attribute::new(item.lemma.clone(), 1.0),
122 |             Attribute::new(item.next_lemma.clone(), 0.5)]; // higher weightage for the main word.
123 |         xseq.push(seq);
124 |     }
125 |     xseq
126 | }
127 | 
128 | fn check_accuracy(preds: &[String], actual: &[String]) {
129 |     let mut hits = 0;
130 |     let mut correct_hits = 0;
131 |     for (predicted, actual) in preds.iter().zip(actual) {
132 |         if actual != "O" { // will not consider the other category as it bloats the accuracy.
133 |             if predicted == actual && actual != "O" {
134 |                 correct_hits += 1;
135 |             }
136 |             hits += 1;
137 |         }
138 |     }
139 |     println!("accuracy={} ({}/{} correct)",
140 |         correct_hits as f32 / hits as f32,
141 |         correct_hits,
142 |         hits);
143 | }
144 | 
145 | fn crfmodel_training(xseq: Vec<Vec<Attribute>>,
146 |     yseq: Vec<String>,
147 |     model_name: &str) -> Result<(), Box<dyn Error>> {
148 |     let mut trainer = Trainer::new(true);
149 |     trainer.select(Algorithm::AROW, GraphicalModel::CRF1D)?;
150 |     trainer.append(&xseq, &yseq, 0i32)?;
151 |     trainer.train(model_name, -1i32)?; // using all instances for training.
152 |     Ok(())
153 | }
154 | 
155 | fn model_prediction(xtest: Vec<Vec<Attribute>>,
156 |     model_name: &str)
157 |     -> Result<Vec<String>, Box<dyn Error>> {
158 |     let model = Model::from_file(model_name)?;
159 |     let mut tagger = model.tagger()?;
160 |     let preds = tagger.tag(&xtest)?;
161 |     Ok(preds)
162 | }
163 | 
164 | #[pymodule]
165 | fn crfsuite_model(_py: Python<'_>, m: &PyModule) -> PyResult<()> {
166 |     m.add_class::<CRFSuiteModel>()?;
167 | 
168 |     Ok(())
169 | }
-------------------------------------------------------------------------------- /chapter8/iris_classification_xgboost/IrisClassificationXgboost.java: --------------------------------------------------------------------------------
1 | class IrisClassificationXgboost {
2 |     private static native void fit();
3 |     private static native String predict();
4 | 
5 |     static {
6 |         // This actually loads the shared object that we'll be creating.
7 |         // The actual location of the .so or .dll may differ based on your
8 |         // platform.
9 |         System.loadLibrary("iris_classification_xgboost");
10 |     }
11 | 
12 |     // The rest is just regular ol' Java!
13 |     public static void main(String[] args) {
14 |         IrisClassificationXgboost.fit();
15 |         String predictions = IrisClassificationXgboost.predict();
16 |         System.out.println(predictions);
17 |     }
18 | }
-------------------------------------------------------------------------------- /chapter8/iris_classification_xgboost/Makefile: --------------------------------------------------------------------------------
1 | java_run: lib
2 | 	javac IrisClassificationXgboost.java && java -Djava.library.path=iris_classification_library/target/debug/ IrisClassificationXgboost
3 | 
4 | .PHONY: lib
5 | 
6 | javah:
7 | 	javah IrisClassificationXgboost
8 | 
9 | lib:
10 | 	cd iris_classification_library && cargo build
11 | 
-------------------------------------------------------------------------------- /chapter8/iris_classification_xgboost/data/iris.csv: --------------------------------------------------------------------------------
1 | sepal_length,sepal_width,petal_length,petal_width,species
2 | 5.1,3.5,1.4,0.2,setosa
3 | 4.9,3.0,1.4,0.2,setosa
4 | 4.7,3.2,1.3,0.2,setosa
5 | 4.6,3.1,1.5,0.2,setosa
6 | 5.0,3.6,1.4,0.2,setosa
7 | 5.4,3.9,1.7,0.4,setosa
8 | 4.6,3.4,1.4,0.3,setosa
9 | 5.0,3.4,1.5,0.2,setosa
10 | 4.4,2.9,1.4,0.2,setosa
11 | 4.9,3.1,1.5,0.1,setosa
12 | 5.4,3.7,1.5,0.2,setosa
13 | 4.8,3.4,1.6,0.2,setosa
14 | 4.8,3.0,1.4,0.1,setosa
15 | 4.3,3.0,1.1,0.1,setosa
16 | 5.8,4.0,1.2,0.2,setosa
17 | 5.7,4.4,1.5,0.4,setosa
18 | 5.4,3.9,1.3,0.4,setosa
19 | 5.1,3.5,1.4,0.3,setosa
20 | 5.7,3.8,1.7,0.3,setosa
21 | 5.1,3.8,1.5,0.3,setosa
22 | 5.4,3.4,1.7,0.2,setosa
23 | 5.1,3.7,1.5,0.4,setosa
24 | 4.6,3.6,1.0,0.2,setosa
25 | 5.1,3.3,1.7,0.5,setosa
26 | 4.8,3.4,1.9,0.2,setosa
27 | 5.0,3.0,1.6,0.2,setosa
28 | 5.0,3.4,1.6,0.4,setosa
29 | 5.2,3.5,1.5,0.2,setosa
30 | 5.2,3.4,1.4,0.2,setosa
31 | 4.7,3.2,1.6,0.2,setosa
32 | 4.8,3.1,1.6,0.2,setosa
33 | 5.4,3.4,1.5,0.4,setosa
34 | 5.2,4.1,1.5,0.1,setosa
35 | 5.5,4.2,1.4,0.2,setosa
36 | 4.9,3.1,1.5,0.1,setosa
37 | 5.0,3.2,1.2,0.2,setosa
38 | 5.5,3.5,1.3,0.2,setosa
39 | 4.9,3.1,1.5,0.1,setosa
40 | 4.4,3.0,1.3,0.2,setosa
41 | 5.1,3.4,1.5,0.2,setosa
42 | 5.0,3.5,1.3,0.3,setosa
43 | 4.5,2.3,1.3,0.3,setosa
44 | 4.4,3.2,1.3,0.2,setosa
45 | 5.0,3.5,1.6,0.6,setosa
46 | 5.1,3.8,1.9,0.4,setosa
47 | 4.8,3.0,1.4,0.3,setosa
48 | 5.1,3.8,1.6,0.2,setosa
49 | 4.6,3.2,1.4,0.2,setosa
50 | 5.3,3.7,1.5,0.2,setosa
51 | 5.0,3.3,1.4,0.2,setosa
52 | 7.0,3.2,4.7,1.4,versicolor
53 | 6.4,3.2,4.5,1.5,versicolor
54 | 6.9,3.1,4.9,1.5,versicolor
55 | 5.5,2.3,4.0,1.3,versicolor
56 | 
6.5,2.8,4.6,1.5,versicolor 57 | 5.7,2.8,4.5,1.3,versicolor 58 | 6.3,3.3,4.7,1.6,versicolor 59 | 4.9,2.4,3.3,1.0,versicolor 60 | 6.6,2.9,4.6,1.3,versicolor 61 | 5.2,2.7,3.9,1.4,versicolor 62 | 5.0,2.0,3.5,1.0,versicolor 63 | 5.9,3.0,4.2,1.5,versicolor 64 | 6.0,2.2,4.0,1.0,versicolor 65 | 6.1,2.9,4.7,1.4,versicolor 66 | 5.6,2.9,3.6,1.3,versicolor 67 | 6.7,3.1,4.4,1.4,versicolor 68 | 5.6,3.0,4.5,1.5,versicolor 69 | 5.8,2.7,4.1,1.0,versicolor 70 | 6.2,2.2,4.5,1.5,versicolor 71 | 5.6,2.5,3.9,1.1,versicolor 72 | 5.9,3.2,4.8,1.8,versicolor 73 | 6.1,2.8,4.0,1.3,versicolor 74 | 6.3,2.5,4.9,1.5,versicolor 75 | 6.1,2.8,4.7,1.2,versicolor 76 | 6.4,2.9,4.3,1.3,versicolor 77 | 6.6,3.0,4.4,1.4,versicolor 78 | 6.8,2.8,4.8,1.4,versicolor 79 | 6.7,3.0,5.0,1.7,versicolor 80 | 6.0,2.9,4.5,1.5,versicolor 81 | 5.7,2.6,3.5,1.0,versicolor 82 | 5.5,2.4,3.8,1.1,versicolor 83 | 5.5,2.4,3.7,1.0,versicolor 84 | 5.8,2.7,3.9,1.2,versicolor 85 | 6.0,2.7,5.1,1.6,versicolor 86 | 5.4,3.0,4.5,1.5,versicolor 87 | 6.0,3.4,4.5,1.6,versicolor 88 | 6.7,3.1,4.7,1.5,versicolor 89 | 6.3,2.3,4.4,1.3,versicolor 90 | 5.6,3.0,4.1,1.3,versicolor 91 | 5.5,2.5,4.0,1.3,versicolor 92 | 5.5,2.6,4.4,1.2,versicolor 93 | 6.1,3.0,4.6,1.4,versicolor 94 | 5.8,2.6,4.0,1.2,versicolor 95 | 5.0,2.3,3.3,1.0,versicolor 96 | 5.6,2.7,4.2,1.3,versicolor 97 | 5.7,3.0,4.2,1.2,versicolor 98 | 5.7,2.9,4.2,1.3,versicolor 99 | 6.2,2.9,4.3,1.3,versicolor 100 | 5.1,2.5,3.0,1.1,versicolor 101 | 5.7,2.8,4.1,1.3,versicolor 102 | 6.3,3.3,6.0,2.5,virginica 103 | 5.8,2.7,5.1,1.9,virginica 104 | 7.1,3.0,5.9,2.1,virginica 105 | 6.3,2.9,5.6,1.8,virginica 106 | 6.5,3.0,5.8,2.2,virginica 107 | 7.6,3.0,6.6,2.1,virginica 108 | 4.9,2.5,4.5,1.7,virginica 109 | 7.3,2.9,6.3,1.8,virginica 110 | 6.7,2.5,5.8,1.8,virginica 111 | 7.2,3.6,6.1,2.5,virginica 112 | 6.5,3.2,5.1,2.0,virginica 113 | 6.4,2.7,5.3,1.9,virginica 114 | 6.8,3.0,5.5,2.1,virginica 115 | 5.7,2.5,5.0,2.0,virginica 116 | 5.8,2.8,5.1,2.4,virginica 117 | 6.4,3.2,5.3,2.3,virginica 118 | 6.5,3.0,5.5,1.8,virginica 119 | 7.7,3.8,6.7,2.2,virginica 120 | 7.7,2.6,6.9,2.3,virginica 121 | 6.0,2.2,5.0,1.5,virginica 122 | 6.9,3.2,5.7,2.3,virginica 123 | 5.6,2.8,4.9,2.0,virginica 124 | 7.7,2.8,6.7,2.0,virginica 125 | 6.3,2.7,4.9,1.8,virginica 126 | 6.7,3.3,5.7,2.1,virginica 127 | 7.2,3.2,6.0,1.8,virginica 128 | 6.2,2.8,4.8,1.8,virginica 129 | 6.1,3.0,4.9,1.8,virginica 130 | 6.4,2.8,5.6,2.1,virginica 131 | 7.2,3.0,5.8,1.6,virginica 132 | 7.4,2.8,6.1,1.9,virginica 133 | 7.9,3.8,6.4,2.0,virginica 134 | 6.4,2.8,5.6,2.2,virginica 135 | 6.3,2.8,5.1,1.5,virginica 136 | 6.1,2.6,5.6,1.4,virginica 137 | 7.7,3.0,6.1,2.3,virginica 138 | 6.3,3.4,5.6,2.4,virginica 139 | 6.4,3.1,5.5,1.8,virginica 140 | 6.0,3.0,4.8,1.8,virginica 141 | 6.9,3.1,5.4,2.1,virginica 142 | 6.7,3.1,5.6,2.4,virginica 143 | 6.9,3.1,5.1,2.3,virginica 144 | 5.8,2.7,5.1,1.9,virginica 145 | 6.8,3.2,5.9,2.3,virginica 146 | 6.7,3.3,5.7,2.5,virginica 147 | 6.7,3.0,5.2,2.3,virginica 148 | 6.3,2.5,5.0,1.9,virginica 149 | 6.5,3.0,5.2,2.0,virginica 150 | 6.2,3.4,5.4,2.3,virginica 151 | 5.9,3.0,5.1,1.8,virginica 152 | -------------------------------------------------------------------------------- /chapter8/iris_classification_xgboost/data/predict.csv: -------------------------------------------------------------------------------- 1 | sepal_length,sepal_width,petal_length,petal_width,species 2 | 5.1,3.5,1.4,0.2,setosa 3 | -------------------------------------------------------------------------------- /chapter8/iris_classification_xgboost/iris_classification_library/Cargo.toml: 
--------------------------------------------------------------------------------
1 | [package]
2 | name = "iris_classification_xgboost"
3 | version = "0.1.0"
4 | authors = ["joydeep bhattacharjee "]
5 | edition = "2018"
6 | 
7 | [dependencies]
8 | csv = "1.0.7"
9 | serde = "1.0.92"
10 | serde_derive = "1.0.92"
11 | rand = "0.6"
12 | xgboost = "0.1.4"
13 | ml-utils = { path = "../../../chapter2/ml-utils" }
14 | jni = "0.12.3"
15 | 
16 | 
17 | [lib]
18 | name = "iris_classification_xgboost"
19 | crate-type = ["cdylib"]
-------------------------------------------------------------------------------- /chapter8/my_lambda_function/.cargo/config: --------------------------------------------------------------------------------
1 | [target.x86_64-unknown-linux-musl]
2 | linker = "x86_64-linux-musl-gcc"
3 | 
-------------------------------------------------------------------------------- /chapter8/my_lambda_function/Cargo.toml: --------------------------------------------------------------------------------
1 | [package]
2 | name = "my_lambda_function"
3 | version = "0.1.0"
4 | authors = ["joydeep bhattacharjee"]
5 | autobins = false
6 | edition = "2018"
7 | 
8 | [dependencies]
9 | lambda_runtime = "0.1"
10 | serde = "^1"
11 | serde_derive = "^1"
12 | serde_json = "^1"
13 | log = "0.4"
14 | simple_logger = "^1"
15 | regex = "1"
16 | 
17 | [[bin]]
18 | name = "bootstrap"
19 | path = "src/main.rs"
-------------------------------------------------------------------------------- /chapter8/my_lambda_function/buildthis.sh: --------------------------------------------------------------------------------
1 | rm -f rust.zip
2 | cargo build --release --target x86_64-unknown-linux-musl
3 | zip -j rust.zip target/x86_64-unknown-linux-musl/release/bootstrap
-------------------------------------------------------------------------------- /chapter8/my_lambda_function/src/main.rs: --------------------------------------------------------------------------------
1 | use serde_derive;
2 | use serde_derive::{Serialize, Deserialize};
3 | use lambda_runtime;
4 | use lambda_runtime::{lambda, Context, error::HandlerError};
5 | use log;
6 | use log::error;
7 | use std::error::Error;
8 | use std::collections;
9 | use std::collections::hash_map::Entry::{Occupied, Vacant};
10 | use regex;
11 | use regex::Regex;
12 | 
13 | #[derive(Serialize, Deserialize)]
14 | struct CustomEvent {
15 |     string: String,
16 | }
17 | 
18 | fn main() -> Result<(), Box<dyn Error>> {
19 |     simple_logger::init_with_level(log::Level::Debug).unwrap();
20 |     lambda!(my_handler);
21 | 
22 |     Ok(())
23 | }
24 | 
25 | fn my_handler(event: CustomEvent, ctx: Context) -> Result<String, HandlerError> {
26 |     if event.string == "" {
27 |         error!("Empty string in request {}", ctx.aws_request_id);
28 |         return Err(ctx.new_error("Empty input string"));
29 |     }
30 |     let mut map = collections::HashMap::<String, u32>::new();
31 |     let re = Regex::new(r"\w+").unwrap();
32 |     for caps in re.captures_iter(&event.string) {
33 |         if let Some(cap) = caps.get(0) {
34 |             let word = cap.as_str();
35 |             match map.entry(word.to_string()) {
36 |                 Occupied(mut view) => { *view.get_mut() += 1; }
37 |                 Vacant(view) => { view.insert(1); }
38 |             }
39 |         }
40 |     }
41 | 
42 |     // Serialise to a json string
43 |     let j = serde_json::to_string(&map).unwrap();
44 | 
45 |     Ok(j)
46 | }
47 | 
-------------------------------------------------------------------------------- /errata.md: --------------------------------------------------------------------------------
1 | # Errata for *Practical Machine Learning with Rust*
2 | 
3 | On **page xx** [Summary of error]:
4 | 
5 | Details of error here.
Highlight key pieces in **bold**. 6 | 7 | *** 8 | 9 | On **page xx** [Summary of error]: 10 | 11 | Details of error here. Highlight key pieces in **bold**. 12 | 13 | *** --------------------------------------------------------------------------------