├── .DS_Store ├── .gitattributes ├── 9781484251201.jpg ├── Contributing.md ├── LICENSE.txt ├── README.md ├── chapter1 └── simple-scripts │ ├── conditions.rs │ ├── enumerations.rs │ ├── functions.rs │ ├── match.rs │ ├── oops.rs │ ├── ownership1.rs │ ├── ownership2.rs │ ├── ownership3.rs │ ├── ownership4.rs │ ├── ownership5.rs │ ├── unittestingexample │ ├── Cargo.toml │ └── src │ │ └── main.rs │ ├── variables.rs │ ├── variables1.rs │ ├── variables2.rs │ ├── variables3.rs │ ├── variables4.rs │ └── variables5.rs ├── chapter2 ├── datasets │ ├── housing.csv │ └── iris.csv ├── iris_classification_tchrs │ ├── .DS_Store │ ├── Cargo.toml │ ├── mklml_mac_2019.0.5.20190502.tgz │ └── src │ │ ├── linear_with_sgd.rs │ │ ├── main.rs │ │ └── simple_nn.rs ├── iris_classification_xgboost │ ├── .DS_Store │ ├── Cargo.toml │ ├── iris.csv │ └── src │ │ └── main.rs ├── kmeans_rusty_machine │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── logistic_regression_rustlearn │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── ml-utils │ ├── Cargo.toml │ ├── calc_prob.py │ ├── clusim stuff.ipynb │ ├── easy_bayesian_AB.py │ ├── examples │ │ └── measures.rs │ └── src │ │ ├── datasets.rs │ │ ├── hypothesis_testing.rs │ │ ├── lib.rs │ │ ├── main.rs │ │ ├── sup_metrics.rs │ │ └── unsup_metrics.rs ├── nb_classification │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── rust_and_tf │ ├── Cargo.toml │ ├── data │ │ └── housing.csv │ ├── fullcode.ipynb │ ├── src │ │ ├── conv_nets.rs │ │ ├── conv_nets_maxpooling.rs │ │ ├── graph_variables.rs │ │ ├── graph_with_placeholder.rs │ │ ├── linear_regression.rs │ │ ├── linear_regression_from_model.rs │ │ ├── main.rs │ │ └── seq_nodes.rs │ └── tensorflow create model.ipynb ├── rustlearn_classification_tasks │ ├── Cargo.toml │ └── src │ │ ├── binary_class_scores.rs │ │ ├── logistic_reg.rs │ │ ├── main.rs │ │ ├── svm.rs │ │ └── trees.rs ├── rusty_machine_classification │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── rusty_machine_supervised_algos │ ├── Cargo.toml │ └── src │ │ └── main.rs └── rustymachine_regression │ ├── Cargo.toml │ ├── data │ └── housing.csv │ └── src │ ├── gaussian_process_reg.rs │ ├── glms.rs │ ├── lin_reg.rs │ └── main.rs ├── chapter3 ├── reinforcement-learning-frozenlake │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── rsrl_custom │ ├── Cargo.toml │ └── src │ │ └── main.rs └── rusty_machine_unsupervised │ ├── Cargo.toml │ ├── data │ └── iris.csv │ └── src │ └── main.rs ├── chapter4 ├── SQL_db │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── data_formats │ ├── Cargo.toml │ ├── data │ │ ├── prize.json │ │ └── sample_2.xml │ └── src │ │ ├── csvreading.rs │ │ ├── jsonreading.rs │ │ ├── main.rs │ │ └── xmlreading.rs ├── data_transformations_datafusion │ ├── Cargo.toml │ ├── src │ │ └── main.rs │ └── titanic │ │ ├── test.csv │ │ └── train.csv ├── databases │ ├── Cargo.toml │ └── src │ │ ├── main.rs │ │ ├── neo4j_db.rs │ │ └── postgres_db.rs ├── graph_db │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── matrix_transformations │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── s3_files │ ├── Cargo.toml │ └── src │ │ └── main.rs └── scraping │ ├── Cargo.toml │ └── src │ └── main.rs ├── chapter5 ├── crfsuite-model │ ├── Cargo.toml │ ├── data │ │ └── ner.csv │ └── src │ │ └── main.rs ├── fasttext-model │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── jigsaw │ ├── Cargo.toml │ ├── data │ │ └── train.csv │ ├── references.txt │ └── src │ │ └── main.rs └── snips-model │ ├── Cargo.toml │ ├── snips_training.md │ └── src │ └── main.rs ├── chapter6 ├── adversarial │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── 
face-detection-tf │ ├── Cargo.toml │ ├── mtcnn.pb │ └── src │ │ └── main.rs ├── finetuning_pytorch_image_models │ ├── Cargo.toml │ ├── README.md │ ├── resnet.py │ └── src │ │ └── main.rs ├── model_inference │ ├── Cargo.toml │ └── src │ │ └── main.rs └── pytorch-image-classification │ ├── Cargo.toml │ └── src │ └── main.rs ├── chapter7 ├── goodbooks-recommender │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── high-performance-computing │ ├── Cargo.toml │ └── src │ │ └── main.rs └── statistics │ ├── Cargo.toml │ └── src │ └── main.rs ├── chapter8 ├── cpp_demangle │ ├── Cargo.toml │ ├── mangle_ex.py │ ├── setup.py │ └── src │ │ └── lib.rs ├── crfsuite-model │ ├── Cargo.toml │ ├── MANIFEST.in │ ├── crfsuite_model │ │ └── __init__.py │ ├── crfsuite_model_prediction.py │ ├── crfsuite_model_training.py │ ├── data │ │ ├── ner.csv │ │ └── ner_predict.csv │ ├── pyproject.toml │ ├── requirements-dev.txt │ ├── setup.py │ └── src │ │ └── lib.rs ├── iris_classification_xgboost │ ├── IrisClassificationXgboost.java │ ├── Makefile │ ├── data │ │ ├── iris.csv │ │ └── predict.csv │ └── iris_classification_library │ │ ├── Cargo.toml │ │ └── src │ │ └── lib.rs └── my_lambda_function │ ├── .cargo │ └── config │ ├── Cargo.toml │ ├── buildthis.sh │ └── src │ └── main.rs └── errata.md /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/practical-machine-learning-w-rust/b0fd379ee4f0f7bcd9276ae6d31576aa655b08d7/.DS_Store -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /9781484251201.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/practical-machine-learning-w-rust/b0fd379ee4f0f7bcd9276ae6d31576aa655b08d7/9781484251201.jpg -------------------------------------------------------------------------------- /Contributing.md: -------------------------------------------------------------------------------- 1 | # Contributing to Apress Source Code 2 | 3 | Copyright for Apress source code belongs to the author(s). However, under fair use you are encouraged to fork and contribute minor corrections and updates for the benefit of the author(s) and other readers. 4 | 5 | ## How to Contribute 6 | 7 | 1. Make sure you have a GitHub account. 8 | 2. Fork the repository for the relevant book. 9 | 3. Create a new branch on which to make your change, e.g. 10 | `git checkout -b my_code_contribution` 11 | 4. Commit your change. Include a commit message describing the correction. Please note that if your commit message is not clear, the correction will not be accepted. 12 | 5. Submit a pull request. 13 | 14 | Thank you for your contribution! -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Freeware License, some rights reserved 2 | 3 | Copyright (c) 2020 Joydeep Bhattacharjee 4 | 5 | Permission is hereby granted, free of charge, to anyone obtaining a copy 6 | of this software and associated documentation files (the "Software"), 7 | to work with the Software within the limits of freeware distribution and fair use. 
8 | This includes the rights to use, copy, and modify the Software for personal use. 9 | Users are also allowed and encouraged to submit corrections and modifications 10 | to the Software for the benefit of other users. 11 | 12 | It is not allowed to reuse, modify, or redistribute the Software for 13 | commercial use in any way, or for a user’s educational materials such as books 14 | or blog articles without prior permission from the copyright holder. 15 | 16 | The above copyright notice and this permission notice need to be included 17 | in all copies or substantial portions of the software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS OR APRESS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | SOFTWARE. 26 | 27 | 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Apress Source Code 2 | 3 | This repository accompanies [*Practical Machine Learning with Rust*](https://www.apress.com/9781484251201) by Joydeep Bhattacharjee (Apress, 2020). 4 | 5 | [comment]: #cover 6 | ![Cover image](9781484251201.jpg) 7 | 8 | Download the files as a zip using the green button, or clone the repository to your machine using Git. 9 | 10 | ## Releases 11 | 12 | Release v1.0 corresponds to the code in the published book, without corrections or updates. 13 | 14 | ## Contributions 15 | 16 | See the file Contributing.md for more information on how you can contribute to this repository. 
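## Running the examples

The single-file programs under `chapter1/simple-scripts` compile directly with `rustc` (for example, `rustc oops.rs && ./oops` should reproduce the `$ ./oops` transcript at the top of that file, assuming a suitable Rust toolchain; `variables1.rs` uses a feature gate and needs nightly). The remaining examples are Cargo projects and build with `cargo build` from each project directory.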
-------------------------------------------------------------------------------- /chapter1/simple-scripts/conditions.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | let place = "himalayas"; 3 | 4 | let weather = if place == "himalayas" { 5 | "cold" 6 | } else { 7 | "hot" 8 | }; 9 | println!("{:?}", weather); 10 | } -------------------------------------------------------------------------------- /chapter1/simple-scripts/enumerations.rs: -------------------------------------------------------------------------------- 1 | // enumerations.rs 2 | 3 | #[derive(Debug)] 4 | enum NationalHolidays { 5 | GandhiJayanti, 6 | RepublicDay, 7 | IndependenceDay, 8 | } 9 | 10 | fn inspect(day: NationalHolidays) -> String { 11 | match day { 12 | NationalHolidays::GandhiJayanti => String::from("Oct 2"), 13 | NationalHolidays::RepublicDay => String::from("Jan 26"), 14 | NationalHolidays::IndependenceDay => String::from("Aug 15"), 15 | } 16 | } 17 | 18 | fn main() { 19 | let day = NationalHolidays::GandhiJayanti; 20 | let date = inspect(day); 21 | println!("{:?}", date); // output: Oct 2 22 | } -------------------------------------------------------------------------------- /chapter1/simple-scripts/functions.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | println!("{:?}", square_of(-5)); 3 | } 4 | 5 | fn square_of(x: i32) -> i32 { 6 | println!("x = {:?}", x); 7 | x.pow(2) 8 | } -------------------------------------------------------------------------------- /chapter1/simple-scripts/match.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | let place = "himalayas"; 3 | 4 | let weather = match place { 5 | "himalayas" => "cold", 6 | _ => "hot", 7 | }; 8 | println!("{:?}", weather); 9 | } -------------------------------------------------------------------------------- /chapter1/simple-scripts/oops.rs: -------------------------------------------------------------------------------- 1 | // oops.rs 2 | // $ ./oops 3 | // Planet { co2: 0.04, nitrogen: 78.09 } 4 | // Planet { co2: 95.32, nitrogen: 2.7 } 5 | // For planet Planet { co2: 0.04, nitrogen: 78.09 }: co2 = 0.04, nitrogen=78.09, other_gases=21.870003 6 | // For planet Planet { co2: 95.32, nitrogen: 2.7 }: co2 = 95.32, nitrogen=2.7, other_gases=1.9800003 7 | 8 | #[derive(Debug)] 9 | struct Planet { 10 | co2: f32, 11 | nitrogen: f32 12 | } 13 | 14 | trait Atmosphere { 15 | fn new(co2: f32, nitrogen: f32) -> Self; 16 | fn amount_of_other_gases(&self) -> f32; 17 | fn summarize(&self); 18 | } 19 | 20 | impl Atmosphere for Planet { 21 | fn new(co2: f32, nitrogen: f32) -> Planet { 22 | Planet { co2: co2, nitrogen: nitrogen } 23 | } 24 | 25 | fn amount_of_other_gases(&self) -> f32 { 26 | 100.0 - self.co2 - self.nitrogen 27 | } 28 | 29 | fn summarize(&self) { 30 | let other_gases = self.amount_of_other_gases(); 31 | println!("For planet {planet:?}: co2 = {co2}, nitrogen={nitrogen}, other_gases={other_gases}", 32 | planet=self, co2=self.co2, nitrogen=self.nitrogen, other_gases=other_gases); 33 | } 34 | } 35 | 36 | fn main() { 37 | let earth = Planet { co2: 0.04, nitrogen: 78.09 }; 38 | println!("{:?}", earth); 39 | 40 | let mars = Planet { co2: 95.32, nitrogen: 2.7 }; 41 | println!("{:?}", mars); 42 | 43 | earth.summarize(); 44 | 45 | mars.summarize(); 46 | } 47 | -------------------------------------------------------------------------------- /chapter1/simple-scripts/ownership1.rs: 
-------------------------------------------------------------------------------- 1 | // ownership1.rs 2 | 3 | fn main() { 4 | let lang = "rust"; 5 | let rust1 = add_version(&lang); 6 | println!("{:?}", rust1); 7 | } 8 | 9 | fn add_version(s: &str) -> String { 10 | s.to_string() + " 2018." 11 | } 12 | -------------------------------------------------------------------------------- /chapter1/simple-scripts/ownership2.rs: -------------------------------------------------------------------------------- 1 | // ownership2.rs 2 | fn main() { 3 | let lang = String::from("rust"); 4 | let rust1 = add_version(lang); 5 | println!("{:?}", rust1); 6 | let rust2 = add_lang(lang); 7 | println!("{:?}", rust2); 8 | } 9 | 10 | fn add_version(s: String) -> String { 11 | s + " " + "2018!!" 12 | } 13 | 14 | fn add_lang(s: String) -> String { 15 | s + " " + "lang." 16 | } -------------------------------------------------------------------------------- /chapter1/simple-scripts/ownership3.rs: -------------------------------------------------------------------------------- 1 | // ownership3.rs 2 | 3 | fn main() { 4 | let lang = String::from("rust"); 5 | let rust1 = add_version(&lang); 6 | println!("{:?}", rust1); 7 | let rust2 = add_lang(&lang); 8 | println!("{:?}", rust2); 9 | } 10 | 11 | fn add_version(s: &String) -> String { 12 | s.push_str(" 2019!!"); 13 | s.to_string() 14 | } 15 | 16 | fn add_lang(s: &String) -> String { 17 | s.push_str(" lang."); 18 | s.to_string() 19 | } 20 | -------------------------------------------------------------------------------- /chapter1/simple-scripts/ownership4.rs: -------------------------------------------------------------------------------- 1 | // $ ./ownership4 2 | // "rust 2019!!" 3 | // "rust 2019!! lang." 4 | 5 | fn main() { 6 | let mut lang = String::from("rust"); 7 | let rust1 = add_version(&mut lang); 8 | println!("{:?}", rust1); 9 | let rust2 = add_lang(&mut lang); 10 | println!("{:?}", rust2); 11 | } 12 | 13 | fn add_version(s: &mut String) -> String { 14 | s.push_str(" 2019!!"); 15 | s.to_string() 16 | } 17 | 18 | fn add_lang(s: &mut String) -> String { 19 | s.push_str(" lang."); 20 | s.to_string() 21 | } -------------------------------------------------------------------------------- /chapter1/simple-scripts/ownership5.rs: -------------------------------------------------------------------------------- 1 | // $ ./ownership 2 | // "rust 2018." 3 | // "rust lang." 4 | 5 | fn main() { 6 | let lang = "rust"; // change done here 7 | let rust1 = add_version(&lang); // change done here 8 | println!("{:?}", rust1); 9 | let rust2 = add_lang(&lang); // change done here 10 | println!("{:?}", rust2); 11 | } 12 | 13 | fn add_version(s: &str) -> String { 14 | s.to_string() + " 2018." 15 | } 16 | 17 | fn add_lang(s: &str) -> String { 18 | s.to_string() + " lang." 
19 | } -------------------------------------------------------------------------------- /chapter1/simple-scripts/unittestingexample/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "unittestingexample" 3 | version = "0.1.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | -------------------------------------------------------------------------------- /chapter1/simple-scripts/unittestingexample/src/main.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | let lang = "rust"; 3 | let rust1 = add_version(&lang); 4 | println!("{:?}", rust1); 5 | } 6 | 7 | fn add_version(s: &str) -> String { 8 | s.to_string() + " 2018." 9 | } 10 | 11 | #[test] 12 | fn test_add_version() { 13 | assert_eq!(add_version("abcd"), String::from("abcd 2018.")); 14 | } -------------------------------------------------------------------------------- /chapter1/simple-scripts/variables.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | let x = "learning rust"; 3 | 4 | println!("{}", x); 5 | 6 | } 7 | -------------------------------------------------------------------------------- /chapter1/simple-scripts/variables1.rs: -------------------------------------------------------------------------------- 1 | #![feature(core_intrinsics)] 2 | 3 | fn print_type_of<T>(_: &T) { 4 | println!("{}", unsafe { std::intrinsics::type_name::<T>() }); 5 | } 6 | 7 | fn main() { 8 | let x = "learning rust"; 9 | let y = 6; 10 | let z = 3.14; 11 | 12 | println!("{}", x); 13 | println!("type of x:"); 14 | print_type_of(&x); 15 | println!("type of y:"); 16 | print_type_of(&y); 17 | println!("type of z:"); 18 | print_type_of(&z); 19 | 20 | } 21 | -------------------------------------------------------------------------------- /chapter1/simple-scripts/variables2.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | let mut x = 32; 3 | println!("Current value of x: {}", x); 4 | x = 64; 5 | println!("Current value of x: {}", x); 6 | } 7 | -------------------------------------------------------------------------------- /chapter1/simple-scripts/variables3.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | let mut x = 32; 3 | println!("Current value of x: {}", x); 4 | x = "rust"; 5 | println!("Current value of x: {}", x); 6 | } 7 | -------------------------------------------------------------------------------- /chapter1/simple-scripts/variables4.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | let x = 1; 3 | let x = x + 2; 4 | let x = x * 2; 5 | println!("Value of x: {}", x); 6 | } 7 | -------------------------------------------------------------------------------- /chapter1/simple-scripts/variables5.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | let x = 5; 3 | 4 | if 4 < 10 { 5 | let x = 10; 6 | println!("Inside if x = {:?}", x); 7 | } 8 | println!("Outside if x = {:?}", x); 9 | } -------------------------------------------------------------------------------- /chapter2/datasets/iris.csv: -------------------------------------------------------------------------------- 1 | sepal_length,sepal_width,petal_length,petal_width,species 2 | 5.1,3.5,1.4,0.2,setosa 3 | 4.9,3.0,1.4,0.2,setosa 4 | 4.7,3.2,1.3,0.2,setosa 5 | 
4.6,3.1,1.5,0.2,setosa 6 | 5.0,3.6,1.4,0.2,setosa 7 | 5.4,3.9,1.7,0.4,setosa 8 | 4.6,3.4,1.4,0.3,setosa 9 | 5.0,3.4,1.5,0.2,setosa 10 | 4.4,2.9,1.4,0.2,setosa 11 | 4.9,3.1,1.5,0.1,setosa 12 | 5.4,3.7,1.5,0.2,setosa 13 | 4.8,3.4,1.6,0.2,setosa 14 | 4.8,3.0,1.4,0.1,setosa 15 | 4.3,3.0,1.1,0.1,setosa 16 | 5.8,4.0,1.2,0.2,setosa 17 | 5.7,4.4,1.5,0.4,setosa 18 | 5.4,3.9,1.3,0.4,setosa 19 | 5.1,3.5,1.4,0.3,setosa 20 | 5.7,3.8,1.7,0.3,setosa 21 | 5.1,3.8,1.5,0.3,setosa 22 | 5.4,3.4,1.7,0.2,setosa 23 | 5.1,3.7,1.5,0.4,setosa 24 | 4.6,3.6,1.0,0.2,setosa 25 | 5.1,3.3,1.7,0.5,setosa 26 | 4.8,3.4,1.9,0.2,setosa 27 | 5.0,3.0,1.6,0.2,setosa 28 | 5.0,3.4,1.6,0.4,setosa 29 | 5.2,3.5,1.5,0.2,setosa 30 | 5.2,3.4,1.4,0.2,setosa 31 | 4.7,3.2,1.6,0.2,setosa 32 | 4.8,3.1,1.6,0.2,setosa 33 | 5.4,3.4,1.5,0.4,setosa 34 | 5.2,4.1,1.5,0.1,setosa 35 | 5.5,4.2,1.4,0.2,setosa 36 | 4.9,3.1,1.5,0.1,setosa 37 | 5.0,3.2,1.2,0.2,setosa 38 | 5.5,3.5,1.3,0.2,setosa 39 | 4.9,3.1,1.5,0.1,setosa 40 | 4.4,3.0,1.3,0.2,setosa 41 | 5.1,3.4,1.5,0.2,setosa 42 | 5.0,3.5,1.3,0.3,setosa 43 | 4.5,2.3,1.3,0.3,setosa 44 | 4.4,3.2,1.3,0.2,setosa 45 | 5.0,3.5,1.6,0.6,setosa 46 | 5.1,3.8,1.9,0.4,setosa 47 | 4.8,3.0,1.4,0.3,setosa 48 | 5.1,3.8,1.6,0.2,setosa 49 | 4.6,3.2,1.4,0.2,setosa 50 | 5.3,3.7,1.5,0.2,setosa 51 | 5.0,3.3,1.4,0.2,setosa 52 | 7.0,3.2,4.7,1.4,versicolor 53 | 6.4,3.2,4.5,1.5,versicolor 54 | 6.9,3.1,4.9,1.5,versicolor 55 | 5.5,2.3,4.0,1.3,versicolor 56 | 6.5,2.8,4.6,1.5,versicolor 57 | 5.7,2.8,4.5,1.3,versicolor 58 | 6.3,3.3,4.7,1.6,versicolor 59 | 4.9,2.4,3.3,1.0,versicolor 60 | 6.6,2.9,4.6,1.3,versicolor 61 | 5.2,2.7,3.9,1.4,versicolor 62 | 5.0,2.0,3.5,1.0,versicolor 63 | 5.9,3.0,4.2,1.5,versicolor 64 | 6.0,2.2,4.0,1.0,versicolor 65 | 6.1,2.9,4.7,1.4,versicolor 66 | 5.6,2.9,3.6,1.3,versicolor 67 | 6.7,3.1,4.4,1.4,versicolor 68 | 5.6,3.0,4.5,1.5,versicolor 69 | 5.8,2.7,4.1,1.0,versicolor 70 | 6.2,2.2,4.5,1.5,versicolor 71 | 5.6,2.5,3.9,1.1,versicolor 72 | 5.9,3.2,4.8,1.8,versicolor 73 | 6.1,2.8,4.0,1.3,versicolor 74 | 6.3,2.5,4.9,1.5,versicolor 75 | 6.1,2.8,4.7,1.2,versicolor 76 | 6.4,2.9,4.3,1.3,versicolor 77 | 6.6,3.0,4.4,1.4,versicolor 78 | 6.8,2.8,4.8,1.4,versicolor 79 | 6.7,3.0,5.0,1.7,versicolor 80 | 6.0,2.9,4.5,1.5,versicolor 81 | 5.7,2.6,3.5,1.0,versicolor 82 | 5.5,2.4,3.8,1.1,versicolor 83 | 5.5,2.4,3.7,1.0,versicolor 84 | 5.8,2.7,3.9,1.2,versicolor 85 | 6.0,2.7,5.1,1.6,versicolor 86 | 5.4,3.0,4.5,1.5,versicolor 87 | 6.0,3.4,4.5,1.6,versicolor 88 | 6.7,3.1,4.7,1.5,versicolor 89 | 6.3,2.3,4.4,1.3,versicolor 90 | 5.6,3.0,4.1,1.3,versicolor 91 | 5.5,2.5,4.0,1.3,versicolor 92 | 5.5,2.6,4.4,1.2,versicolor 93 | 6.1,3.0,4.6,1.4,versicolor 94 | 5.8,2.6,4.0,1.2,versicolor 95 | 5.0,2.3,3.3,1.0,versicolor 96 | 5.6,2.7,4.2,1.3,versicolor 97 | 5.7,3.0,4.2,1.2,versicolor 98 | 5.7,2.9,4.2,1.3,versicolor 99 | 6.2,2.9,4.3,1.3,versicolor 100 | 5.1,2.5,3.0,1.1,versicolor 101 | 5.7,2.8,4.1,1.3,versicolor 102 | 6.3,3.3,6.0,2.5,virginica 103 | 5.8,2.7,5.1,1.9,virginica 104 | 7.1,3.0,5.9,2.1,virginica 105 | 6.3,2.9,5.6,1.8,virginica 106 | 6.5,3.0,5.8,2.2,virginica 107 | 7.6,3.0,6.6,2.1,virginica 108 | 4.9,2.5,4.5,1.7,virginica 109 | 7.3,2.9,6.3,1.8,virginica 110 | 6.7,2.5,5.8,1.8,virginica 111 | 7.2,3.6,6.1,2.5,virginica 112 | 6.5,3.2,5.1,2.0,virginica 113 | 6.4,2.7,5.3,1.9,virginica 114 | 6.8,3.0,5.5,2.1,virginica 115 | 5.7,2.5,5.0,2.0,virginica 116 | 5.8,2.8,5.1,2.4,virginica 117 | 6.4,3.2,5.3,2.3,virginica 118 | 6.5,3.0,5.5,1.8,virginica 119 | 7.7,3.8,6.7,2.2,virginica 120 | 7.7,2.6,6.9,2.3,virginica 121 | 6.0,2.2,5.0,1.5,virginica 122 | 
6.9,3.2,5.7,2.3,virginica 123 | 5.6,2.8,4.9,2.0,virginica 124 | 7.7,2.8,6.7,2.0,virginica 125 | 6.3,2.7,4.9,1.8,virginica 126 | 6.7,3.3,5.7,2.1,virginica 127 | 7.2,3.2,6.0,1.8,virginica 128 | 6.2,2.8,4.8,1.8,virginica 129 | 6.1,3.0,4.9,1.8,virginica 130 | 6.4,2.8,5.6,2.1,virginica 131 | 7.2,3.0,5.8,1.6,virginica 132 | 7.4,2.8,6.1,1.9,virginica 133 | 7.9,3.8,6.4,2.0,virginica 134 | 6.4,2.8,5.6,2.2,virginica 135 | 6.3,2.8,5.1,1.5,virginica 136 | 6.1,2.6,5.6,1.4,virginica 137 | 7.7,3.0,6.1,2.3,virginica 138 | 6.3,3.4,5.6,2.4,virginica 139 | 6.4,3.1,5.5,1.8,virginica 140 | 6.0,3.0,4.8,1.8,virginica 141 | 6.9,3.1,5.4,2.1,virginica 142 | 6.7,3.1,5.6,2.4,virginica 143 | 6.9,3.1,5.1,2.3,virginica 144 | 5.8,2.7,5.1,1.9,virginica 145 | 6.8,3.2,5.9,2.3,virginica 146 | 6.7,3.3,5.7,2.5,virginica 147 | 6.7,3.0,5.2,2.3,virginica 148 | 6.3,2.5,5.0,1.9,virginica 149 | 6.5,3.0,5.2,2.0,virginica 150 | 6.2,3.4,5.4,2.3,virginica 151 | 5.9,3.0,5.1,1.8,virginica 152 | -------------------------------------------------------------------------------- /chapter2/iris_classification_tchrs/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/practical-machine-learning-w-rust/b0fd379ee4f0f7bcd9276ae6d31576aa655b08d7/chapter2/iris_classification_tchrs/.DS_Store -------------------------------------------------------------------------------- /chapter2/iris_classification_tchrs/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "iris_classification_tchrs" 3 | version = "0.1.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | csv = "1.0.5" 9 | serde = "1.0.89" 10 | serde_derive = "1.0.89" 11 | rand = "0.6" 12 | tch = "0.0.6" 13 | ml-utils = { path = "../ml-utils" } -------------------------------------------------------------------------------- /chapter2/iris_classification_tchrs/mklml_mac_2019.0.5.20190502.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/practical-machine-learning-w-rust/b0fd379ee4f0f7bcd9276ae6d31576aa655b08d7/chapter2/iris_classification_tchrs/mklml_mac_2019.0.5.20190502.tgz -------------------------------------------------------------------------------- /chapter2/iris_classification_tchrs/src/linear_with_sgd.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | use std::vec::Vec; 3 | use std::error::Error; 4 | 5 | use csv; 6 | use rand; 7 | use rand::thread_rng; 8 | use rand::seq::SliceRandom; 9 | 10 | use tch; 11 | use tch::{nn, kind, Kind, Tensor, no_grad, vision, Device}; 12 | use tch::{nn::Module, nn::OptimizerConfig}; 13 | 14 | use ml_utils; 15 | use ml_utils::datasets::Flower; 16 | 17 | static FEATURE_DIM: i64 = 4; 18 | static HIDDEN_NODES: i64 = 10; 19 | static LABELS: i64 = 3; 20 | 21 | #[derive(Debug)] 22 | struct Net { 23 | fc1: nn::Linear, 24 | fc2: nn::Linear, 25 | } 26 | 27 | impl Net { 28 | fn new(vs: &nn::Path) -> Net { 29 | let fc1 = nn::Linear::new(vs, FEATURE_DIM, HIDDEN_NODES, Default::default()); 30 | let fc2 = nn::Linear::new(vs, HIDDEN_NODES, LABELS, Default::default()); 31 | Net { fc1, fc2 } 32 | } 33 | } 34 | 35 | impl Module for Net { 36 | fn forward(&self, xs: &Tensor) -> Tensor { 37 | xs.apply(&self.fc1).relu().apply(&self.fc2) 38 | } 39 | } 40 | 41 | pub fn run() -> Result<(), Box<dyn Error>> { 42 | // Get all the data 43 | let mut rdr = csv::Reader::from_reader(io::stdin()); 
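// Note: the records are read from standard input, so a typical invocation (assuming the iris data in chapter2/datasets) is `cargo run -- sgd < ../datasets/iris.csv`; main.rs routes any argument other than "nn" to this module.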
44 | let mut data = Vec::new(); 45 | for result in rdr.deserialize() { 46 | let r: Flower = result?; 47 | data.push(r); // data contains all the records 48 | } 49 | 50 | // shuffle the data. 51 | data.shuffle(&mut thread_rng()); 52 | 53 | // separate out to train and test datasets. 54 | let test_size: f64 = 0.5; 55 | let test_size: f64 = data.len() as f64 * test_size; 56 | let test_size = test_size.round() as usize; 57 | 58 | let (test_data, train_data) = data.split_at(test_size); 59 | let train_size = train_data.len(); 60 | let test_size = test_data.len(); 61 | assert_eq!(train_size, test_size); 62 | 63 | // differentiate the features and the labels. 64 | // torch needs vectors in f64 65 | let flower_x_train: Vec<f64> = train_data.iter().flat_map(|r| r.into_feature_vector()).map(|x| x as f64).collect(); 66 | let flower_y_train: Vec<f64> = train_data.iter().map(|r| r.into_labels()).map(|x| x as f64).collect(); 67 | 68 | let flower_x_test: Vec<f64> = test_data.iter().flat_map(|r| r.into_feature_vector()).map(|x| x as f64).collect(); 69 | let flower_y_test: Vec<f64> = test_data.iter().map(|r| r.into_labels()).map(|x| x as f64).collect(); 70 | 71 | let flower_x_train = Tensor::float_vec(flower_x_train.as_slice()); 72 | let flower_y_train = Tensor::float_vec(flower_y_train.as_slice()).to_kind(Kind::Int64); 73 | let flower_x_test = Tensor::float_vec(flower_x_test.as_slice()); 74 | let flower_y_test = Tensor::float_vec(flower_y_test.as_slice()).to_kind(Kind::Int64); 75 | 76 | // print shape of all the data. 77 | println!("Training data shape {:?}", flower_x_train.size()); 78 | println!("Training flower_y_train data shape {:?}", flower_y_train.size()); 79 | 80 | // reshaping examples 81 | // one way to reshape is using unsqueeze 82 | //let flower_x_train1 = flower_x_train.unsqueeze(0); // Training data shape [1, 360] 83 | //println!("Training data shape {:?}", flower_x_train1.size()); 84 | let train_size = train_size as i64; 85 | let test_size = test_size as i64; 86 | let flower_x_train = flower_x_train.view(&[train_size, FEATURE_DIM]); 87 | let flower_x_test = flower_x_test.view(&[test_size, FEATURE_DIM]); 88 | let flower_y_train = flower_y_train.view(&[train_size]); 89 | let flower_y_test = flower_y_test.view(&[test_size]); 90 | 91 | // working on a linear neural network with SGD 92 | let vs = nn::VarStore::new(Device::Cpu); 93 | let net = Net::new(&vs.root()); 94 | let opt = nn::Adam::default().build(&vs, 1e-3)?; 95 | for epoch in 1..200 { 96 | let loss = net 97 | .forward(&flower_x_train) 98 | .cross_entropy_for_logits(&flower_y_train); 99 | opt.backward_step(&loss); 100 | let test_accuracy = net 101 | .forward(&flower_x_test) 102 | .accuracy_for_logits(&flower_y_test); 103 | println!( 104 | "epoch: {:4} train loss: {:8.5} test acc: {:5.2}%", 105 | epoch, 106 | f64::from(&loss), 107 | 100. * f64::from(&test_accuracy), 108 | ); 109 | }; 110 | 111 | Ok(()) 112 | } 113 | -------------------------------------------------------------------------------- /chapter2/iris_classification_tchrs/src/main.rs: -------------------------------------------------------------------------------- 1 | extern crate serde; 2 | // This lets us write `#[derive(Deserialize)]`. 
3 | #[macro_use] 4 | extern crate serde_derive; 5 | 6 | use std::vec::Vec; 7 | use std::process::exit; 8 | use std::env::args; 9 | 10 | mod simple_nn; 11 | mod linear_with_sgd; 12 | 13 | fn main() { 14 | let args: Vec<String> = args().collect(); 15 | let model = if args.len() < 2 { 16 | None 17 | } else { 18 | Some(args[1].as_str()) 19 | }; 20 | let res = match model { 21 | None => {println!("Run cargo run [nn|linear_sgd] to get outputs"); Ok(())}, 22 | Some("nn") => simple_nn::run(), 23 | Some(_) => linear_with_sgd::run(), 24 | }; 25 | // Putting the main code in another function serves two purposes: 26 | // 1. We can use the `?` operator. 27 | // 2. We can call exit safely, which does not run any destructors. 28 | exit(match res { 29 | Ok(_) => 0, 30 | Err(e) => { 31 | println!("{}", e); 32 | 1 33 | } 34 | }) 35 | } -------------------------------------------------------------------------------- /chapter2/iris_classification_tchrs/src/simple_nn.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | use std::vec::Vec; 3 | use std::error::Error; 4 | 5 | use csv; 6 | use rand; 7 | use rand::thread_rng; 8 | use rand::seq::SliceRandom; 9 | 10 | use tch; 11 | use tch::{nn, kind, Kind, Tensor, no_grad, vision, Device}; 12 | use tch::{nn::Module, nn::OptimizerConfig}; 13 | 14 | use ml_utils; 15 | use ml_utils::datasets::Flower; 16 | 17 | static FEATURE_DIM: i64 = 4; 18 | static HIDDEN_NODES: i64 = 10; 19 | static LABELS: i64 = 3; 20 | 21 | #[derive(Debug)] 22 | struct Net { 23 | fc1: nn::Linear, 24 | fc2: nn::Linear, 25 | } 26 | 27 | impl Net { 28 | fn new(vs: &nn::Path) -> Net { 29 | let fc1 = nn::Linear::new(vs, FEATURE_DIM, HIDDEN_NODES, Default::default()); 30 | let fc2 = nn::Linear::new(vs, HIDDEN_NODES, LABELS, Default::default()); 31 | Net { fc1, fc2 } 32 | } 33 | } 34 | 35 | impl Module for Net { 36 | fn forward(&self, xs: &Tensor) -> Tensor { 37 | xs.apply(&self.fc1).relu().apply(&self.fc2) 38 | } 39 | } 40 | 41 | pub fn run() -> Result<(), Box<dyn Error>> { 42 | // Get all the data 43 | let mut rdr = csv::Reader::from_reader(io::stdin()); 44 | let mut data = Vec::new(); 45 | for result in rdr.deserialize() { 46 | let r: Flower = result?; 47 | data.push(r); // data contains all the records 48 | } 49 | 50 | // shuffle the data. 51 | data.shuffle(&mut thread_rng()); 52 | 53 | // separate out to train and test datasets. 54 | let test_size: f64 = 0.5; 55 | let test_size: f64 = data.len() as f64 * test_size; 56 | let test_size = test_size.round() as usize; 57 | 58 | let (test_data, train_data) = data.split_at(test_size); 59 | let train_size = train_data.len(); 60 | let test_size = test_data.len(); 61 | assert_eq!(train_size, test_size); 62 | 63 | // differentiate the features and the labels. 
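// (into_feature_vector() flattens each record's four measurements, and into_labels() is assumed to encode the species as an integer class index; see ml-utils/src/datasets.rs.)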
64 | // torch needs vectors in f64 65 | let flower_x_train: Vec<f64> = train_data.iter().flat_map(|r| r.into_feature_vector()).map(|x| x as f64).collect(); 66 | let flower_y_train: Vec<f64> = train_data.iter().map(|r| r.into_labels()).map(|x| x as f64).collect(); 67 | 68 | let flower_x_test: Vec<f64> = test_data.iter().flat_map(|r| r.into_feature_vector()).map(|x| x as f64).collect(); 69 | let flower_y_test: Vec<f64> = test_data.iter().map(|r| r.into_labels()).map(|x| x as f64).collect(); 70 | 71 | let flower_x_train = Tensor::float_vec(flower_x_train.as_slice()); 72 | let flower_y_train = Tensor::float_vec(flower_y_train.as_slice()).to_kind(Kind::Int64); 73 | let flower_x_test = Tensor::float_vec(flower_x_test.as_slice()); 74 | let flower_y_test = Tensor::float_vec(flower_y_test.as_slice()).to_kind(Kind::Int64); 75 | 76 | // print shape of all the data. 77 | println!("Training data shape {:?}", flower_x_train.size()); 78 | println!("Training flower_y_train data shape {:?}", flower_y_train.size()); 79 | 80 | // reshaping examples 81 | // one way to reshape is using unsqueeze 82 | //let flower_x_train1 = flower_x_train.unsqueeze(0); // Training data shape [1, 360] 83 | //println!("Training data shape {:?}", flower_x_train1.size()); 84 | let train_size = train_size as i64; 85 | let test_size = test_size as i64; 86 | let flower_x_train = flower_x_train.view(&[train_size, FEATURE_DIM]); 87 | let flower_x_test = flower_x_test.view(&[test_size, FEATURE_DIM]); 88 | let flower_y_train = flower_y_train.view(&[train_size]); 89 | let flower_y_test = flower_y_test.view(&[test_size]); 90 | 91 | // working on a linear neural network with SGD 92 | let vs = nn::VarStore::new(Device::Cpu); 93 | let net = Net::new(&vs.root()); 94 | let opt = nn::Adam::default().build(&vs, 1e-3)?; 95 | for epoch in 1..200 { 96 | let loss = net 97 | .forward(&flower_x_train) 98 | .cross_entropy_for_logits(&flower_y_train); 99 | opt.backward_step(&loss); 100 | let test_accuracy = net 101 | .forward(&flower_x_test) 102 | .accuracy_for_logits(&flower_y_test); 103 | println!( 104 | "epoch: {:4} train loss: {:8.5} test acc: {:5.2}%", 105 | epoch, 106 | f64::from(&loss), 107 | 100. * f64::from(&test_accuracy), 108 | ); 109 | }; 110 | 111 | 112 | let mut ws = Tensor::ones(&[FEATURE_DIM, 1], kind::FLOAT_CPU).set_requires_grad(true); 113 | let mut bs = Tensor::ones(&[train_size], kind::FLOAT_CPU).set_requires_grad(true); 114 | 115 | 116 | for epoch in 1..200 { 117 | let logits = flower_x_train.mm(&ws) + &bs; 118 | let loss = logits.squeeze().cross_entropy_for_logits(&flower_y_train); // since working on label encoded vectors. 119 | ws.zero_grad(); 120 | bs.zero_grad(); 121 | loss.backward(); 122 | no_grad(|| { 123 | ws += ws.grad() * (-1); 124 | bs += bs.grad() * (-1); 125 | }); 126 | let test_logits = flower_x_test.mm(&ws) + &bs; 127 | let test_accuracy = test_logits 128 | .argmax1(-1, false) 129 | .eq1(&flower_y_test) 130 | .to_kind(Kind::Float) 131 | .mean() 132 | .double_value(&[]); 133 | println!( 134 | "epoch: {:4} train loss: {:8.5} test acc: {:5.2}%", 135 | epoch, 136 | loss.double_value(&[]), 137 | 100. 
* test_accuracy 138 | ); 139 | } 140 | 141 | Ok(()) 142 | } 143 | -------------------------------------------------------------------------------- /chapter2/iris_classification_xgboost/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/practical-machine-learning-w-rust/b0fd379ee4f0f7bcd9276ae6d31576aa655b08d7/chapter2/iris_classification_xgboost/.DS_Store -------------------------------------------------------------------------------- /chapter2/iris_classification_xgboost/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "iris_classification_xgboost" 3 | version = "0.1.0" 4 | authors = ["joydeep bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | csv = "1.0.5" 9 | serde = "1.0.89" 10 | serde_derive = "1.0.89" 11 | rand = "0.6" 12 | xgboost = "0.1.4" 13 | ml-utils = { path = "../ml-utils" } -------------------------------------------------------------------------------- /chapter2/iris_classification_xgboost/iris.csv: -------------------------------------------------------------------------------- 1 | sepal_length,sepal_width,petal_length,petal_width,species 2 | 5.1,3.5,1.4,0.2,setosa 3 | 4.9,3.0,1.4,0.2,setosa 4 | 4.7,3.2,1.3,0.2,setosa 5 | 4.6,3.1,1.5,0.2,setosa 6 | 5.0,3.6,1.4,0.2,setosa 7 | 5.4,3.9,1.7,0.4,setosa 8 | 4.6,3.4,1.4,0.3,setosa 9 | 5.0,3.4,1.5,0.2,setosa 10 | 4.4,2.9,1.4,0.2,setosa 11 | 4.9,3.1,1.5,0.1,setosa 12 | 5.4,3.7,1.5,0.2,setosa 13 | 4.8,3.4,1.6,0.2,setosa 14 | 4.8,3.0,1.4,0.1,setosa 15 | 4.3,3.0,1.1,0.1,setosa 16 | 5.8,4.0,1.2,0.2,setosa 17 | 5.7,4.4,1.5,0.4,setosa 18 | 5.4,3.9,1.3,0.4,setosa 19 | 5.1,3.5,1.4,0.3,setosa 20 | 5.7,3.8,1.7,0.3,setosa 21 | 5.1,3.8,1.5,0.3,setosa 22 | 5.4,3.4,1.7,0.2,setosa 23 | 5.1,3.7,1.5,0.4,setosa 24 | 4.6,3.6,1.0,0.2,setosa 25 | 5.1,3.3,1.7,0.5,setosa 26 | 4.8,3.4,1.9,0.2,setosa 27 | 5.0,3.0,1.6,0.2,setosa 28 | 5.0,3.4,1.6,0.4,setosa 29 | 5.2,3.5,1.5,0.2,setosa 30 | 5.2,3.4,1.4,0.2,setosa 31 | 4.7,3.2,1.6,0.2,setosa 32 | 4.8,3.1,1.6,0.2,setosa 33 | 5.4,3.4,1.5,0.4,setosa 34 | 5.2,4.1,1.5,0.1,setosa 35 | 5.5,4.2,1.4,0.2,setosa 36 | 4.9,3.1,1.5,0.1,setosa 37 | 5.0,3.2,1.2,0.2,setosa 38 | 5.5,3.5,1.3,0.2,setosa 39 | 4.9,3.1,1.5,0.1,setosa 40 | 4.4,3.0,1.3,0.2,setosa 41 | 5.1,3.4,1.5,0.2,setosa 42 | 5.0,3.5,1.3,0.3,setosa 43 | 4.5,2.3,1.3,0.3,setosa 44 | 4.4,3.2,1.3,0.2,setosa 45 | 5.0,3.5,1.6,0.6,setosa 46 | 5.1,3.8,1.9,0.4,setosa 47 | 4.8,3.0,1.4,0.3,setosa 48 | 5.1,3.8,1.6,0.2,setosa 49 | 4.6,3.2,1.4,0.2,setosa 50 | 5.3,3.7,1.5,0.2,setosa 51 | 5.0,3.3,1.4,0.2,setosa 52 | 7.0,3.2,4.7,1.4,versicolor 53 | 6.4,3.2,4.5,1.5,versicolor 54 | 6.9,3.1,4.9,1.5,versicolor 55 | 5.5,2.3,4.0,1.3,versicolor 56 | 6.5,2.8,4.6,1.5,versicolor 57 | 5.7,2.8,4.5,1.3,versicolor 58 | 6.3,3.3,4.7,1.6,versicolor 59 | 4.9,2.4,3.3,1.0,versicolor 60 | 6.6,2.9,4.6,1.3,versicolor 61 | 5.2,2.7,3.9,1.4,versicolor 62 | 5.0,2.0,3.5,1.0,versicolor 63 | 5.9,3.0,4.2,1.5,versicolor 64 | 6.0,2.2,4.0,1.0,versicolor 65 | 6.1,2.9,4.7,1.4,versicolor 66 | 5.6,2.9,3.6,1.3,versicolor 67 | 6.7,3.1,4.4,1.4,versicolor 68 | 5.6,3.0,4.5,1.5,versicolor 69 | 5.8,2.7,4.1,1.0,versicolor 70 | 6.2,2.2,4.5,1.5,versicolor 71 | 5.6,2.5,3.9,1.1,versicolor 72 | 5.9,3.2,4.8,1.8,versicolor 73 | 6.1,2.8,4.0,1.3,versicolor 74 | 6.3,2.5,4.9,1.5,versicolor 75 | 6.1,2.8,4.7,1.2,versicolor 76 | 6.4,2.9,4.3,1.3,versicolor 77 | 6.6,3.0,4.4,1.4,versicolor 78 | 6.8,2.8,4.8,1.4,versicolor 79 | 6.7,3.0,5.0,1.7,versicolor 80 | 
6.0,2.9,4.5,1.5,versicolor 81 | 5.7,2.6,3.5,1.0,versicolor 82 | 5.5,2.4,3.8,1.1,versicolor 83 | 5.5,2.4,3.7,1.0,versicolor 84 | 5.8,2.7,3.9,1.2,versicolor 85 | 6.0,2.7,5.1,1.6,versicolor 86 | 5.4,3.0,4.5,1.5,versicolor 87 | 6.0,3.4,4.5,1.6,versicolor 88 | 6.7,3.1,4.7,1.5,versicolor 89 | 6.3,2.3,4.4,1.3,versicolor 90 | 5.6,3.0,4.1,1.3,versicolor 91 | 5.5,2.5,4.0,1.3,versicolor 92 | 5.5,2.6,4.4,1.2,versicolor 93 | 6.1,3.0,4.6,1.4,versicolor 94 | 5.8,2.6,4.0,1.2,versicolor 95 | 5.0,2.3,3.3,1.0,versicolor 96 | 5.6,2.7,4.2,1.3,versicolor 97 | 5.7,3.0,4.2,1.2,versicolor 98 | 5.7,2.9,4.2,1.3,versicolor 99 | 6.2,2.9,4.3,1.3,versicolor 100 | 5.1,2.5,3.0,1.1,versicolor 101 | 5.7,2.8,4.1,1.3,versicolor 102 | 6.3,3.3,6.0,2.5,virginica 103 | 5.8,2.7,5.1,1.9,virginica 104 | 7.1,3.0,5.9,2.1,virginica 105 | 6.3,2.9,5.6,1.8,virginica 106 | 6.5,3.0,5.8,2.2,virginica 107 | 7.6,3.0,6.6,2.1,virginica 108 | 4.9,2.5,4.5,1.7,virginica 109 | 7.3,2.9,6.3,1.8,virginica 110 | 6.7,2.5,5.8,1.8,virginica 111 | 7.2,3.6,6.1,2.5,virginica 112 | 6.5,3.2,5.1,2.0,virginica 113 | 6.4,2.7,5.3,1.9,virginica 114 | 6.8,3.0,5.5,2.1,virginica 115 | 5.7,2.5,5.0,2.0,virginica 116 | 5.8,2.8,5.1,2.4,virginica 117 | 6.4,3.2,5.3,2.3,virginica 118 | 6.5,3.0,5.5,1.8,virginica 119 | 7.7,3.8,6.7,2.2,virginica 120 | 7.7,2.6,6.9,2.3,virginica 121 | 6.0,2.2,5.0,1.5,virginica 122 | 6.9,3.2,5.7,2.3,virginica 123 | 5.6,2.8,4.9,2.0,virginica 124 | 7.7,2.8,6.7,2.0,virginica 125 | 6.3,2.7,4.9,1.8,virginica 126 | 6.7,3.3,5.7,2.1,virginica 127 | 7.2,3.2,6.0,1.8,virginica 128 | 6.2,2.8,4.8,1.8,virginica 129 | 6.1,3.0,4.9,1.8,virginica 130 | 6.4,2.8,5.6,2.1,virginica 131 | 7.2,3.0,5.8,1.6,virginica 132 | 7.4,2.8,6.1,1.9,virginica 133 | 7.9,3.8,6.4,2.0,virginica 134 | 6.4,2.8,5.6,2.2,virginica 135 | 6.3,2.8,5.1,1.5,virginica 136 | 6.1,2.6,5.6,1.4,virginica 137 | 7.7,3.0,6.1,2.3,virginica 138 | 6.3,3.4,5.6,2.4,virginica 139 | 6.4,3.1,5.5,1.8,virginica 140 | 6.0,3.0,4.8,1.8,virginica 141 | 6.9,3.1,5.4,2.1,virginica 142 | 6.7,3.1,5.6,2.4,virginica 143 | 6.9,3.1,5.1,2.3,virginica 144 | 5.8,2.7,5.1,1.9,virginica 145 | 6.8,3.2,5.9,2.3,virginica 146 | 6.7,3.3,5.7,2.5,virginica 147 | 6.7,3.0,5.2,2.3,virginica 148 | 6.3,2.5,5.0,1.9,virginica 149 | 6.5,3.0,5.2,2.0,virginica 150 | 6.2,3.4,5.4,2.3,virginica 151 | 5.9,3.0,5.1,1.8,virginica 152 | -------------------------------------------------------------------------------- /chapter2/iris_classification_xgboost/src/main.rs: -------------------------------------------------------------------------------- 1 | extern crate serde; 2 | // This lets us write `#[derive(Deserialize)]`. 3 | #[macro_use] 4 | extern crate serde_derive; 5 | 6 | use std::io; 7 | use std::process; 8 | use std::vec::Vec; 9 | use std::error::Error; 10 | 11 | use csv; 12 | use rand; 13 | use rand::thread_rng; 14 | use rand::seq::SliceRandom; 15 | 16 | use xgboost; 17 | use xgboost::{parameters, DMatrix, Booster}; 18 | 19 | use ml_utils::datasets::Flower; 20 | 21 | fn main() { 22 | if let Err(err) = read_csv() { 23 | println!("{}", err); 24 | process::exit(1); 25 | } 26 | } 27 | 28 | fn read_csv() -> Result<(), Box<dyn Error>> { 29 | // Get all the data 30 | let mut rdr = csv::Reader::from_reader(io::stdin()); 31 | let mut data = Vec::new(); 32 | for result in rdr.deserialize() { 33 | let r: Flower = result?; 34 | data.push(r); // data contains all the records 35 | } 36 | 37 | // shuffle the data. 38 | data.shuffle(&mut thread_rng()); 39 | 40 | // separate out to train and test datasets. 
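// (The split below holds out 20% of the shuffled records for testing and an equally sized chunk for validation; the remainder is used for training.)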
41 | let test_size: f32 = 0.2; 42 | let test_size: f32 = data.len() as f32 * test_size; 43 | let test_size = test_size.round() as usize; 44 | // we are keeping the val size to be the same as test_size. 45 | // this can be changed if required 46 | let val_size = test_size.clone(); 47 | 48 | let (test_data, train_and_val_data) = data.split_at(test_size); 49 | let (val_data, train_data) = train_and_val_data.split_at(val_size); 50 | let train_size = train_data.len(); 51 | let test_size = test_data.len(); 52 | let val_size = val_data.len(); 53 | 54 | // differentiate the features and the labels. 55 | let flower_x_train: Vec<f32> = train_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 56 | let flower_y_train: Vec<f32> = train_data.iter().map(|r| r.into_labels()).collect(); 57 | 58 | let flower_x_test: Vec<f32> = test_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 59 | let flower_y_test: Vec<f32> = test_data.iter().map(|r| r.into_labels()).collect(); 60 | 61 | let flower_x_val: Vec<f32> = val_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 62 | let flower_y_val: Vec<f32> = val_data.iter().map(|r| r.into_labels()).collect(); 63 | 64 | // convert training data into XGBoost's matrix format 65 | let mut dtrain = DMatrix::from_dense(&flower_x_train, train_size).unwrap(); 66 | 67 | // set ground truth labels for the training matrix 68 | dtrain.set_labels(&flower_y_train).unwrap(); 69 | 70 | // test matrix with test_size rows 71 | let mut dtest = DMatrix::from_dense(&flower_x_test, test_size).unwrap(); 72 | dtest.set_labels(&flower_y_test).unwrap(); 73 | 74 | // validation matrix with val_size rows 75 | let mut dval = DMatrix::from_dense(&flower_x_val, val_size).unwrap(); 76 | dval.set_labels(&flower_y_val).unwrap(); 77 | 78 | // configure objectives, metrics, etc. 79 | let learning_params = parameters::learning::LearningTaskParametersBuilder::default() 80 | .objective(parameters::learning::Objective::MultiSoftmax(3)) 81 | .build().unwrap(); 82 | 83 | // configure the tree-based learning model's parameters 84 | let tree_params = parameters::tree::TreeBoosterParametersBuilder::default() 85 | .max_depth(2) 86 | .eta(1.0) 87 | .build().unwrap(); 88 | 89 | // overall configuration for Booster 90 | let booster_params = parameters::BoosterParametersBuilder::default() 91 | .booster_type(parameters::BoosterType::Tree(tree_params)) 92 | .learning_params(learning_params) 93 | .verbose(true) 94 | .build().unwrap(); 95 | 96 | // specify datasets to evaluate against during training 97 | let evaluation_sets = &[(&dtrain, "train"), (&dtest, "test")]; 98 | 99 | // overall configuration for training/evaluation 100 | let params = parameters::TrainingParametersBuilder::default() 101 | .dtrain(&dtrain) // dataset to train with 102 | .boost_rounds(2) // number of training iterations 103 | .booster_params(booster_params) // model parameters 104 | .evaluation_sets(Some(evaluation_sets)) // optional datasets to evaluate against in each iteration 105 | .build().unwrap(); 106 | 107 | // train model, and print evaluation data 108 | let booster = Booster::train(&params).unwrap(); 109 | 110 | // get predictions 111 | let preds = booster.predict(&dval).unwrap(); 112 | println!("preds: {:?}", preds); 113 | 114 | // true values 115 | let labels = dval.get_labels().unwrap(); 116 | println!("{:?}", labels); 117 | 118 | // find the accuracy 119 | let mut hits = 0; 120 | let mut correct_hits = 0; 121 | for (predicted, actual) in preds.iter().zip(labels.iter()) { 122 | if predicted == actual { 123 | correct_hits += 1; 124 | } 125 | hits += 1; 126 | } 
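// every validation prediction has been compared against a label, so correct_hits / hits below is the validation accuracy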
127 | assert_eq!(hits, preds.len()); 128 | println!("accuracy={} ({}/{} correct)", correct_hits as f32 / hits as f32, correct_hits, preds.len()); 129 | 130 | Ok(()) 131 | } -------------------------------------------------------------------------------- /chapter2/kmeans_rusty_machine/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "linear_reg_rustymachine" 3 | version = "0.1.0" 4 | authors = ["joydeep bhattacharjee "] 5 | 6 | [dependencies] 7 | rusty-machine="0.5.4" 8 | rand = "0.6.5" 9 | ml-utils = { path = "../ml-utils" } 10 | csv = "1.0.7" -------------------------------------------------------------------------------- /chapter2/kmeans_rusty_machine/src/main.rs: -------------------------------------------------------------------------------- 1 | extern crate rusty_machine; 2 | extern crate rand; 3 | 4 | use rusty_machine::linalg::{Matrix, BaseMatrix}; 5 | use rusty_machine::learning::k_means::KMeansClassifier; 6 | use rusty_machine::learning::UnSupModel; 7 | 8 | use rand::thread_rng; 9 | // use rand::distributions::IndependentSample; // only needed by the commented-out generate_data below; removed in rand 0.5+ 10 | // use rand::distributions::normal::Normal; 11 | 12 | use csv; 13 | use ml_utils; 14 | use ml_utils::datasets::Flower; 15 | 16 | // fn generate_data(centroids: &Matrix<f64>, 17 | // points_per_centroid: usize, 18 | // noise: f64) 19 | // -> Matrix<f64> { 20 | // assert!(centroids.cols() > 0, "Centroids cannot be empty."); 21 | // assert!(centroids.rows() > 0, "Centroids cannot be empty."); 22 | // assert!(noise >= 0f64, "Noise must be non-negative."); 23 | // let mut raw_cluster_data = Vec::with_capacity(centroids.rows() * points_per_centroid * 24 | // centroids.cols()); 25 | 26 | // let mut rng = thread_rng(); 27 | // let normal_rv = Normal::new(0f64, noise); 28 | 29 | // for _ in 0..points_per_centroid { 30 | // // Generate points from each centroid 31 | // for centroid in centroids.row_iter() { 32 | // // Generate a point randomly around the centroid 33 | // let mut point = Vec::with_capacity(centroids.cols()); 34 | // for feature in centroid.iter() { 35 | // point.push(feature + normal_rv.ind_sample(&mut rng)); 36 | // } 37 | 38 | // // Push point to raw_cluster_data 39 | // raw_cluster_data.extend(point); 40 | // } 41 | // } 42 | 43 | // Matrix::new(centroids.rows() * points_per_centroid, 44 | // centroids.cols(), 45 | // raw_cluster_data) 46 | // } 47 | 48 | fn main() { 49 | let data = "sepal_length,sepal_width,petal_length,petal_width,species\n5.1,3.5,1.4,0.2,setosa\n"; 50 | let mut rdr = csv::Reader::from_reader(data.as_bytes()); 51 | let mut data = Vec::new(); 52 | for result in rdr.deserialize() { 53 | let r: Flower = result.unwrap(); 54 | data.push(r); // data contains all the records 55 | } 56 | 57 | let flower_x_train: Vec<f64> = data.iter().flat_map(|r| r.into_features()).collect(); 58 | let flower_x_train = Matrix::new(flower_x_train.len() / 4, 4, flower_x_train); 59 | 60 | 61 | println!("K-Means clustering example:"); 62 | 63 | const SAMPLES_PER_CENTROID: usize = 2000; 64 | 65 | println!("Generating {0} samples from each centroid:", 66 | SAMPLES_PER_CENTROID); 67 | // Choose two four-dimensional cluster centers. 
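// Matrix::new(rows, cols, data) takes the values in row-major order, so this builds a 2x4 matrix: one row per centroid, one column per flower feature.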
68 | let centroids = Matrix::new(2, 4, vec![-0.5, -0.5, -0.5, 0.0, 0.5, 0.0, 0.0, 0.0]); 69 | println!("{}", centroids); 70 | 71 | // // Generate some data randomly around the centroids 72 | // let samples = generate_data(&centroids, SAMPLES_PER_CENTROID, 0.4); 73 | 74 | // Create a new model with 2 clusters 75 | let mut model = KMeansClassifier::new(2); 76 | 77 | // Train the model 78 | println!("Training the model..."); 79 | // Our train function returns a Result<(), E> 80 | model.train(&flower_x_train).unwrap(); 81 | 82 | let centroids = model.centroids().as_ref().unwrap(); 83 | println!("Model Centroids:\n{:.3}", centroids); 84 | 85 | // Predict the classes and partition into 86 | println!("Classifying the samples..."); 87 | let classes = model.predict(&flower_x_train).unwrap(); 88 | let (first, second): (Vec<usize>, Vec<usize>) = classes.data().iter().partition(|&x| *x == 0); 89 | 90 | println!("Samples closest to first centroid: {}", first.len()); 91 | println!("Samples closest to second centroid: {}", second.len()); 92 | } 93 | -------------------------------------------------------------------------------- /chapter2/logistic_regression_rustlearn/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "logistic_regression" 3 | version = "0.1.0" 4 | authors = ["joydeep bhattacharjee "] 5 | 6 | [dependencies] 7 | rustlearn = "0.3.0" 8 | bincode = "1.0.1" 9 | -------------------------------------------------------------------------------- /chapter2/logistic_regression_rustlearn/src/main.rs: -------------------------------------------------------------------------------- 1 | extern crate rustlearn; 2 | extern crate bincode; 3 | 4 | use std::fs::File; 5 | use std::io::prelude::*; 6 | 7 | use rustlearn::prelude::*; 8 | use rustlearn::linear_models::sgdclassifier::Hyperparameters; 9 | use rustlearn::cross_validation::CrossValidation; 10 | use rustlearn::datasets::iris; 11 | use rustlearn::metrics::accuracy_score; 12 | use bincode::{serialize, deserialize}; 13 | 14 | fn main() -> std::io::Result<()> { 15 | let (X, y) = iris::load_data(); 16 | let num_splits = 10; 17 | let num_epochs = 5; 18 | let mut accuracy = 0.0; 19 | let mut model = Hyperparameters::new(X.cols()) 20 | .learning_rate(0.5) 21 | .l2_penalty(0.0) 22 | .l1_penalty(0.0) 23 | .one_vs_rest(); 24 | 25 | for (train_idx, test_idx) in CrossValidation::new(X.rows(), num_splits) { 26 | let X_train = X.get_rows(&train_idx); 27 | let y_train = y.get_rows(&train_idx); 28 | let X_test = X.get_rows(&test_idx); 29 | let y_test = y.get_rows(&test_idx); 30 | 31 | for _ in 0..num_epochs { 32 | model.fit(&X_train, &y_train).unwrap(); 33 | } 34 | let prediction = model.predict(&X_test).unwrap(); 35 | let present_acc = accuracy_score(&y_test, &prediction); 36 | accuracy += present_acc; 37 | } 38 | println!("accuracy: {:#?}", accuracy / num_splits as f32); 39 | 40 | // serialize the model to bytes with bincode 41 | //let encoded = serialize(&model).unwrap(); 42 | println!("{:?}", model); 43 | //let mut file = File::create("foo.txt")?; 44 | //file.write_all(&encoded)?; 45 | Ok(()) 46 | } 47 | -------------------------------------------------------------------------------- /chapter2/ml-utils/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "ml-utils" 3 | version = "0.1.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | serde = "1" 9 | serde_derive = "1" 10 | rand = "0.6.5" 11 | ndarray = "0.12.1" 12 | itertools = 
"0.8.0" 13 | num-traits = "0.2.6" 14 | 15 | [dev-dependencies] 16 | csv = "1.0.6" -------------------------------------------------------------------------------- /chapter2/ml-utils/calc_prob.py: -------------------------------------------------------------------------------- 1 | from math import lgamma 2 | from numba import jit 3 | import numpy as np 4 | 5 | #defining the functions used 6 | @jit 7 | def h(a, b, c, d): 8 | num = lgamma(a + c) + lgamma(b + d) + lgamma(a + b) + lgamma(c + d) 9 | den = lgamma(a) + lgamma(b) + lgamma(c) + lgamma(d) + lgamma(a + b + c + d) 10 | return np.exp(num - den) 11 | 12 | @jit 13 | def g0(a, b, c): 14 | return np.exp(lgamma(a + b) + lgamma(a + c) - (lgamma(a + b + c) + lgamma(a))) 15 | 16 | @jit 17 | def hiter(a, b, c, d): 18 | while d > 1: 19 | d -= 1 20 | yield h(a, b, c, d) / d 21 | 22 | def g(a, b, c, d): 23 | return g0(a, b, c) + sum(hiter(a, b, c, d)) 24 | 25 | def calc_prob_between(beta1, beta2): 26 | return g(beta1.args[0], beta1.args[1], beta2.args[0], beta2.args[1]) 27 | -------------------------------------------------------------------------------- /chapter2/ml-utils/clusim stuff.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 14, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import clusim.clugen as clugen\n", 10 | "from clusim.sim import contingency_table\n", 11 | "from clusim.clustering import print_clustering\n", 12 | "from clusim.sim import count_pairwise_cooccurence\n", 13 | "from clusim.sim import jaccard_index, rand_index, expected_rand_index" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 2, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "c1 = clugen.make_random_clustering(n_elements=9, n_clusters=3, random_model=\"num\")\n", 23 | "c2 = clugen.make_random_clustering(n_elements=9, n_clusters=3, random_model=\"num\")" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 4, 29 | "metadata": {}, 30 | "outputs": [ 31 | { 32 | "data": { 33 | "text/plain": [ 34 | "[[2, 1, 1], [0, 1, 1], [1, 2, 0]]" 35 | ] 36 | }, 37 | "execution_count": 4, 38 | "metadata": {}, 39 | "output_type": "execute_result" 40 | } 41 | ], 42 | "source": [ 43 | "contingency_table(c1, c2)" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 7, 49 | "metadata": {}, 50 | "outputs": [ 51 | { 52 | "data": { 53 | "text/plain": [ 54 | "(2.0, 8.0, 8.0, 18.0)" 55 | ] 56 | }, 57 | "execution_count": 7, 58 | "metadata": {}, 59 | "output_type": "execute_result" 60 | } 61 | ], 62 | "source": [ 63 | "count_pairwise_cooccurence(c1, c2)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 8, 69 | "metadata": {}, 70 | "outputs": [ 71 | { 72 | "name": "stdout", 73 | "output_type": "stream", 74 | "text": [ 75 | "0837|15|246\n" 76 | ] 77 | } 78 | ], 79 | "source": [ 80 | "print_clustering(c1)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 9, 86 | "metadata": {}, 87 | "outputs": [ 88 | { 89 | "name": "stdout", 90 | "output_type": "stream", 91 | "text": [ 92 | "047|1236|85\n" 93 | ] 94 | } 95 | ], 96 | "source": [ 97 | "print_clustering(c2)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 11, 103 | "metadata": {}, 104 | "outputs": [ 105 | { 106 | "data": { 107 | "text/plain": [ 108 | "0.1111111111111111" 109 | ] 110 | }, 111 | "execution_count": 11, 112 | "metadata": {}, 113 | "output_type": "execute_result" 114 | 
} 115 | ], 116 | "source": [ 117 | "jaccard_index(c1, c2)" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 13, 123 | "metadata": {}, 124 | "outputs": [ 125 | { 126 | "data": { 127 | "text/plain": [ 128 | "0.5555555555555556" 129 | ] 130 | }, 131 | "execution_count": 13, 132 | "metadata": {}, 133 | "output_type": "execute_result" 134 | } 135 | ], 136 | "source": [ 137 | "rand_index(c1, c2)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 15, 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "ename": "UnboundLocalError", 147 | "evalue": "local variable 'expected' referenced before assignment", 148 | "output_type": "error", 149 | "traceback": [ 150 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 151 | "\u001b[0;31mUnboundLocalError\u001b[0m Traceback (most recent call last)", 152 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mexpected_rand_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mc1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mc2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 153 | "\u001b[0;32m~/opensource/programming-languages/rust-lang/ml-utils/venv/lib/python3.5/site-packages/clusim/sim.py\u001b[0m in \u001b[0;36mexpected_rand_index\u001b[0;34m(n_elements, random_model, n_clusters1, n_clusters2, clu_size_seq1, clu_size_seq2)\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[0;32mpass\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 322\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 323\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfloat64\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexpected\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 324\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 325\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 154 | "\u001b[0;31mUnboundLocalError\u001b[0m: local variable 'expected' referenced before assignment" 155 | ] 156 | } 157 | ], 158 | "source": [ 159 | "expected_rand_index(c1, c2)" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [] 168 | } 169 | ], 170 | "metadata": { 171 | "kernelspec": { 172 | "display_name": "Python 3", 173 | "language": "python", 174 | "name": "python3" 175 | }, 176 | "language_info": { 177 | "codemirror_mode": { 178 | "name": "ipython", 179 | "version": 3 180 | }, 181 | "file_extension": ".py", 182 | "mimetype": "text/x-python", 183 | "name": "python", 184 | "nbconvert_exporter": "python", 185 | "pygments_lexer": "ipython3", 186 | "version": "3.5.2" 187 | } 188 | }, 189 | "nbformat": 4, 190 | "nbformat_minor": 2 191 | } 192 | -------------------------------------------------------------------------------- /chapter2/ml-utils/easy_bayesian_AB.py: -------------------------------------------------------------------------------- 1 | # reference https://towardsdatascience.com/bayesian-a-b-testing-with-python-the-easy-guide-d638f89e0b8a 2 | from scipy.stats import beta 3 | import numpy as np 4 | from calc_prob import calc_prob_between 5 | 6 | #This is the known data: impressions and conversions for the Control and Test set 7 | imps_ctrl,convs_ctrl=16500, 30 8 | imps_test, convs_test=17000, 50 9 | 10 | #here we create the Beta functions for the two sets 11 | a_C, b_C = convs_ctrl+1, imps_ctrl-convs_ctrl+1 12 | beta_C = beta(a_C, b_C) 13 | a_T, b_T = convs_test+1, 
imps_test-convs_test+1 14 | beta_T = beta(a_T, b_T) 15 | 16 | #calculating the lift 17 | lift=(beta_T.mean()-beta_C.mean())/beta_C.mean() 18 | 19 | #calculating the probability for Test to be better than Control 20 | prob=calc_prob_between(beta_T, beta_C) 21 | 22 | print (f"Test option lifts Conversion Rates by {lift*100:2.2f}% with {prob*100:2.1f}% probability.") 23 | #output: Test option lifts Conversion Rates by 59.68% with 98.2% probability. 24 | -------------------------------------------------------------------------------- /chapter2/ml-utils/examples/measures.rs: -------------------------------------------------------------------------------- 1 | use std::vec::Vec; 2 | use std::collections::HashSet; 3 | 4 | use ml_utils as ml; 5 | use ml::unsup_metrics::{jaccard_index, hashset, rand_index}; 6 | // use ml::{jaccard_index, hashset, rand_index}; 7 | 8 | fn main() { 9 | // let clusters1 = vec![vec![0u8,8,3, 7], vec![1u8,5], vec![2u8, 4, 6]]; 10 | // let clusters1: Vec<HashSet<u64>> = clusters1.iter().map( 11 | // |v| hashset(&v)).collect(); 12 | // let clusters2 = vec![vec![0u8,4, 7], vec![1u8,2,3, 6], vec![8u8,5]]; 13 | // let clusters2: Vec<HashSet<u64>> = clusters2.iter().map( 14 | // |v| hashset(&v)).collect(); 15 | 16 | // let ji = jaccard_index(&clusters1, &clusters2); 17 | // println!("jaccard index: {:?}", ji); 18 | 19 | // let ri = rand_index(&clusters1, &clusters2); 20 | // println!("{:?}", ri); 21 | println!("change the example"); 22 | } -------------------------------------------------------------------------------- /chapter2/ml-utils/src/hypothesis_testing.rs: -------------------------------------------------------------------------------- 1 | use std::vec::Vec; 2 | use std::collections::HashMap; 3 | // use std::io::Error; 4 | use std::error::Error; 5 | 6 | use rand; 7 | use rand::distributions::{Bernoulli, Distribution}; 8 | 9 | #[derive(Debug, PartialEq, Eq, Hash)] 10 | enum User { 11 | Group, 12 | Converted, 13 | } 14 | 15 | fn generate_data(control_size: u32, test_size: u32, p_control: f64, p_test: f64) -> Vec<HashMap<User, bool>> { 16 | // initialize an empty container.
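// Each simulated user below gets two Bernoulli draws: a fair coin assigns
// the group (true => control, false => test), and the group's own conversion
// rate then decides whether that user converted. Group sizes therefore come
// out only approximately equal, not exactly control_size and test_size.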
17 | let mut data = vec![]; 18 | 19 | let total = control_size + test_size; 20 | 21 | let group_bern = Bernoulli::new(0.5); // we need to divide the whole population equally 22 | 23 | let control_bern = Bernoulli::new(p_control); 24 | let test_bern = Bernoulli::new(p_test); 25 | 26 | for _ in 0..total { 27 | 28 | let mut row = HashMap::new(); 29 | let v = group_bern.sample(&mut rand::thread_rng()); 30 | row.insert(User::Group, v); 31 | 32 | let converted_v = match v { 33 | // true means control and false means test 34 | true => control_bern.sample(&mut rand::thread_rng()), 35 | false => test_bern.sample(&mut rand::thread_rng()), 36 | }; 37 | row.insert(User::Converted, converted_v); 38 | data.push(row); 39 | } 40 | data 41 | } 42 | 43 | fn find_rate_difference(data: &Vec<HashMap<User, bool>>) -> Result<f64, Box<dyn Error>> { 44 | let mut total_control_groups: usize = 0; 45 | let mut converted_control_group: usize = 0; 46 | let mut converted_test_group: usize = 0; 47 | for d in data { 48 | let user_group = d.get(&User::Group) 49 | .expect("data must have group and converted"); 50 | let user_conversion = d.get(&User::Converted) 51 | .expect("data must have group and converted"); 52 | if user_group == &true { 53 | total_control_groups += 1; 54 | if user_conversion == &true { 55 | converted_control_group += 1; 56 | } 57 | } else { 58 | if user_conversion == &true { 59 | converted_test_group += 1; 60 | } 61 | } 62 | } 63 | let total_test_group = data.len() - total_control_groups; 64 | let control_rate = converted_control_group as f64/total_control_groups as f64; 65 | let test_rate = converted_test_group as f64/total_test_group as f64; 66 | Ok(test_rate - control_rate) 67 | } 68 | 69 | fn main() { 70 | // A is control and B is test 71 | let control_size = 1000; 72 | let test_size = 1000; 73 | 74 | let bcr = 0.10; // baseline conversion rate 75 | let d_hat = 0.02; // difference between the groups 76 | let data = generate_data(control_size, test_size, bcr, bcr + d_hat); // we want data that is a little better than baseline. 77 | println!("{:?}", data); 78 | 79 | let x = find_rate_difference(&data); 80 | println!("{:?}", x); 81 | } 82 | 83 | #[cfg(test)] 84 | mod tests { 85 | use super::*; 86 | 87 | #[test] 88 | fn test_generate_data() { 89 | let data = generate_data(10, 10, 0.1, 0.02); 90 | assert_eq!(data.len(), 20); 91 | assert_eq!(data[0].contains_key(&User::Group), true); 92 | } 93 | 94 | #[test] 95 | fn test_find_rate_difference() { 96 | let mut data = vec![]; 97 | let data1: HashMap<_, _> = vec![(User::Group, false), (User::Converted, false)].into_iter().collect(); 98 | data.push(data1); 99 | let data2: HashMap<_, _> = vec![(User::Group, true), (User::Converted, true)].into_iter().collect(); 100 | data.push(data2); 101 | let res = find_rate_difference(&data).unwrap(); 102 | assert_eq!(res, -1.0); 103 | } 104 | } -------------------------------------------------------------------------------- /chapter2/ml-utils/src/lib.rs: -------------------------------------------------------------------------------- 1 | extern crate serde; 2 | // This lets us write `#[derive(Deserialize)]`.
3 | #[macro_use] 4 | extern crate serde_derive; 5 | 6 | pub mod unsup_metrics; 7 | pub mod sup_metrics; 8 | pub mod datasets; -------------------------------------------------------------------------------- /chapter2/ml-utils/src/sup_metrics.rs: -------------------------------------------------------------------------------- 1 | use std::cmp::Ordering; 2 | 3 | // for regression 4 | pub fn r_squared_score(y_test: &[f64], y_preds: &[f64]) -> f64 { 5 | let model_variance: f64 = y_test.iter().zip(y_preds.iter()).fold( 6 | 0., |v, (y_i, y_i_hat)| { 7 | v + (y_i - y_i_hat).powi(2) 8 | } 9 | ); 10 | 11 | // get the mean for the actual values to be used later 12 | let y_test_mean = y_test.iter().sum::<f64>() as f64 13 | / y_test.len() as f64; 14 | 15 | // finding the variance 16 | let variance = y_test.iter().fold( 17 | 0., |v, &x| {v + (x - y_test_mean).powi(2)} 18 | ); 19 | let r2_calculated: f64 = 1.0 - (model_variance / variance); 20 | r2_calculated 21 | } 22 | 23 | // for classification 24 | pub fn accuracy(y_test: &[u32], y_preds: &[u32]) -> f32 { 25 | let mut correct_hits = 0; 26 | for (predicted, actual) in y_preds.iter().zip(y_test.iter()) { 27 | if predicted == actual { 28 | correct_hits += 1; 29 | } 30 | } 31 | let acc: f32 = correct_hits as f32 / y_test.len() as f32; 32 | acc 33 | } 34 | 35 | pub fn logloss_score(y_test: &[f32], y_preds: &[f32], eps: f32) -> f32 { 36 | // complete this http://wiki.fast.ai/index.php/Log_Loss#Log_Loss_vs_Cross-Entropy 37 | let y_preds = y_preds.iter().map(|&p| { 38 | match p.partial_cmp(&(1.0 - eps)) { 39 | Some(Ordering::Less) => p, 40 | _ => 1.0 - eps, // if equal or greater. 41 | } 42 | }); 43 | let y_preds = y_preds.map(|p| { 44 | match p.partial_cmp(&eps) { 45 | Some(Ordering::Less) => eps, 46 | _ => p, 47 | } 48 | }); 49 | 50 | // Now compute the logloss 51 | let logloss_vals = y_preds.zip(y_test.iter()).map(|(predicted, &actual)| { 52 | if actual as f32 == 1.0 { 53 | (-1.0) * predicted.ln() 54 | } else if actual as f32 == 0.0 { 55 | (-1.0) * (1.0 - predicted).ln() 56 | } else { 57 | panic!("Invalid labels: target data is not either 0.0 or 1.0"); 58 | } 59 | }); 60 | logloss_vals.sum() 61 | } -------------------------------------------------------------------------------- /chapter2/ml-utils/src/unsup_metrics.rs: -------------------------------------------------------------------------------- 1 | use std::vec::Vec; 2 | use std::collections::HashSet; 3 | // use std::io::Error; 4 | use std::error::Error; 5 | use std::cmp::Ordering; 6 | use std::iter::FromIterator; 7 | 8 | use rand; 9 | use rand::distributions::{Bernoulli, Distribution}; 10 | use itertools; 11 | use itertools::iproduct; 12 | use itertools::Itertools; 13 | use ndarray; 14 | use ndarray::{arr2, Array, ArrayBase, OwnedRepr, Dim, Axis}; 15 | use ndarray::prelude::*; 16 | 17 | fn matching_elems_count(s1: &HashSet<u64>, s2: &HashSet<u64>) -> u64 { 18 | let common: Vec<_> = s1.intersection(s2).collect(); 19 | common.len() as u64 20 | } 21 | 22 | fn contingency_table(clusters1: &[HashSet<u64>], clusters2: &[HashSet<u64>]) -> ArrayBase<OwnedRepr<u64>, Dim<[usize; 2]>> { 23 | let length = clusters1.len(); 24 | assert!(length == clusters2.len()); 25 | let product = iproduct!(clusters1, clusters2); 26 | let cont_table_vec: Vec<u64> = product.map( 27 | |(c1, c2)| matching_elems_count(c1, c2) 28 | ).collect(); 29 | // println!("{:?}", cont_table_vec); 30 | let cont_table_mat = Array::from_shape_vec((3, 3), cont_table_vec).unwrap(); 31 | cont_table_mat 32 | // let v_chunked: Vec<Vec<u64>> = cont_table_vec.chunks(length).map(|x| x.to_vec()).collect(); 33
| // v_chunked 34 | } 35 | 36 | fn cluster_size_sequence_sqsum(clusters: &[HashSet<u64>]) -> u64 { 37 | let cluster1_size_seq: Vec<u64> = clusters.iter().map( 38 | |v| v.len() as u64).collect(); 39 | let squares = cluster1_size_seq.iter().map( 40 | |num| num.pow(2) 41 | ); 42 | squares.sum() 43 | } 44 | 45 | fn elements_in_vectr(vectr: &[HashSet<u64>]) -> u64 { 46 | let flatten_array: Vec<u64> = vectr 47 | .iter() 48 | .flat_map(|array| array.iter()) 49 | .cloned() 50 | .collect(); 51 | flatten_array.len() as u64 52 | 53 | } 54 | 55 | fn count_pairwise_cooccurence(clusters1: &[HashSet<u64>], clusters2: &[HashSet<u64>]) -> (f64, f64, f64, f64) { 56 | let cont_tbl = contingency_table(&clusters1, &clusters2); 57 | // println!("{:?}", cont_tbl); 58 | 59 | let square_matrix = cont_tbl.mapv(|a| a.pow(2)); 60 | // println!("{:?}", square_matrix); 61 | let sum_of_squares1 = square_matrix.into_raw_vec(); 62 | let sum_of_squares: u64 = sum_of_squares1.iter().sum(); 63 | // println!("{:?}", sum_of_squares); 64 | let c1_sum_sq_sizes = cluster_size_sequence_sqsum(clusters1); 65 | let c2_sum_sq_sizes = cluster_size_sequence_sqsum(clusters2); 66 | // println!("{:?}", c1_sum_sq_sizes); 67 | 68 | let c1_elements_count = elements_in_vectr(clusters1); 69 | let n11 = 0.5 * (sum_of_squares - c1_elements_count) as f64; 70 | // println!("{:?}", n11); 71 | let n10 = 0.5 * (c1_sum_sq_sizes - sum_of_squares) as f64; 72 | let n01 = 0.5 * (c2_sum_sq_sizes - sum_of_squares) as f64; 73 | let n00 = 0.5 * c1_elements_count as f64 * (c1_elements_count - 1) as f64 - n11 - n10 - n01; 74 | (n11, n10, n01, n00) 75 | } 76 | 77 | pub fn hashset(data: &[u64]) -> HashSet<u64> { 78 | HashSet::from_iter(data.iter().cloned()) 79 | } 80 | 81 | pub fn jaccard_index(clusters1: &[HashSet<u64>], clusters2: &[HashSet<u64>]) -> f64 { 82 | let (n11, n10, n01, n00) = count_pairwise_cooccurence(clusters1, clusters2); 83 | // println!("{:?}", (n11, n10, n01, n00)); 84 | let denominator = n11 + n10 + n01; 85 | if denominator > 0.0 { 86 | n11 / denominator 87 | } else { 88 | 0.0 89 | } 90 | } 91 | 92 | pub fn rand_index(clusters1: &[HashSet<u64>], clusters2: &[HashSet<u64>]) -> f64 { 93 | let (n11, n10, n01, n00) = count_pairwise_cooccurence(clusters1, clusters2); 94 | (n11 + n00) / (n11 + n10 + n01 + n00) 95 | } 96 | 97 | #[cfg(test)] 98 | mod tests { 99 | use super::*; 100 | 101 | fn generate_data() -> (Vec<HashSet<u64>>, Vec<HashSet<u64>>) { 102 | let clusters1 = vec![vec![0u64,8,3, 7], vec![1u64,5], vec![2u64, 4, 6]]; 103 | let clusters1: Vec<HashSet<u64>> = clusters1.iter().map( 104 | |v| hashset(&v)).collect(); 105 | let clusters2 = vec![vec![0u64,4, 7], vec![1u64,2,3, 6], vec![8u64,5]]; 106 | let clusters2: Vec<HashSet<u64>> = clusters2.iter().map( 107 | |v| hashset(&v)).collect(); 108 | (clusters1, clusters2) 109 | } 110 | 111 | #[test] 112 | fn test_contingency_table() { 113 | let (clusters1, clusters2) = generate_data(); 114 | let table = contingency_table(&clusters1, &clusters2); 115 | println!("{:?}", table); 116 | let table2: Vec<u64> = [2, 1, 1, 0, 1, 1, 1, 2, 0].to_vec(); 117 | let table3 = Array::from_shape_vec((3,3), table2).unwrap(); 118 | assert_eq!(table, table3); 119 | } 120 | 121 | #[test] 122 | fn test_matching_elems_count() { 123 | let (clusters1, clusters2) = generate_data(); 124 | let s1 = &clusters1[0]; 125 | let s2 = &clusters2[0]; 126 | let res = matching_elems_count(&s1, &s2); 127 | assert_eq!(res, 2); 128 | } 129 | 130 | #[test] 131 | fn test_cluster_size_sequence_sqsum() { 132 | let (clusters1, _) = generate_data(); 133 | let res = cluster_size_sequence_sqsum(&clusters1); 134 | assert_eq!(res, 29); 135 | } 136 | 137
| #[test] 138 | fn test_elements_in_vector() { 139 | let (clusters1, _) = generate_data(); 140 | let res = elements_in_vectr(&clusters1); 141 | assert_eq!(res, 9); 142 | } 143 | 144 | #[test] 145 | fn test_count_pairwise_cooccurence() { 146 | let (clusters1, clusters2) = generate_data(); 147 | let res = count_pairwise_cooccurence(&clusters1, &clusters2); 148 | assert_eq!(res, (2.0, 8.0, 8.0, 18.0)); 149 | } 150 | 151 | #[test] 152 | fn test_jaccard_index() { 153 | let (clusters1, clusters2) = generate_data(); 154 | let res = jaccard_index(&clusters1, &clusters2); 155 | assert_eq!(res, 0.1111111111111111); 156 | } 157 | 158 | #[test] 159 | fn test_rand_index() { 160 | let (clusters1, clusters2) = generate_data(); 161 | let res = rand_index(&clusters1, &clusters2); 162 | assert_eq!(res, 0.5555555555555556); 163 | } 164 | } 165 | -------------------------------------------------------------------------------- /chapter2/nb_classification/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "nb_classification" 3 | version = "0.1.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | csv = "*" 9 | rusty-machine="*" 10 | hyper = "*" 11 | rulinalg = { version = "0.4.2", features = ["io"] } 12 | serde = "1" 13 | serde_derive = "1" 14 | rand = "0.6.5" -------------------------------------------------------------------------------- /chapter2/rust_and_tf/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rust_and_tf" 3 | version = "0.1.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | tensorflow = { version = "0.13.0", features = ["tensorflow_unstable"] } 9 | serde = "1" 10 | serde_derive = "1" 11 | rand = "0.6.5" 12 | transpose = "0.2.0" 13 | mnist = "0.4.0" 14 | ml-utils = { path = "../ml-utils" } 15 | random = "0.12.2" -------------------------------------------------------------------------------- /chapter2/rust_and_tf/src/conv_nets_maxpooling.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | 3 | use std::error::Error; 4 | use std::result::Result; 5 | use std::io::prelude::*; 6 | use std::io::BufReader; 7 | use std::path::Path; 8 | use std::fs::File; 9 | use std::vec::Vec; 10 | 11 | use rand; 12 | use rand::thread_rng; 13 | use rand::seq::SliceRandom; 14 | use transpose; 15 | use mnist; 16 | use mnist::{Mnist, MnistBuilder}; 17 | 18 | use tensorflow as tf; 19 | use tf::expr::{Compiler, Constant}; 20 | use tf::{Graph, Tensor, DataType, Shape}; 21 | use tf::{Session, SessionOptions, SessionRunArgs}; 22 | 23 | #[cfg_attr(feature="examples_system_alloc", global_allocator)] 24 | #[cfg(feature="examples_system_alloc")] 25 | static ALLOCATOR: std::alloc::System = std::alloc::System; 26 | 27 | pub fn run() -> Result<(), Box<dyn Error>> { 28 | // Get all the data 29 | let (trn_size, rows, cols) = (10_000, 28, 28); 30 | 31 | // Deconstruct the returned Mnist struct. 32 | let Mnist { trn_img, trn_lbl, .. } = MnistBuilder::new() 33 | .label_format_digit() 34 | .training_set_length(trn_size) 35 | .validation_set_length(10_000) 36 | .test_set_length(10_000) 37 | .finalize(); 38 | // Get the label of the first digit.
39 | let first_label = trn_lbl[0]; 40 | println!("The first digit is a {}.", first_label); 41 | println!("size of training {}.", trn_img.len()); 42 | 43 | let trn_img: Vec<f64> = trn_img.iter().map(|&x| x as f64).collect(); 44 | let trn_lbl: Vec<f64> = trn_lbl.iter().map(|&x| x as f64).collect(); 45 | 46 | 47 | // Define graph. 48 | let mut graph = Graph::new(); 49 | let X = <Tensor<f64>>::new(&[10_000, 28, 28, 1]).with_values(&trn_img[..])?; 50 | let y = <Tensor<f64>>::new(&[10_000,]).with_values(&trn_lbl[..])?; 51 | let z = <Tensor<f64>>::new(&[28, 28, 1, 32]).with_values(&vec![1.; 25088])?; 52 | 53 | let X_const = { 54 | let mut c = graph.new_operation("Placeholder", "X")?; 55 | c.set_attr_type("dtype", DataType::Double)?; // check the enums https://github.com/tensorflow/rust/blob/ddff61850be1c8044ac86350caeed5a55824ebe4/src/lib.rs#L297 56 | // c.set_attr_shape("shape", &Shape::from(Some(vec![Some(28),Some(28),Some(1),Some(32)])))?; 57 | c.set_attr_shape("shape", &Shape::from(Some(vec![Some(10_000),Some(28),Some(28),Some(1)])))?; 58 | c.finish()? 59 | }; 60 | // operation types https://github.com/malmaud/TensorFlow.jl/blob/063511525902bdf84a461035758ef9a73ba4a635/src/ops/op_names.txt 61 | let max_pool = { 62 | let mut op = graph.new_operation("MaxPool", "max_pool")?; 63 | op.add_input(X_const.clone()); 64 | op.set_attr_string("padding", "VALID")?; 65 | op.set_attr_int_list("strides", &[1,2,2,1])?; 66 | op.set_attr_int_list("ksize", &[1,2,2,1])?; 67 | op.finish()? 68 | }; 69 | 70 | // Run graph. 71 | let session = Session::new(&SessionOptions::new(), &graph)?; 72 | let mut args = SessionRunArgs::new(); 73 | args.add_feed(&X_const, 0, &X); 74 | let max_pool_token = args.request_fetch(&max_pool, 0); 75 | session.run(&mut args)?; 76 | let max_pool_token_res: Tensor<f64> = args.fetch::<f64>(max_pool_token)?; 77 | println!("Now the max_pool"); 78 | println!("{:?}", &max_pool_token_res[..]); 79 | 80 | Ok(()) 81 | } 82 | -------------------------------------------------------------------------------- /chapter2/rust_and_tf/src/graph_variables.rs: -------------------------------------------------------------------------------- 1 | use std::error::Error; 2 | use std::result::Result; 3 | use tensorflow as tf; 4 | use tf::expr::{Variable, Compiler}; 5 | use tf::{Graph, Tensor}; 6 | use tf::{Session, SessionOptions, SessionRunArgs}; 7 | 8 | #[cfg_attr(feature="examples_system_alloc", global_allocator)] 9 | #[cfg(feature="examples_system_alloc")] 10 | static ALLOCATOR: std::alloc::System = std::alloc::System; 11 | 12 | pub fn run() -> Result<(), Box<dyn Error>> { 13 | let mut g = Graph::new(); 14 | 15 | let (x_node, y_node, z_node) = { 16 | let mut compiler = Compiler::new(&mut g); 17 | let x_expr = <Variable<f32>>::new_expr(&vec![1], "x"); 18 | let y_expr = <Variable<f32>>::new_expr(&vec![1], "y"); 19 | let y_node = compiler.compile(y_expr.clone())?; 20 | let x_node = compiler.compile(x_expr.clone())?; 21 | 22 | // let z = x * x * y + y + 2; 23 | let z_node = compiler.compile( 24 | x_expr.clone() * x_expr.clone() * y_expr.clone() 25 | + y_expr.clone() + 2.0f32)?; 26 | (x_node, y_node, z_node) 27 | }; 28 | 29 | let options = SessionOptions::new(); 30 | let mut session = Session::new(&options, &g)?; 31 | 32 | // Evaluate the graph. 33 | let x = <Tensor<f32>>::new(&[1]).with_values(&[3.0_f32]).unwrap(); 34 | let y = <Tensor<f32>>::new(&[1]).with_values(&[4.0_f32]).unwrap(); 35 | let mut step = SessionRunArgs::new(); 36 | step.add_feed(&x_node, 0, &x); 37 | step.add_feed(&y_node, 0, &y); 38 | let output_token = step.request_fetch(&z_node, 0); 39 | session.run(&mut step).unwrap(); 40 | 41 | // Check our results.
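// `request_fetch` above only registered a token for z's output; the value is
// computed during `session.run`, and the token is redeemed below to read it.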
42 | let output_tensor = step.fetch::<f32>(output_token)?; 43 | println!("working with variables: z = x * x * y + y + 2 => {:?}", output_tensor[0]); 44 | session.close()?; 45 | 46 | Ok(()) 47 | } -------------------------------------------------------------------------------- /chapter2/rust_and_tf/src/graph_with_placeholder.rs: -------------------------------------------------------------------------------- 1 | use std::error::Error; 2 | use std::result::Result; 3 | use tensorflow as tf; 4 | use tf::expr::{Placeholder, Compiler}; 5 | use tf::{Graph, Tensor}; 6 | use tf::{Session, SessionOptions, SessionRunArgs}; 7 | 8 | #[cfg_attr(feature="examples_system_alloc", global_allocator)] 9 | #[cfg(feature="examples_system_alloc")] 10 | static ALLOCATOR: std::alloc::System = std::alloc::System; 11 | 12 | pub fn run() -> Result<(), Box<dyn Error>> { 13 | let mut g = Graph::new(); 14 | 15 | let (x_node, y_node, z_node) = { 16 | let mut compiler = Compiler::new(&mut g); 17 | let x_expr = <Placeholder<f32>>::new_expr(&vec![2], "x"); 18 | let y_expr = <Placeholder<f32>>::new_expr(&vec![2], "y"); 19 | let y_node = compiler.compile(y_expr.clone())?; 20 | let x_node = compiler.compile(x_expr.clone())?; 21 | 22 | // let f = x * x * y + y + 2; 23 | let z_node = compiler.compile(x_expr.clone() * x_expr.clone() * y_expr.clone() + y_expr.clone() + 2.0f32)?; 24 | (x_node, y_node, z_node) 25 | }; 26 | 27 | let options = SessionOptions::new(); 28 | let mut session = Session::new(&options, &g)?; 29 | 30 | // Evaluate the graph. 31 | let x = <Tensor<f32>>::new(&[2]).with_values(&[1.0_f32, 2.0]).unwrap(); 32 | let y = <Tensor<f32>>::new(&[2]).with_values(&[3.0_f32, 4.0]).unwrap(); 33 | let mut step = SessionRunArgs::new(); 34 | step.add_feed(&x_node, 0, &x); 35 | step.add_feed(&y_node, 0, &y); 36 | let output_token = step.request_fetch(&z_node, 0); 37 | session.run(&mut step).unwrap(); 38 | 39 | // Check our results. 40 | let output_tensor = step.fetch::<f32>(output_token)?; 41 | println!("{:?}", output_tensor[0]); 42 | println!("{:?}", output_tensor[1]); 43 | session.close()?; 44 | 45 | Ok(()) 46 | } -------------------------------------------------------------------------------- /chapter2/rust_and_tf/src/linear_regression.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | 3 | use std::error::Error; 4 | use std::result::Result; 5 | use std::vec::Vec; 6 | 7 | use rand; 8 | use rand::thread_rng; 9 | use rand::seq::SliceRandom; 10 | use transpose; 11 | 12 | use tensorflow as tf; 13 | use tf::{Graph, Tensor, DataType}; 14 | use tf::{Session, SessionOptions, SessionRunArgs}; 15 | 16 | use ml_utils; 17 | use ml_utils::datasets::get_boston_records_from_file; 18 | use ml_utils::sup_metrics::r_squared_score; 19 | 20 | #[cfg_attr(feature="examples_system_alloc", global_allocator)] 21 | #[cfg(feature="examples_system_alloc")] 22 | static ALLOCATOR: std::alloc::System = std::alloc::System; 23 | 24 | pub fn run() -> Result<(), Box<dyn Error>> { 25 | // Get all the data 26 | let filename = "data/housing.csv"; 27 | let mut data = get_boston_records_from_file(&filename); 28 | 29 | // shuffle the data. 30 | data.shuffle(&mut thread_rng()); 31 | 32 | // separate out to train and test datasets. 33 | let test_size: f64 = 0.2; 34 | let test_size: f64 = data.len() as f64 * test_size; 35 | let test_size = test_size.round() as usize; 36 | let (test_data, train_data) = data.split_at(test_size); 37 | let train_size = train_data.len(); 38 | let test_size = test_data.len(); 39 | 40 | // differentiate the features and the targets.
41 | let boston_x_train: Vec<f64> = train_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 42 | let boston_y_train: Vec<f64> = train_data.iter().map(|r| r.into_targets()).collect(); 43 | 44 | let boston_x_test: Vec<f64> = test_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 45 | let boston_y_test: Vec<f64> = test_data.iter().map(|r| r.into_targets()).collect(); 46 | 47 | // println!("{:?}", boston_y_train.len()); 48 | // println!("{:?}", boston_x_train.len()); 49 | 50 | // Define graph. 51 | let mut graph = Graph::new(); 52 | let dim = (boston_y_train.len() as u64, 13); 53 | let test_dim = (boston_y_test.len() as u64, dim.1); 54 | let X_train = <Tensor<f64>>::new(&[dim.0, dim.1]).with_values(&boston_x_train)?; 55 | let y_train = <Tensor<f64>>::new(&[dim.0, 1]).with_values(&boston_y_train)?; 56 | let X_test = <Tensor<f64>>::new(&[test_dim.0, test_dim.1]).with_values(&boston_x_test)?; 57 | // let y_test = <Tensor<f64>>::new(&[test_dim.0, 1]).with_values(&boston_y_test)?; 58 | 59 | let mut output_array = vec![0.0; (dim.0 * dim.1) as usize]; 60 | transpose::transpose(&boston_x_train, &mut output_array, dim.1 as usize, dim.0 as usize); 61 | let XT = <Tensor<f64>>::new(&[dim.1, dim.0]).with_values(&output_array[..])?; 62 | let XT_const = { 63 | let mut op = graph.new_operation("Const", "XT")?; 64 | op.set_attr_tensor("value", XT)?; 65 | op.set_attr_type("dtype", DataType::Double)?; // check the enums https://github.com/tensorflow/rust/blob/ddff61850be1c8044ac86350caeed5a55824ebe4/src/lib.rs#L297 66 | op.finish()? 67 | }; 68 | let X_const = { 69 | let mut op = graph.new_operation("Const", "X_train")?; 70 | op.set_attr_tensor("value", X_train)?; 71 | op.set_attr_type("dtype", DataType::Double)?; // check the enums https://github.com/tensorflow/rust/blob/ddff61850be1c8044ac86350caeed5a55824ebe4/src/lib.rs#L297 72 | op.finish()? 73 | }; 74 | // operation types https://github.com/malmaud/TensorFlow.jl/blob/063511525902bdf84a461035758ef9a73ba4a635/src/ops/op_names.txt 75 | let y_const = { 76 | let mut op = graph.new_operation("Const", "y_train")?; 77 | op.set_attr_tensor("value", y_train)?; 78 | op.set_attr_type("dtype", DataType::Double)?; 79 | op.finish()? 80 | }; 81 | let mul = { 82 | let mut op = graph.new_operation("MatMul", "mul")?; 83 | op.add_input(XT_const.clone()); 84 | op.add_input(X_const.clone()); 85 | op.finish()? 86 | }; 87 | let inverse = { 88 | let mut op = graph.new_operation("MatrixInverse", "mul_inv")?; 89 | op.add_input(mul); 90 | op.finish()? 91 | }; 92 | let mul2 = { 93 | let mut op = graph.new_operation("MatMul", "mul2")?; 94 | op.add_input(inverse); 95 | op.add_input(XT_const.clone()); 96 | op.finish()? 97 | }; 98 | let theta = { 99 | let mut op = graph.new_operation("MatMul", "theta")?; 100 | op.add_input(mul2); 101 | op.add_input(y_const); 102 | op.finish()? 103 | }; 104 | 105 | // running predictions 106 | // y = X_test .* theta 107 | let X_test_const = { 108 | let mut op = graph.new_operation("Const", "X_test")?; 109 | op.set_attr_tensor("value", X_test)?; 110 | op.set_attr_type("dtype", DataType::Double)?; 111 | op.finish()? 112 | }; 113 | let predictions = { 114 | let mut op = graph.new_operation("MatMul", "preds")?; 115 | op.add_input(X_test_const); 116 | op.add_input(theta); 117 | op.finish()? 118 | }; 119 | 120 | // Run graph.
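// The ops above assemble the closed-form normal equation,
// theta = (X^T X)^(-1) X^T y, out of MatMul and MatrixInverse nodes, and
// `predictions` is then X_test * theta; the session run below evaluates the
// whole chain in one pass.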
121 | let session = Session::new(&SessionOptions::new(), &graph)?; 122 | let mut args = SessionRunArgs::new(); 123 | let preds_token = args.request_fetch(&predictions, 0); 124 | session.run(&mut args)?; 125 | let preds_token_res: Tensor<f64> = args.fetch::<f64>(preds_token)?; 126 | // println!("Now the preds", ); 127 | // println!("{:?}", &preds_token_res[..]); 128 | println!("r-squared error score: {:?}", r_squared_score(&boston_y_test, &preds_token_res.to_vec())); 129 | 130 | Ok(()) 131 | } 132 | -------------------------------------------------------------------------------- /chapter2/rust_and_tf/src/linear_regression_from_model.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | 3 | use std::error::Error; 4 | use std::result::Result; 5 | use std::vec::Vec; 6 | 7 | use rand; 8 | use rand::thread_rng; 9 | use rand::seq::SliceRandom; 10 | use transpose; 11 | 12 | use tensorflow as tf; 13 | use tf::{Graph, Tensor, DataType}; 14 | use tf::{Session, SessionOptions, SessionRunArgs}; 15 | 16 | use ml_utils; 17 | use ml_utils::datasets::get_boston_records_from_file; 18 | use ml_utils::sup_metrics::r_squared_score; 19 | 20 | use random; 21 | use random::Source; 22 | use std::path::Path; 23 | use std::process::exit; 24 | use tensorflow::Code; 25 | use tensorflow::Status; 26 | 27 | #[cfg_attr(feature="examples_system_alloc", global_allocator)] 28 | #[cfg(feature="examples_system_alloc")] 29 | static ALLOCATOR: std::alloc::System = std::alloc::System; 30 | 31 | pub fn run() -> Result<(), Box<dyn Error>> { 32 | // Get all the data 33 | let filename = "data/housing.csv"; 34 | let mut data = get_boston_records_from_file(&filename); 35 | 36 | // shuffle the data. 37 | data.shuffle(&mut thread_rng()); 38 | 39 | // separate out to train and test datasets. 40 | let test_size: f64 = 0.2; 41 | let test_size: f64 = data.len() as f64 * test_size; 42 | let test_size = test_size.round() as usize; 43 | let (test_data, train_data) = data.split_at(test_size); 44 | let train_size = train_data.len(); 45 | let test_size = test_data.len(); 46 | 47 | // differentiate the features and the targets. 48 | let boston_x_train: Vec<f64> = train_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 49 | let boston_y_train: Vec<f64> = train_data.iter().map(|r| r.into_targets()).collect(); 50 | 51 | let boston_x_test: Vec<f64> = test_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 52 | let boston_y_test: Vec<f64> = test_data.iter().map(|r| r.into_targets()).collect(); 53 | 54 | // println!("{:?}", boston_y_train.len()); 55 | println!("{:?}", boston_x_train.len()); 56 | 57 | // Define graph. 58 | let mut graph = Graph::new(); 59 | let dim = (boston_y_train.len() as u64, 13); 60 | let test_dim = (boston_y_test.len() as u64, dim.1); 61 | let X_train = <Tensor<f64>>::new(&[dim.0, dim.1]).with_values(&boston_x_train)?; 62 | let y_train = <Tensor<f64>>::new(&[dim.0, 1]).with_values(&boston_y_train)?; 63 | let X_test = <Tensor<f64>>::new(&[test_dim.0, test_dim.1]).with_values(&boston_x_test)?; 64 | // let y_test = <Tensor<f64>>::new(&[test_dim.0, 1]).with_values(&boston_y_test)?; 65 | 66 | let export_dir = "boston_regression/"; // y = w * x + b 67 | if !Path::new(export_dir).exists() { 68 | return Err(Box::new(Status::new_set(Code::NotFound, 69 | &format!("Run the code in the tensorflow notebook in \ 70 | {} and try again.", 71 | export_dir)) 72 | .unwrap())); 73 | } 74 | 75 | // Load the saved model exported by regression_savedmodel.py.
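// The tags ("train", "serve") and operation names ("x", "x_test", "y",
// "train", "w", "y_preds") looked up below must match the ones written at
// export time (see the `tensorflow create model.ipynb` notebook in this
// crate); otherwise the lookups fail at runtime.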
76 | let mut graph = Graph::new(); 77 | let session = Session::from_saved_model(&SessionOptions::new(), 78 | &["train", "serve"], 79 | &mut graph, 80 | export_dir)?; 81 | let op_x = graph.operation_by_name_required("x")?; 82 | let op_x_test = graph.operation_by_name_required("x_test")?; 83 | let op_y = graph.operation_by_name_required("y")?; 84 | let op_train = graph.operation_by_name_required("train")?; 85 | let op_w = graph.operation_by_name_required("w")?; 86 | let op_y_preds = graph.operation_by_name_required("y_preds")?; 87 | 88 | Session::new(&SessionOptions::new(), &graph)?; 89 | let mut args = SessionRunArgs::new(); 90 | args.add_feed(&op_x, 0, &X_train); 91 | args.add_feed(&op_x_test, 0, &X_test); 92 | args.add_feed(&op_y, 0, &y_train); 93 | args.add_target(&op_train); 94 | let preds_token = args.request_fetch(&op_y_preds, 0); 95 | for _ in 0..10 { 96 | session.run(&mut args)?; 97 | }; 98 | let preds_token_res: Tensor<f64> = args.fetch::<f64>(preds_token)?; 99 | println!("Now the preds"); 100 | println!("{:?}", &preds_token_res[..]); 101 | println!("{:?}", &boston_y_test); 102 | println!("{:?}", r_squared_score(&boston_y_test, &preds_token_res[..])); 103 | 104 | Ok(()) 105 | } 106 | -------------------------------------------------------------------------------- /chapter2/rust_and_tf/src/main.rs: -------------------------------------------------------------------------------- 1 | /// This package is about the different ways you can use tensorflow in rust. 2 | /// Current possible arguments. 3 | /// # Arguments 4 | /// * `` - Will run the placeholders example. 5 | /// * `seq` - Will run the sequence of nodes example 6 | /// * `vars` - Will run the example with graph variables 7 | /// 8 | /// # Example 9 | /// ``` 10 | /// $ cargo run seq 11 | /// ➜ rust_and_tf git:(master) ✗ cargo run seq 12 | /// Finished dev [unoptimized + debuginfo] target(s) in 0.15s 13 | /// Running `target/debug/rust_and_tf seq` 14 | /// 2019-04-07 12:55:41.781908: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX 15 | /// 2019-04-07 12:55:41.814069: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 1996260000 Hz 16 | /// 2019-04-07 12:55:41.814902: I tensorflow/compiler/xla/service/service.cc:150] XLA service 0x56514fdfded0 executing computations on platform Host. Devices: 17 | /// 2019-04-07 12:55:41.814966: I tensorflow/compiler/xla/service/service.cc:158] StreamExecutor device (0): , 18 | /// constant evaluation: w = 3; x = w + 2; y = x + 5; z = x * 3 19 | /// y => 10.0 20 | /// z => 15.0 21 | /// ``` 22 | 23 | // reference: https://github.com/danieldk/dpar 24 | // https://www.tensorflow.org/tutorials/estimators/cnn 25 | 26 | extern crate serde; 27 | // This lets us write `#[derive(Deserialize)]`.
28 | #[macro_use] 29 | extern crate serde_derive; 30 | 31 | use std::process::exit; 32 | use std::env::args; 33 | 34 | mod graph_with_placeholder; 35 | mod seq_nodes; 36 | mod graph_variables; 37 | mod linear_regression; 38 | mod conv_nets; 39 | mod linear_regression_from_model; 40 | mod conv_nets_maxpooling; 41 | 42 | 43 | #[cfg_attr(feature="examples_system_alloc", global_allocator)] 44 | #[cfg(feature="examples_system_alloc")] 45 | static ALLOCATOR: std::alloc::System = std::alloc::System; 46 | 47 | fn main() { 48 | let args: Vec<String> = args().collect(); 49 | let model = if args.len() < 2 { 50 | None 51 | } else { 52 | Some(args[1].as_str()) 53 | }; 54 | let res = match model { 55 | None => graph_with_placeholder::run(), 56 | Some("seq") => seq_nodes::run(), 57 | Some("vars") => graph_variables::run(), 58 | Some("lr") => linear_regression::run(), 59 | Some("lr_py") => linear_regression_from_model::run(), 60 | Some("conv") => conv_nets::run(), 61 | Some("conv_mp") => conv_nets_maxpooling::run(), 62 | Some(_) => graph_with_placeholder::run(), 63 | }; 64 | // Putting the main code in another function serves two purposes: 65 | // 1. We can use the `?` operator. 66 | // 2. We can call exit safely, which does not run any destructors. 67 | exit(match res { 68 | Ok(_) => 0, 69 | Err(e) => { 70 | println!("{}", e); 71 | 1 72 | } 73 | }) 74 | } -------------------------------------------------------------------------------- /chapter2/rust_and_tf/src/seq_nodes.rs: -------------------------------------------------------------------------------- 1 | use std::error::Error; 2 | use std::result::Result; 3 | 4 | use tensorflow as tf; 5 | use tf::expr::{Compiler, Constant}; 6 | use tf::{Graph, Tensor}; 7 | use tf::{Session, SessionOptions, SessionRunArgs}; 8 | 9 | #[cfg_attr(feature="examples_system_alloc", global_allocator)] 10 | #[cfg(feature="examples_system_alloc")] 11 | static ALLOCATOR: std::alloc::System = std::alloc::System; 12 | 13 | pub fn run() -> Result<(), Box<dyn Error>> { 14 | let mut g = Graph::new(); 15 | 16 | let (y_node, z_node) = { 17 | let mut compiler = Compiler::new(&mut g); 18 | let w = <Tensor<f32>>::new(&[1]).with_values(&[3.0_f32]).unwrap(); 19 | let w_expr = <Constant<f32>>::new_expr(w); 20 | let x_expr = w_expr.clone() + 2.0f32; 21 | let y_expr = x_expr.clone() + 5.0f32; 22 | let z_expr = x_expr.clone() * 3.0f32; 23 | 24 | let y_node = compiler.compile(y_expr.clone())?; 25 | let z_node = compiler.compile(z_expr.clone())?; 26 | (y_node, z_node) 27 | }; 28 | 29 | let options = SessionOptions::new(); 30 | let mut session = Session::new(&options, &g)?; 31 | 32 | // Evaluate the graph. 33 | let mut step = SessionRunArgs::new(); 34 | let output_token_y = step.request_fetch(&y_node, 0); 35 | let output_token_z = step.request_fetch(&z_node, 0); 36 | session.run(&mut step).unwrap(); 37 | 38 | // Check our results.
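// y and z were both requested from the same `session.run`, so the shared
// subexpression x = w + 2 is evaluated as part of a single graph execution.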
39 | let output_tensor_y = step.fetch::<f32>(output_token_y)?; 40 | let output_tensor_z = step.fetch::<f32>(output_token_z)?; 41 | println!("constant evaluation: w = 3; x = w + 2; y = x + 5; z = x * 3"); 42 | println!("y => {:?}", output_tensor_y[0]); 43 | println!("z => {:?}", output_tensor_z[0]); 44 | session.close()?; 45 | 46 | Ok(()) 47 | } -------------------------------------------------------------------------------- /chapter2/rust_and_tf/tensorflow create model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 15, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import matplotlib.pyplot as plt\n", 10 | "%matplotlib inline\n", 11 | "import tensorflow as tf\n", 12 | "import numpy as np\n", 13 | "\n", 14 | "from numpy import genfromtxt\n", 15 | "from sklearn.datasets import load_boston\n", 16 | "from tensorflow.python.saved_model.builder import SavedModelBuilder\n", 17 | "from tensorflow.python.saved_model.signature_def_utils import build_signature_def\n", 18 | "from tensorflow.python.saved_model.signature_constants import REGRESS_METHOD_NAME\n", 19 | "from tensorflow.python.saved_model.tag_constants import TRAINING, SERVING\n", 20 | "from tensorflow.python.saved_model.utils import build_tensor_info" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 19, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "learning_rate = 0.01\n", 30 | "cost_history = np.empty(shape=[1],dtype=float)\n", 31 | "\n", 32 | "X = tf.placeholder(tf.float64,[None,n_dim], name=\"x\")\n", 33 | "X_test = tf.placeholder(tf.float64,[None,n_dim], name=\"x_test\")\n", 34 | "Y = tf.placeholder(tf.float64,[None,1], name=\"y\")\n", 35 | "W = tf.Variable(tf.ones([n_dim,1],dtype=tf.float64), name=\"w\")\n", 36 | "\n", 37 | "init = tf.variables_initializer(tf.global_variables(), name=\"init\")" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 20, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "y_ = tf.matmul(X, W, name=\"y_hat\")\n", 47 | "y_preds = tf.matmul(X_test, W, name=\"y_preds\")\n", 48 | "cost = tf.reduce_mean(tf.square(y_ - Y))\n", 49 | "training_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost, name=\"train\")" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "http://aqibsaeed.github.io/2016-07-07-TensorflowLR/" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 28, 62 | "metadata": {}, 63 | "outputs": [ 64 | { 65 | "name": "stdout", 66 | "output_type": "stream", 67 | "text": [ 68 | "INFO:tensorflow:Assets added to graph.\n", 69 | "INFO:tensorflow:No assets to write.\n", 70 | "INFO:tensorflow:SavedModel written to: boston_regression/saved_model.pb\n" 71 | ] 72 | } 73 | ], 74 | "source": [ 75 | "directory = 'boston_regression'\n", 76 | "builder = SavedModelBuilder(directory)\n", 77 | "\n", 78 | "with tf.Session(graph=tf.get_default_graph()) as sess:\n", 79 | " sess.run(init)\n", 80 | "\n", 81 | " signature_inputs = {\n", 82 | " \"x\": build_tensor_info(X),\n", 83 | " \"x_test\": build_tensor_info(X_test),\n", 84 | " \"y\": build_tensor_info(Y)\n", 85 | " }\n", 86 | " signature_outputs = {\n", 87 | " \"out\": build_tensor_info(y_preds)\n", 88 | " }\n", 89 | " signature_def = build_signature_def(\n", 90 | " signature_inputs, signature_outputs,\n", 91 | " REGRESS_METHOD_NAME)\n", 92 | " builder.add_meta_graph_and_variables(\n", 93 | " sess,
[TRAINING, SERVING],\n", 94 | "        signature_def_map={\n", 95 | "            REGRESS_METHOD_NAME: signature_def\n", 96 | "        },\n", 97 | "        assets_collection=tf.get_collection(tf.GraphKeys.ASSET_FILEPATHS))\n", 98 | "    builder.save(as_text=False)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [] 107 | } 108 | ], 109 | "metadata": { 110 | "kernelspec": { 111 | "display_name": "Python 3", 112 | "language": "python", 113 | "name": "python3" 114 | }, 115 | "language_info": { 116 | "codemirror_mode": { 117 | "name": "ipython", 118 | "version": 3 119 | }, 120 | "file_extension": ".py", 121 | "mimetype": "text/x-python", 122 | "name": "python", 123 | "nbconvert_exporter": "python", 124 | "pygments_lexer": "ipython3", 125 | "version": "3.5.2" 126 | } 127 | }, 128 | "nbformat": 4, 129 | "nbformat_minor": 2 130 | } 131 | -------------------------------------------------------------------------------- /chapter2/rustlearn_classification_tasks/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rustlearn_classification_tasks" 3 | version = "0.1.0" 4 | authors = ["joydeep bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | rustlearn = "0.5.0" 9 | csv = "1.0.5" 10 | serde = "1.0.89" 11 | serde_derive = "1.0.89" 12 | rand = "0.6" 13 | ml-utils = { path = "../ml-utils" } -------------------------------------------------------------------------------- /chapter2/rustlearn_classification_tasks/src/binary_class_scores.rs: -------------------------------------------------------------------------------- 1 | use std::error::Error; 2 | 3 | use rustlearn::prelude::*; 4 | use rustlearn::metrics::{accuracy_score, roc_auc_score}; 5 | 6 | use ml_utils; 7 | use ml_utils::sup_metrics::{accuracy, logloss_score}; 8 | 9 | pub fn run() -> Result<(), Box<dyn Error>> { 10 | let preds = vec![1., 0.0001, 0.908047338626, 0.0199900075962, 0.904058545833, 0.321508119045, 0.657086320195]; 11 | let actuals = vec![1., 0., 0., 1., 1., 0., 0.]; 12 | println!("logloss score: {:?}", logloss_score(&actuals, &preds, 1e-15)); 13 | println!("roc auc scores: {:?}", roc_auc_score(&Array::from(actuals), &Array::from(preds))?); 14 | 15 | Ok(()) 16 | } -------------------------------------------------------------------------------- /chapter2/rustlearn_classification_tasks/src/logistic_reg.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | use std::vec::Vec; 3 | use std::error::Error; 4 | 5 | use csv; 6 | use rand; 7 | use rand::thread_rng; 8 | use rand::seq::SliceRandom; 9 | 10 | use rustlearn::prelude::*; 11 | use rustlearn::linear_models::sgdclassifier::Hyperparameters as logistic_regression; 12 | use rustlearn::metrics::{accuracy_score, roc_auc_score}; 13 | 14 | use ml_utils; 15 | use ml_utils::sup_metrics::{accuracy, logloss_score}; 16 | use ml_utils::datasets::Flower; 17 | 18 | pub fn run() -> Result<(), Box<dyn Error>> { 19 | // Get all the data 20 | let mut rdr = csv::Reader::from_reader(io::stdin()); 21 | let mut data = Vec::new(); 22 | for result in rdr.deserialize() { 23 | let r: Flower = result?; 24 | data.push(r); // data contains all the records 25 | } 26 | 27 | // shuffle the data. 28 | data.shuffle(&mut thread_rng()); 29 | 30 | // separate out to train and test datasets.
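// The records were shuffled above, so taking the first 20% (rounded to a
// whole count) via `split_at` gives a random test set; the remainder is
// used for training.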
31 | let test_size: f32 = 0.2; 32 | let test_size: f32 = data.len() as f32 * test_size; 33 | let test_size = test_size.round() as usize; 34 | let (test_data, train_data) = data.split_at(test_size); 35 | let train_size = train_data.len(); 36 | let test_size = test_data.len(); 37 | 38 | // differentiate the features and the labels. 39 | let flower_x_train: Vec<f32> = train_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 40 | let flower_y_train: Vec<f32> = train_data.iter().map(|r| r.into_labels()).collect(); 41 | 42 | let flower_x_test: Vec<f32> = test_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 43 | let flower_y_test: Vec<f32> = test_data.iter().map(|r| r.into_labels()).collect(); 44 | 45 | // Since rustlearn works with arrays we need to convert the vectors to a dense matrix or a sparse matrix 46 | let mut flower_x_train = Array::from(flower_x_train); // as opposed to rusty machine, all floats here are f32 reference : https://github.com/maciejkula/rustlearn/blob/7daf692fe504966aa84d920321b884afe19caa79/src/array/dense.rs#L129 47 | flower_x_train.reshape(train_size, 4); 48 | 49 | let flower_y_train = Array::from(flower_y_train); 50 | 51 | let mut flower_x_test = Array::from(flower_x_test); 52 | flower_x_test.reshape(test_size, 4); 53 | 54 | let flower_y_test = Array::from(flower_y_test); 55 | 56 | // working with stochastic gradient descent. 57 | // uses the Adagrad adaptive per-parameter learning rate 58 | let mut model = logistic_regression::new(4) 59 | .learning_rate(1.0) 60 | .l2_penalty(0.5) 61 | .l1_penalty(0.0) 62 | .one_vs_rest(); 63 | let num_epochs = 100; 64 | 65 | for _ in 0..num_epochs { 66 | model.fit(&flower_x_train, &flower_y_train).unwrap(); 67 | } 68 | 69 | let prediction = model.predict(&flower_x_test).unwrap(); 70 | let acc1 = accuracy_score(&flower_y_test, &prediction); 71 | // let acc2 = accuracy(flower_y_test.data()[..], prediction.data()[..]); 72 | println!("Logistic Regression: accuracy: {:?}", acc1); 73 | // println!("Logistic Regression: accuracy: {:?}", acc2); 74 | 75 | Ok(()) 76 | } -------------------------------------------------------------------------------- /chapter2/rustlearn_classification_tasks/src/main.rs: -------------------------------------------------------------------------------- 1 | use std::vec::Vec; 2 | use std::process::exit; 3 | use std::env::args; 4 | 5 | mod trees; 6 | mod logistic_reg; 7 | mod svm; 8 | mod binary_class_scores; 9 | 10 | fn main() { 11 | let args: Vec<String> = args().collect(); 12 | let model = if args.len() < 2 { 13 | None 14 | } else { 15 | Some(args[1].as_str()) 16 | }; 17 | let res = match model { 18 | None => {println!("nothing", ); Ok(())}, 19 | Some("lr") => logistic_reg::run(), 20 | Some("svm") => svm::run(), 21 | Some("bs") => binary_class_scores::run(), 22 | Some(_) => trees::run(), 23 | }; 24 | // Putting the main code in another function serves two purposes: 25 | // 1. We can use the `?` operator. 26 | // 2. We can call exit safely, which does not run any destructors.
27 | exit(match res { 28 | Ok(_) => 0, 29 | Err(e) => { 30 | println!("{}", e); 31 | 1 32 | } 33 | }) 34 | } -------------------------------------------------------------------------------- /chapter2/rustlearn_classification_tasks/src/svm.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | use std::vec::Vec; 3 | use std::error::Error; 4 | 5 | use csv; 6 | use rand; 7 | use rand::thread_rng; 8 | use rand::seq::SliceRandom; 9 | 10 | use rustlearn::prelude::*; 11 | use rustlearn::svm::libsvm::svc::{Hyperparameters as libsvm_svc, KernelType}; 12 | use rustlearn::metrics::{accuracy_score, roc_auc_score}; 13 | 14 | use ml_utils; 15 | use ml_utils::sup_metrics::{accuracy, logloss_score}; 16 | use ml_utils::datasets::Flower; 17 | 18 | pub fn run() -> Result<(), Box<dyn Error>> { 19 | // Get all the data 20 | let mut rdr = csv::Reader::from_reader(io::stdin()); 21 | let mut data = Vec::new(); 22 | for result in rdr.deserialize() { 23 | let r: Flower = result?; 24 | data.push(r); // data contains all the records 25 | } 26 | 27 | // shuffle the data. 28 | data.shuffle(&mut thread_rng()); 29 | 30 | // separate out to train and test datasets. 31 | let test_size: f32 = 0.2; 32 | let test_size: f32 = data.len() as f32 * test_size; 33 | let test_size = test_size.round() as usize; 34 | let (test_data, train_data) = data.split_at(test_size); 35 | let train_size = train_data.len(); 36 | let test_size = test_data.len(); 37 | 38 | // differentiate the features and the labels. 39 | let flower_x_train: Vec<f32> = train_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 40 | let flower_y_train: Vec<f32> = train_data.iter().map(|r| r.into_labels()).collect(); 41 | 42 | let flower_x_test: Vec<f32> = test_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 43 | let flower_y_test: Vec<f32> = test_data.iter().map(|r| r.into_labels()).collect(); 44 | 45 | // Since rustlearn works with arrays we need to convert the vectors to a dense matrix or a sparse matrix 46 | let mut flower_x_train = Array::from(flower_x_train); // as opposed to rusty machine, all floats here are f32 reference : https://github.com/maciejkula/rustlearn/blob/7daf692fe504966aa84d920321b884afe19caa79/src/array/dense.rs#L129 47 | flower_x_train.reshape(train_size, 4); 48 | 49 | let flower_y_train = Array::from(flower_y_train); 50 | 51 | let mut flower_x_test = Array::from(flower_x_test); 52 | flower_x_test.reshape(test_size, 4); 53 | 54 | let flower_y_test = Array::from(flower_y_test); 55 | 56 | // Working with svms 57 | let svm_linear_model = libsvm_svc::new(4, KernelType::Linear, 3) 58 | .C(0.3) 59 | .build(); 60 | let svm_poly_model = libsvm_svc::new(4, KernelType::Polynomial, 3) 61 | .C(0.3) 62 | .build(); 63 | let svm_rbf_model = libsvm_svc::new(4, KernelType::RBF, 3) 64 | .C(0.3) 65 | .build(); 66 | let svm_sigmoid_model = libsvm_svc::new(4, KernelType::Sigmoid, 3) 67 | .C(0.3) 68 | .build(); 69 | let svm_kernel_types = ["linear", "polynomial", "rbf", "sigmoid"]; 70 | let mut svm_model_types = [svm_linear_model, svm_poly_model, svm_rbf_model, svm_sigmoid_model]; 71 | for (kernel_type, svm_model) in svm_kernel_types.iter().zip(svm_model_types.iter_mut()) { 72 | svm_model.fit(&flower_x_train, &flower_y_train).unwrap(); 73 | 74 | let prediction = svm_model.predict(&flower_x_test).unwrap(); 75 | let acc = accuracy_score(&flower_y_test, &prediction); 76 | println!("Lib svm {kernel}: accuracy: {accuracy}", accuracy=acc, kernel=kernel_type); 77 | }; 78 | 79 | let preds = vec![1., 0.0001, 0.908047338626,
0.0199900075962, 0.904058545833, 0.321508119045, 0.657086320195]; 80 | let actuals = vec![1., 0., 0., 1., 1., 0., 0.]; 81 | println!("logloss score: {:?}", logloss_score(&actuals, &preds, 1e-15)); 82 | println!("roc auc scores: {:?}", roc_auc_score(&Array::from(actuals), &Array::from(preds))?); 83 | 84 | Ok(()) 85 | } -------------------------------------------------------------------------------- /chapter2/rustlearn_classification_tasks/src/trees.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | use std::vec::Vec; 3 | use std::error::Error; 4 | 5 | use csv; 6 | use rand; 7 | use rand::thread_rng; 8 | use rand::seq::SliceRandom; 9 | 10 | use rustlearn::prelude::*; 11 | use rustlearn::ensemble::random_forest::Hyperparameters as randomforest; 12 | use rustlearn::trees::decision_tree; 13 | use rustlearn::metrics::{accuracy_score, roc_auc_score}; 14 | 15 | use ml_utils; 16 | use ml_utils::sup_metrics::{accuracy, logloss_score}; 17 | use ml_utils::datasets::Flower; 18 | 19 | pub fn run() -> Result<(), Box<dyn Error>> { 20 | // Get all the data 21 | let mut rdr = csv::Reader::from_reader(io::stdin()); 22 | let mut data = Vec::new(); 23 | for result in rdr.deserialize() { 24 | let r: Flower = result?; 25 | data.push(r); // data contains all the records 26 | } 27 | 28 | // shuffle the data. 29 | data.shuffle(&mut thread_rng()); 30 | 31 | // separate out to train and test datasets. 32 | let test_size: f32 = 0.2; 33 | let test_size: f32 = data.len() as f32 * test_size; 34 | let test_size = test_size.round() as usize; 35 | let (test_data, train_data) = data.split_at(test_size); 36 | let train_size = train_data.len(); 37 | let test_size = test_data.len(); 38 | 39 | // differentiate the features and the labels. 40 | let flower_x_train: Vec<f32> = train_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 41 | let flower_y_train: Vec<f32> = train_data.iter().map(|r| r.into_labels()).collect(); 42 | 43 | let flower_x_test: Vec<f32> = test_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 44 | let flower_y_test: Vec<f32> = test_data.iter().map(|r| r.into_labels()).collect(); 45 | 46 | // Since rustlearn works with arrays we need to convert the vectors to a dense matrix or a sparse matrix 47 | let mut flower_x_train = Array::from(flower_x_train); // as opposed to rusty machine, all floats here are f32 reference : https://github.com/maciejkula/rustlearn/blob/7daf692fe504966aa84d920321b884afe19caa79/src/array/dense.rs#L129 48 | flower_x_train.reshape(train_size, 4); 49 | 50 | let flower_y_train = Array::from(flower_y_train); 51 | 52 | let mut flower_x_test = Array::from(flower_x_test); 53 | flower_x_test.reshape(test_size, 4); 54 | 55 | let flower_y_test = Array::from(flower_y_test); 56 | 57 | // create a decision tree model 58 | let mut decision_tree_model = decision_tree::Hyperparameters::new(flower_x_train.cols()) 59 | .one_vs_rest(); 60 | decision_tree_model.fit(&flower_x_train, &flower_y_train).unwrap(); 61 | 62 | let prediction = decision_tree_model.predict(&flower_x_test).unwrap(); 63 | let acc = accuracy_score(&flower_y_test, &prediction); 64 | println!("DecisionTree model accuracy: {:?}", acc); 65 | 66 | 67 | 68 | // create a random forest model 69 | let mut tree_params = decision_tree::Hyperparameters::new(flower_x_train.cols()); 70 | tree_params.min_samples_split(10) 71 | .max_features(4); 72 | 73 | let mut random_forest_model = randomforest::new(tree_params, 10).one_vs_rest(); 74 | 75 | random_forest_model.fit(&flower_x_train, &flower_y_train).unwrap();
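// The forest just fitted uses 10 trees (min_samples_split = 10,
// max_features = 4), wrapped in one-vs-rest because the iris target has
// three classes.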
76 | 77 | // Optionally serialize and deserialize the model 78 | 79 | // let encoded = bincode::rustc_serialize::encode(&model, 80 | // bincode::SizeLimit::Infinite).unwrap(); 81 | // let decoded: OneVsRestWrapper<DecisionTree> = bincode::rustc_serialize::decode(&encoded).unwrap(); 82 | 83 | let prediction = random_forest_model.predict(&flower_x_test).unwrap(); 84 | let acc = accuracy_score(&flower_y_test, &prediction); 85 | println!("Random Forest: accuracy: {:?}", acc); 86 | 87 | Ok(()) 88 | } -------------------------------------------------------------------------------- /chapter2/rusty_machine_classification/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rusty_machine_classification" 3 | version = "0.1.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | rusty-machine = { path = "../rusty-machine" } 9 | ml-utils = { path = "../ml-utils" } 10 | rand = "0.6.5" 11 | csv = "1.0.7" -------------------------------------------------------------------------------- /chapter2/rusty_machine_classification/src/main.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | use std::vec::Vec; 3 | use std::error::Error; 4 | 5 | use rusty_machine as rm; 6 | use rm::linalg::Matrix; 7 | use rm::linalg::Vector; 8 | use rm::learning::knn::KNNClassifier; 9 | use rusty_machine::learning::knn::{KDTree, BallTree, BruteForce}; 10 | use rm::learning::SupModel; 11 | use csv; 12 | use rand; 13 | use rand::thread_rng; 14 | use rand::seq::SliceRandom; 15 | 16 | use ml_utils; 17 | use ml_utils::datasets::Flower; 18 | use ml_utils::sup_metrics::accuracy; 19 | 20 | fn main() -> Result<(), Box<dyn Error>> { 21 | // Get all the data 22 | let mut rdr = csv::Reader::from_reader(io::stdin()); 23 | let mut data = Vec::new(); 24 | for result in rdr.deserialize() { 25 | let r: Flower = result?; 26 | data.push(r); // data contains all the records 27 | } 28 | 29 | // shuffle the data. 30 | data.shuffle(&mut thread_rng()); 31 | 32 | // separate out to train and test datasets. 33 | let test_size: f64 = 0.2; 34 | let test_size: f64 = data.len() as f64 * test_size; 35 | let test_size = test_size.round() as usize; 36 | let (test_data, train_data) = data.split_at(test_size); 37 | let train_size = train_data.len(); 38 | let test_size = test_data.len(); 39 | 40 | // differentiate the features and the labels. 41 | let flower_x_train: Vec<f64> = train_data.iter().flat_map(|r| { 42 | let features = r.into_feature_vector(); 43 | let features: Vec<f64> = features.iter().map(|&x| x as f64).collect(); 44 | features 45 | }).collect(); 46 | let flower_y_train: Vec<usize> = train_data.iter().map( 47 | |r| r.into_int_labels() as usize).collect(); 48 | 49 | let flower_x_test: Vec<f64> = test_data.iter().flat_map(|r| { 50 | let features = r.into_feature_vector(); 51 | let features: Vec<f64> = features.iter().map(|&x| x as f64).collect(); 52 | features 53 | }).collect(); 54 | let flower_y_test: Vec<u32> = test_data.iter().map(|r| r.into_int_labels() as u32).collect(); 55 | 56 | // Convert the data into matrices for rusty machine 57 | let flower_x_train = Matrix::new(train_size, 4, flower_x_train); 58 | let flower_y_train = Vector::new(flower_y_train); 59 | let flower_x_test = Matrix::new(test_size, 4, flower_x_test); 60 | 61 | // train the classifier to search 2 nearest.
this is the same as kdtree 62 | let mut knn = KNNClassifier::new(2); 63 | println!("{:?}", knn); 64 | 65 | // train the classifier 66 | knn.train(&flower_x_train, &flower_y_train).unwrap(); 67 | 68 | // predict new points 69 | let preds = knn.predict(&flower_x_test).unwrap(); 70 | let preds: Vec<u32> = preds.data().iter().map(|&x| x as u32).collect(); 71 | println!("default is binary tree"); 72 | println!("accuracy {:?}", accuracy(preds.as_slice(), &flower_y_test)); 73 | 74 | // Ball tree is good when the number of dimensions is huge. 75 | let mut knn = KNNClassifier::new_specified(2, BallTree::new(30)); 76 | println!("{:?}", knn); 77 | 78 | // train the classifier 79 | knn.train(&flower_x_train, &flower_y_train).unwrap(); 80 | 81 | // predict new points 82 | let preds = knn.predict(&flower_x_test).unwrap(); 83 | let preds: Vec<u32> = preds.data().iter().map(|&x| x as u32).collect(); 84 | println!("accuracy for ball tree {:?}", accuracy(preds.as_slice(), &flower_y_test)); 85 | 86 | // The k-d tree is a binary tree in which every leaf node is a k-dimensional point 87 | let mut knn = KNNClassifier::new_specified(2, KDTree::default()); 88 | println!("{:?}", knn); 89 | 90 | // train the classifier 91 | knn.train(&flower_x_train, &flower_y_train).unwrap(); 92 | 93 | // predict new points 94 | let preds = knn.predict(&flower_x_test).unwrap(); 95 | let preds: Vec<u32> = preds.data().iter().map(|&x| x as u32).collect(); 96 | println!("accuracy for kdtree tree {:?}", accuracy(preds.as_slice(), &flower_y_test)); 97 | 98 | // A repeat run of the k-d tree classifier 99 | let mut knn = KNNClassifier::new_specified(2, KDTree::default()); 100 | println!("{:?}", knn); 101 | 102 | // train the classifier 103 | knn.train(&flower_x_train, &flower_y_train).unwrap(); 104 | 105 | // predict new points 106 | let preds = knn.predict(&flower_x_test).unwrap(); 107 | let preds: Vec<u32> = preds.data().iter().map(|&x| x as u32).collect(); 108 | println!("accuracy for kdtree tree (repeat run) {:?}", accuracy(preds.as_slice(), &flower_y_test)); 109 | 110 | // Brute force means all the nearest neighbors are looked into 111 | let mut knn = KNNClassifier::new_specified(2, BruteForce::default()); 112 | println!("{:?}", knn); 113 | 114 | // train the classifier 115 | knn.train(&flower_x_train, &flower_y_train).unwrap(); 116 | 117 | // predict new points 118 | let preds = knn.predict(&flower_x_test).unwrap(); 119 | let preds: Vec<u32> = preds.data().iter().map(|&x| x as u32).collect(); 120 | println!("accuracy for brute force {:?}", accuracy(preds.as_slice(), &flower_y_test)); 121 | 122 | 123 | Ok(()) 124 | } 125 | -------------------------------------------------------------------------------- /chapter2/rusty_machine_supervised_algos/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rusty_machine_supervised_algos" 3 | version = "0.1.0" 4 | authors = ["joydeep bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | csv = "*" 9 | rusty-machine="*" 10 | serde = "1" 11 | serde_derive = "1" 12 | rand = "0.6.5" -------------------------------------------------------------------------------- /chapter2/rustymachine_regression/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rustymachine_regression" 3 | version = "0.1.0" 4 | authors = ["joydeep bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | rusty-machine = "0.5.4" 9 | serde = "1" 10 | serde_derive = "1" 11 | rand =
"0.6.5" 12 | ml-utils = { path = "../ml-utils" } -------------------------------------------------------------------------------- /chapter2/rustymachine_regression/src/gaussian_process_reg.rs: -------------------------------------------------------------------------------- 1 | /// Data gathered from https://www.kaggle.com/vikrishnan/boston-house-prices 2 | /// Boston dataset: https://www.cs.toronto.edu/~delve/data/boston/bostonDetail.html 3 | /// This module shows how to run regression models 4 | use std::io::prelude::*; 5 | use std::io::BufReader; 6 | use std::path::Path; 7 | use std::fs::File; 8 | use std::vec::Vec; 9 | use std::error::Error; 10 | use std::process::exit; 11 | use std::env::args; 12 | 13 | use rusty_machine; 14 | use rusty_machine::linalg::Matrix; 15 | // use rusty_machine::linalg::BaseMatrix; 16 | use rusty_machine::linalg::Vector; 17 | use rusty_machine::learning::gp::GaussianProcess; 18 | use rusty_machine::learning::gp::ConstMean; 19 | use rusty_machine::learning::toolkit::kernel; 20 | use rusty_machine::analysis::score::neg_mean_squared_error; 21 | use rusty_machine::learning::SupModel; 22 | 23 | // use ndarray::{Array, arr1}; 24 | use rand; 25 | use rand::thread_rng; 26 | use rand::seq::SliceRandom; 27 | 28 | use ml_utils::datasets::get_boston_records_from_file; 29 | use ml_utils::sup_metrics::r_squared_score; 30 | 31 | pub fn run() -> Result<(), Box> { 32 | // Get all the data 33 | let filename = "data/housing.csv"; 34 | let mut data = get_boston_records_from_file(&filename); // file must be in the folder data 35 | 36 | // shuffle the data. 37 | data.shuffle(&mut thread_rng()); 38 | 39 | // separate out to train and test datasets. 40 | let test_size: f64 = 0.2; 41 | let test_size: f64 = data.len() as f64 * test_size; 42 | let test_size = test_size.round() as usize; 43 | let (test_data, train_data) = data.split_at(test_size); 44 | let train_size = train_data.len(); 45 | let test_size = test_data.len(); 46 | 47 | // differentiate the features and the targets. 
48 | let boston_x_train: Vec = train_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 49 | let boston_y_train: Vec = train_data.iter().map(|r| r.into_targets()).collect(); 50 | 51 | let boston_x_test: Vec = test_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 52 | let boston_y_test: Vec = test_data.iter().map(|r| r.into_targets()).collect(); 53 | 54 | // using ndarray 55 | // let boston_x_train = Array::from_shape_vec((train_size, 13), boston_x_train).unwrap(); 56 | // let boston_y_train = Array::from_vec(boston_y_train); 57 | // let boston_x_test = Array::from_shape_vec((test_size, 13), boston_x_test).unwrap(); 58 | // let boston_y_test = Array::from_vec(boston_y_test); 59 | 60 | // COnvert the data into matrices for rusty machine 61 | let boston_x_train = Matrix::new(train_size, 13, boston_x_train); 62 | let boston_y_train = Vector::new(boston_y_train); 63 | let boston_x_test = Matrix::new(test_size, 13, boston_x_test); 64 | // let boston_y_test = Vector::new(boston_y_test); 65 | let boston_y_test = Matrix::new(test_size, 1, boston_y_test); 66 | 67 | // Create a gaussian process regression 68 | // A squared exponential kernel with lengthscale 2 and amplitude 1 69 | let ker = kernel::SquaredExp::new(2., 1.); 70 | 71 | // zero function as mean function 72 | let zero_mean = ConstMean::default(); 73 | 74 | // defining the model with noise 10 75 | let mut gaus_model = GaussianProcess::new(ker, zero_mean, 10f64); 76 | 77 | gaus_model.train(&boston_x_train, &boston_y_train)?; 78 | 79 | let predictions = gaus_model.predict(&boston_x_test).unwrap(); 80 | let predictions = Matrix::new(test_size, 1, predictions); 81 | let acc = neg_mean_squared_error(&predictions, &boston_y_test); 82 | println!("gaussian process regression error: {:?}", acc); 83 | println!("gaussian process regression R2 score: {:?}", r_squared_score( 84 | &boston_y_test.data(), &predictions.data())); 85 | 86 | Ok(()) 87 | } 88 | -------------------------------------------------------------------------------- /chapter2/rustymachine_regression/src/glms.rs: -------------------------------------------------------------------------------- 1 | /// Data gathered from https://www.kaggle.com/vikrishnan/boston-house-prices 2 | /// Boston dataset: https://www.cs.toronto.edu/~delve/data/boston/bostonDetail.html 3 | /// This module shows how to run regression models 4 | use std::io::prelude::*; 5 | use std::io::BufReader; 6 | use std::path::Path; 7 | use std::fs::File; 8 | use std::vec::Vec; 9 | use std::error::Error; 10 | use std::process::exit; 11 | use std::env::args; 12 | 13 | use rusty_machine; 14 | use rusty_machine::linalg::Matrix; 15 | use rusty_machine::linalg::Vector; 16 | use rusty_machine::learning::glm::{GenLinearModel, Normal}; 17 | use rusty_machine::analysis::score::neg_mean_squared_error; 18 | use rusty_machine::learning::SupModel; 19 | 20 | // use ndarray::{Array, arr1}; 21 | use rand; 22 | use rand::thread_rng; 23 | use rand::seq::SliceRandom; 24 | 25 | use ml_utils::datasets::get_boston_records_from_file; 26 | use ml_utils::sup_metrics::r_squared_score; 27 | 28 | pub fn run() -> Result<(), Box> { 29 | // Get all the data 30 | let filename = "data/housing.csv"; 31 | let mut data = get_boston_records_from_file(&filename); // file must be in the folder data 32 | 33 | // shuffle the data. 34 | data.shuffle(&mut thread_rng()); 35 | 36 | // separate out to train and test datasets. 
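// Note on the arithmetic below: the 0.2 fraction is turned into a row count
// before split_at. Assuming the usual 506-row Boston housing file,
// 506 * 0.2 = 101.2, which rounds to 101 test rows and leaves 405 rows for
// training (the data was already shuffled above, so the split is random).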
37 | let test_size: f64 = 0.2; 38 | let test_size: f64 = data.len() as f64 * test_size; 39 | let test_size = test_size.round() as usize; 40 | let (test_data, train_data) = data.split_at(test_size); 41 | let train_size = train_data.len(); 42 | let test_size = test_data.len(); 43 | 44 | // differentiate the features and the targets. 45 | let boston_x_train: Vec = train_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 46 | let boston_y_train: Vec = train_data.iter().map(|r| r.into_targets()).collect(); 47 | 48 | let boston_x_test: Vec = test_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 49 | let boston_y_test: Vec = test_data.iter().map(|r| r.into_targets()).collect(); 50 | 51 | // using ndarray 52 | // let boston_x_train = Array::from_shape_vec((train_size, 13), boston_x_train).unwrap(); 53 | // let boston_y_train = Array::from_vec(boston_y_train); 54 | // let boston_x_test = Array::from_shape_vec((test_size, 13), boston_x_test).unwrap(); 55 | // let boston_y_test = Array::from_vec(boston_y_test); 56 | 57 | // COnvert the data into matrices for rusty machine 58 | let boston_x_train = Matrix::new(train_size, 13, boston_x_train); 59 | let boston_y_train = Vector::new(boston_y_train); 60 | let boston_x_test = Matrix::new(test_size, 13, boston_x_test); 61 | // let boston_y_test = Vector::new(boston_y_test); 62 | let boston_y_test = Matrix::new(test_size, 1, boston_y_test); 63 | 64 | // Create a normal generalised linear model 65 | let mut normal_model = GenLinearModel::new(Normal); 66 | normal_model.train(&boston_x_train, &boston_y_train)?; 67 | 68 | let predictions = normal_model.predict(&boston_x_test).unwrap(); 69 | let predictions = Matrix::new(test_size, 1, predictions); 70 | let acc = neg_mean_squared_error(&predictions, &boston_y_test); 71 | println!("glm poisson accuracy: {:?}", acc); 72 | println!("glm poisson R2 score: {:?}", r_squared_score( 73 | &boston_y_test.data(), &predictions.data())); 74 | 75 | Ok(()) 76 | } 77 | -------------------------------------------------------------------------------- /chapter2/rustymachine_regression/src/lin_reg.rs: -------------------------------------------------------------------------------- 1 | /// Data gathered from https://www.kaggle.com/vikrishnan/boston-house-prices 2 | /// Boston dataset: https://www.cs.toronto.edu/~delve/data/boston/bostonDetail.html 3 | /// This module shows how to run regression models 4 | use std::io::prelude::*; 5 | use std::io::BufReader; 6 | use std::path::Path; 7 | use std::fs::File; 8 | use std::vec::Vec; 9 | use std::error::Error; 10 | use std::process::exit; 11 | use std::env::args; 12 | 13 | use rusty_machine; 14 | use rusty_machine::linalg::Matrix; 15 | // use rusty_machine::linalg::BaseMatrix; 16 | use rusty_machine::linalg::Vector; 17 | use rusty_machine::learning::lin_reg::LinRegressor; 18 | use rusty_machine::learning::gp::GaussianProcess; 19 | use rusty_machine::learning::gp::ConstMean; 20 | use rusty_machine::learning::toolkit::kernel; 21 | use rusty_machine::learning::glm::{GenLinearModel, Normal}; 22 | use rusty_machine::analysis::score::neg_mean_squared_error; 23 | use rusty_machine::learning::SupModel; 24 | 25 | // use ndarray::{Array, arr1}; 26 | use rand; 27 | use rand::thread_rng; 28 | use rand::seq::SliceRandom; 29 | 30 | use ml_utils::datasets::get_boston_records_from_file; 31 | use ml_utils::sup_metrics::r_squared_score; 32 | 33 | pub fn run() -> Result<(), Box> { 34 | // Get all the data 35 | let filename = "data/housing.csv"; 36 | let mut data = 
get_boston_records_from_file(&filename); // file must be in the folder data 37 | 38 | // shuffle the data. 39 | data.shuffle(&mut thread_rng()); 40 | 41 | // separate out to train and test datasets. 42 | let test_size: f64 = 0.2; 43 | let test_size: f64 = data.len() as f64 * test_size; 44 | let test_size = test_size.round() as usize; 45 | let (test_data, train_data) = data.split_at(test_size); 46 | let train_size = train_data.len(); 47 | let test_size = test_data.len(); 48 | 49 | // differentiate the features and the targets. 50 | let boston_x_train: Vec = train_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 51 | let boston_y_train: Vec = train_data.iter().map(|r| r.into_targets()).collect(); 52 | 53 | let boston_x_test: Vec = test_data.iter().flat_map(|r| r.into_feature_vector()).collect(); 54 | let boston_y_test: Vec = test_data.iter().map(|r| r.into_targets()).collect(); 55 | 56 | // using ndarray 57 | // let boston_x_train = Array::from_shape_vec((train_size, 13), boston_x_train).unwrap(); 58 | // let boston_y_train = Array::from_vec(boston_y_train); 59 | // let boston_x_test = Array::from_shape_vec((test_size, 13), boston_x_test).unwrap(); 60 | // let boston_y_test = Array::from_vec(boston_y_test); 61 | 62 | // COnvert the data into matrices for rusty machine 63 | let boston_x_train = Matrix::new(train_size, 13, boston_x_train); 64 | let boston_y_train = Vector::new(boston_y_train); 65 | let boston_x_test = Matrix::new(test_size, 13, boston_x_test); 66 | // let boston_y_test = Vector::new(boston_y_test); 67 | let boston_y_test = Matrix::new(test_size, 1, boston_y_test); 68 | 69 | // Create a linear regression model 70 | let mut lin_model = LinRegressor::default(); 71 | println!("{:?}", lin_model); 72 | 73 | // Train the model 74 | lin_model.train(&boston_x_train, &boston_y_train); 75 | 76 | // Now we will predict 77 | let predictions = lin_model.predict(&boston_x_test).unwrap(); 78 | let predictions = Matrix::new(test_size, 1, predictions); 79 | let acc = neg_mean_squared_error(&predictions, &boston_y_test); 80 | println!("linear regression error: {:?}", acc); 81 | println!("linear regression R2 score: {:?}", r_squared_score( 82 | &boston_y_test.data(), &predictions.data())); 83 | 84 | Ok(()) 85 | } 86 | -------------------------------------------------------------------------------- /chapter2/rustymachine_regression/src/main.rs: -------------------------------------------------------------------------------- 1 | /// Data gathered from https://www.kaggle.com/vikrishnan/boston-house-prices 2 | /// Boston dataset: https://www.cs.toronto.edu/~delve/data/boston/bostonDetail.html 3 | /// This module shows how to run regression models 4 | extern crate serde; 5 | // This lets us write `#[derive(Deserialize)]`. 6 | #[macro_use] 7 | extern crate serde_derive; 8 | 9 | use std::vec::Vec; 10 | use std::process::exit; 11 | use std::env::args; 12 | 13 | mod lin_reg; 14 | mod gaussian_process_reg; 15 | mod glms; 16 | 17 | fn main() { 18 | let args: Vec = args().collect(); 19 | let model = if args.len() < 2 { 20 | None 21 | } else { 22 | Some(args[1].as_str()) 23 | }; 24 | let res = match model { 25 | None => {println!("nothing", ); Ok(())}, 26 | Some("lr") => lin_reg::run(), 27 | Some("gp") => gaussian_process_reg::run(), 28 | Some("glms") => glms::run(), 29 | Some(_) => lin_reg::run(), 30 | }; 31 | // Putting the main code in another function serves two purposes: 32 | // 1. We can use the `?` operator. 33 | // 2. We can call exit safely, which does not run any destructors. 
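// Example invocations, matching the match arms above:
//   cargo run lr     -> linear regression
//   cargo run gp     -> Gaussian process regression
//   cargo run glms   -> generalised linear model
// Any other argument falls through to linear regression; no argument
// prints "nothing".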
34 | exit(match res { 35 | Ok(_) => 0, 36 | Err(e) => { 37 | println!("{}", e); 38 | 1 39 | } 40 | }) 41 | } -------------------------------------------------------------------------------- /chapter3/reinforcement-learning-frozenlake/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "reinforcement-learning-frozenlake" 3 | version = "0.1.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | rsrl = { git = "https://github.com/Ploppz/rsrl" } 9 | slog = "2.4.1" 10 | ndarray = "0.12.1" 11 | ndarray-linalg = "0.10" -------------------------------------------------------------------------------- /chapter3/reinforcement-learning-frozenlake/src/main.rs: -------------------------------------------------------------------------------- 1 | extern crate rsrl; 2 | #[macro_use] 3 | extern crate slog; 4 | 5 | use rsrl::{ 6 | control::gtd::GreedyGQ, 7 | core::{make_shared, run, Evaluation, Parameter, SerialExperiment}, 8 | domains::{Domain, MountainCar}, 9 | fa::{basis::{Composable, fixed::Fourier}, LFA}, 10 | geometry::Space, 11 | logging, 12 | policies::fixed::{EpsilonGreedy, Greedy, Random}, 13 | }; 14 | 15 | fn main() { 16 | let logger = logging::root(logging::stdout()); 17 | 18 | let domain = MountainCar::default(); 19 | let mut agent = { 20 | let n_actions = domain.action_space().card().into(); 21 | 22 | // Build the linear value functions using a fourier basis projection. 23 | let bases = Fourier::from_space(3, domain.state_space()); 24 | let v_func = LFA::scalar(bases.clone()); 25 | let q_func = make_shared(LFA::vector(bases, n_actions)); 26 | 27 | // Build a stochastic behaviour policy with exponential epsilon. 28 | let policy = EpsilonGreedy::new( 29 | Greedy::new(q_func.clone()), 30 | Random::new(n_actions), 31 | Parameter::exponential(0.2, 0.0001, 0.99), 32 | ); 33 | 34 | GreedyGQ::new(q_func, v_func, policy, 0.01, 0.001, 0.99) 35 | }; 36 | 37 | let domain_builder = Box::new(MountainCar::default); 38 | 39 | // Training phase: 40 | let _training_result = { 41 | // Start a serial learning experiment up to 1000 steps per episode. 42 | let e = SerialExperiment::new(&mut agent, domain_builder.clone(), 1000); 43 | 44 | // Realise 1000 episodes of the experiment generator. 
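// (Note: the call below passes 2000, so 2000 episodes are realised rather
// than the 1000 mentioned above; rsrl's run(experiment, n_episodes, logger)
// drives the experiment for that many episodes and returns the result.)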
45 | run(e, 2000, Some(logger.clone())) 46 | }; 47 | 48 | // Testing phase: 49 | let testing_result = Evaluation::new(&mut agent, domain_builder).next().unwrap(); 50 | 51 | info!(logger, "solution"; testing_result); 52 | } -------------------------------------------------------------------------------- /chapter3/rsrl_custom/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rsrl_custom" 3 | version = "0.1.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | rsrl = { git = "https://github.com/infinite-Joy/rsrl", branch = "mymodel" } 9 | slog = "2.4.1" 10 | ndarray = "0.12.0" -------------------------------------------------------------------------------- /chapter3/rusty_machine_unsupervised/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rusty_machine_unsupervised" 3 | version = "0.1.0" 4 | authors = ["joydeep bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | csv = "1.1.1" 9 | #rusty-machine="0.5.4" 10 | rusty-machine= { path = "../rusty-machine" } 11 | serde = "1" 12 | serde_derive = "1" 13 | rand = "0.7.0" 14 | ml-utils = { path = "../../chapter2/ml-utils" } -------------------------------------------------------------------------------- /chapter3/rusty_machine_unsupervised/data/iris.csv: -------------------------------------------------------------------------------- 1 | sepal_length,sepal_width,petal_length,petal_width,species 2 | 5.1,3.5,1.4,0.2,setosa 3 | 4.9,3.0,1.4,0.2,setosa 4 | 4.7,3.2,1.3,0.2,setosa 5 | 4.6,3.1,1.5,0.2,setosa 6 | 5.0,3.6,1.4,0.2,setosa 7 | 5.4,3.9,1.7,0.4,setosa 8 | 4.6,3.4,1.4,0.3,setosa 9 | 5.0,3.4,1.5,0.2,setosa 10 | 4.4,2.9,1.4,0.2,setosa 11 | 4.9,3.1,1.5,0.1,setosa 12 | 5.4,3.7,1.5,0.2,setosa 13 | 4.8,3.4,1.6,0.2,setosa 14 | 4.8,3.0,1.4,0.1,setosa 15 | 4.3,3.0,1.1,0.1,setosa 16 | 5.8,4.0,1.2,0.2,setosa 17 | 5.7,4.4,1.5,0.4,setosa 18 | 5.4,3.9,1.3,0.4,setosa 19 | 5.1,3.5,1.4,0.3,setosa 20 | 5.7,3.8,1.7,0.3,setosa 21 | 5.1,3.8,1.5,0.3,setosa 22 | 5.4,3.4,1.7,0.2,setosa 23 | 5.1,3.7,1.5,0.4,setosa 24 | 4.6,3.6,1.0,0.2,setosa 25 | 5.1,3.3,1.7,0.5,setosa 26 | 4.8,3.4,1.9,0.2,setosa 27 | 5.0,3.0,1.6,0.2,setosa 28 | 5.0,3.4,1.6,0.4,setosa 29 | 5.2,3.5,1.5,0.2,setosa 30 | 5.2,3.4,1.4,0.2,setosa 31 | 4.7,3.2,1.6,0.2,setosa 32 | 4.8,3.1,1.6,0.2,setosa 33 | 5.4,3.4,1.5,0.4,setosa 34 | 5.2,4.1,1.5,0.1,setosa 35 | 5.5,4.2,1.4,0.2,setosa 36 | 4.9,3.1,1.5,0.1,setosa 37 | 5.0,3.2,1.2,0.2,setosa 38 | 5.5,3.5,1.3,0.2,setosa 39 | 4.9,3.1,1.5,0.1,setosa 40 | 4.4,3.0,1.3,0.2,setosa 41 | 5.1,3.4,1.5,0.2,setosa 42 | 5.0,3.5,1.3,0.3,setosa 43 | 4.5,2.3,1.3,0.3,setosa 44 | 4.4,3.2,1.3,0.2,setosa 45 | 5.0,3.5,1.6,0.6,setosa 46 | 5.1,3.8,1.9,0.4,setosa 47 | 4.8,3.0,1.4,0.3,setosa 48 | 5.1,3.8,1.6,0.2,setosa 49 | 4.6,3.2,1.4,0.2,setosa 50 | 5.3,3.7,1.5,0.2,setosa 51 | 5.0,3.3,1.4,0.2,setosa 52 | 7.0,3.2,4.7,1.4,versicolor 53 | 6.4,3.2,4.5,1.5,versicolor 54 | 6.9,3.1,4.9,1.5,versicolor 55 | 5.5,2.3,4.0,1.3,versicolor 56 | 6.5,2.8,4.6,1.5,versicolor 57 | 5.7,2.8,4.5,1.3,versicolor 58 | 6.3,3.3,4.7,1.6,versicolor 59 | 4.9,2.4,3.3,1.0,versicolor 60 | 6.6,2.9,4.6,1.3,versicolor 61 | 5.2,2.7,3.9,1.4,versicolor 62 | 5.0,2.0,3.5,1.0,versicolor 63 | 5.9,3.0,4.2,1.5,versicolor 64 | 6.0,2.2,4.0,1.0,versicolor 65 | 6.1,2.9,4.7,1.4,versicolor 66 | 5.6,2.9,3.6,1.3,versicolor 67 | 6.7,3.1,4.4,1.4,versicolor 68 | 5.6,3.0,4.5,1.5,versicolor 69 | 5.8,2.7,4.1,1.0,versicolor 70 | 6.2,2.2,4.5,1.5,versicolor 71 | 
5.6,2.5,3.9,1.1,versicolor 72 | 5.9,3.2,4.8,1.8,versicolor 73 | 6.1,2.8,4.0,1.3,versicolor 74 | 6.3,2.5,4.9,1.5,versicolor 75 | 6.1,2.8,4.7,1.2,versicolor 76 | 6.4,2.9,4.3,1.3,versicolor 77 | 6.6,3.0,4.4,1.4,versicolor 78 | 6.8,2.8,4.8,1.4,versicolor 79 | 6.7,3.0,5.0,1.7,versicolor 80 | 6.0,2.9,4.5,1.5,versicolor 81 | 5.7,2.6,3.5,1.0,versicolor 82 | 5.5,2.4,3.8,1.1,versicolor 83 | 5.5,2.4,3.7,1.0,versicolor 84 | 5.8,2.7,3.9,1.2,versicolor 85 | 6.0,2.7,5.1,1.6,versicolor 86 | 5.4,3.0,4.5,1.5,versicolor 87 | 6.0,3.4,4.5,1.6,versicolor 88 | 6.7,3.1,4.7,1.5,versicolor 89 | 6.3,2.3,4.4,1.3,versicolor 90 | 5.6,3.0,4.1,1.3,versicolor 91 | 5.5,2.5,4.0,1.3,versicolor 92 | 5.5,2.6,4.4,1.2,versicolor 93 | 6.1,3.0,4.6,1.4,versicolor 94 | 5.8,2.6,4.0,1.2,versicolor 95 | 5.0,2.3,3.3,1.0,versicolor 96 | 5.6,2.7,4.2,1.3,versicolor 97 | 5.7,3.0,4.2,1.2,versicolor 98 | 5.7,2.9,4.2,1.3,versicolor 99 | 6.2,2.9,4.3,1.3,versicolor 100 | 5.1,2.5,3.0,1.1,versicolor 101 | 5.7,2.8,4.1,1.3,versicolor 102 | 6.3,3.3,6.0,2.5,virginica 103 | 5.8,2.7,5.1,1.9,virginica 104 | 7.1,3.0,5.9,2.1,virginica 105 | 6.3,2.9,5.6,1.8,virginica 106 | 6.5,3.0,5.8,2.2,virginica 107 | 7.6,3.0,6.6,2.1,virginica 108 | 4.9,2.5,4.5,1.7,virginica 109 | 7.3,2.9,6.3,1.8,virginica 110 | 6.7,2.5,5.8,1.8,virginica 111 | 7.2,3.6,6.1,2.5,virginica 112 | 6.5,3.2,5.1,2.0,virginica 113 | 6.4,2.7,5.3,1.9,virginica 114 | 6.8,3.0,5.5,2.1,virginica 115 | 5.7,2.5,5.0,2.0,virginica 116 | 5.8,2.8,5.1,2.4,virginica 117 | 6.4,3.2,5.3,2.3,virginica 118 | 6.5,3.0,5.5,1.8,virginica 119 | 7.7,3.8,6.7,2.2,virginica 120 | 7.7,2.6,6.9,2.3,virginica 121 | 6.0,2.2,5.0,1.5,virginica 122 | 6.9,3.2,5.7,2.3,virginica 123 | 5.6,2.8,4.9,2.0,virginica 124 | 7.7,2.8,6.7,2.0,virginica 125 | 6.3,2.7,4.9,1.8,virginica 126 | 6.7,3.3,5.7,2.1,virginica 127 | 7.2,3.2,6.0,1.8,virginica 128 | 6.2,2.8,4.8,1.8,virginica 129 | 6.1,3.0,4.9,1.8,virginica 130 | 6.4,2.8,5.6,2.1,virginica 131 | 7.2,3.0,5.8,1.6,virginica 132 | 7.4,2.8,6.1,1.9,virginica 133 | 7.9,3.8,6.4,2.0,virginica 134 | 6.4,2.8,5.6,2.2,virginica 135 | 6.3,2.8,5.1,1.5,virginica 136 | 6.1,2.6,5.6,1.4,virginica 137 | 7.7,3.0,6.1,2.3,virginica 138 | 6.3,3.4,5.6,2.4,virginica 139 | 6.4,3.1,5.5,1.8,virginica 140 | 6.0,3.0,4.8,1.8,virginica 141 | 6.9,3.1,5.4,2.1,virginica 142 | 6.7,3.1,5.6,2.4,virginica 143 | 6.9,3.1,5.1,2.3,virginica 144 | 5.8,2.7,5.1,1.9,virginica 145 | 6.8,3.2,5.9,2.3,virginica 146 | 6.7,3.3,5.7,2.5,virginica 147 | 6.7,3.0,5.2,2.3,virginica 148 | 6.3,2.5,5.0,1.9,virginica 149 | 6.5,3.0,5.2,2.0,virginica 150 | 6.2,3.4,5.4,2.3,virginica 151 | 5.9,3.0,5.1,1.8,virginica 152 | -------------------------------------------------------------------------------- /chapter4/SQL_db/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "SQL_db" 3 | version = "0.1.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | postgres = "0.15.2" 9 | -------------------------------------------------------------------------------- /chapter4/SQL_db/src/main.rs: -------------------------------------------------------------------------------- 1 | use postgres; 2 | use postgres::{Connection, TlsMode, Error}; 3 | 4 | #[derive(Debug)] 5 | struct Weather { 6 | id: i32, 7 | month: String, 8 | normal: f64, 9 | warmest: f64, 10 | coldest: f64 11 | } 12 | 13 | fn main() -> Result<(), Box> { 14 | let conn = Connection::connect("postgresql://postgres:postgres@localhost:5432/postgres", 15 | TlsMode::None)?; 16 | 17 | conn.execute("CREATE TABLE IF NOT 
EXISTS weather ( 18 | id SERIAL PRIMARY KEY, 19 | month VARCHAR NOT NULL, 20 | normal DOUBLE PRECISION NOT NULL, 21 | warmest DOUBLE PRECISION NOT NULL, 22 | coldest DOUBLE PRECISION NOT NULL 23 | )", &[])?; 24 | let weathers = vec![ 25 | ("January", 21.3, 27.3, 15.1), 26 | ("February", 23.6, 30.1, 17.0), 27 | ("March", 26.1, 32.7, 19.5), 28 | ("April", 28.0, 34.2, 21.8), 29 | ("May", 27.4, 33.2, 21.4), 30 | ("June", 24.6, 29.2, 20.1), 31 | ("July", 23.9, 28.1, 19.7), 32 | ("August", 23.5, 27.4, 19.5), 33 | ("September", 23.9, 28.2, 19.6), 34 | ("October", 23.7, 28.0, 19.3), 35 | ("November", 22.2, 27.0, 17.5), 36 | ("December", 21.1, 26.2, 16.0) 37 | ]; 38 | 39 | for weather in &weathers { 40 | conn.execute("INSERT INTO weather (month, normal, warmest, coldest) VALUES ($1, $2, $3, $4)", 41 | &[&weather.0, &weather.1, &weather.2, &weather.3])?; 42 | } 43 | 44 | for row in &conn.query("SELECT id, month, normal, warmest, coldest FROM weather", &[])? { 45 | let weather = Weather { 46 | id: row.get(0), 47 | month: row.get(1), 48 | normal: row.get(2), 49 | warmest: row.get(3), 50 | coldest: row.get(4) 51 | }; 52 | println!("{:?}", weather); 53 | } 54 | 55 | // get the average value 56 | for row in &conn.query("SELECT AVG(warmest) FROM weather;", &[])? { 57 | let x: f64 = row.get(0); 58 | println!("{:?}", x); 59 | } 60 | 61 | Ok(()) 62 | } -------------------------------------------------------------------------------- /chapter4/data_formats/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "data_formats" 3 | version = "0.1.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | serde = "1.0.90" 9 | serde_derive = "1.0.90" 10 | serde_json = "1.0" 11 | serde-xml-rs = "0.3.1" 12 | csv = "1.0.7" -------------------------------------------------------------------------------- /chapter4/data_formats/data/sample_2.xml: -------------------------------------------------------------------------------- <project name="...">
    <libraries>
        <library groupId="..." artifactId="..." version="..." />
    </libraries>
    <module>
        <files>
            <file name="..." type="...">Some &lt;java&gt; class</file>
            <file name="..." type="...">Another "java" class</file>
            <file name="..." type="...">Weird 'XML' config</file>
        </files>
    </module>
    <module>
        <files>
            <file name="..." type="...">JavaScript &amp; program</file>
            <file name="..." type="...">Cascading style sheet: © - ҉</file>
        </files>
    </module>
</project> -------------------------------------------------------------------------------- /chapter4/data_formats/src/csvreading.rs: -------------------------------------------------------------------------------- 1 | use std::result::Result; 2 | use std::vec::Vec; 3 | use std::error::Error; 4 | use std::fs::File; 5 | use std::env; 6 | use std::ffi::OsString; 7 | 8 | use csv; 9 | use csv::ReaderBuilder; 10 | 11 | #[derive(Debug)] 12 | #[derive(Deserialize)] 13 | struct IrisRecord { 14 | sepal_length: f64, 15 | sepal_width: f64, 16 | petal_length: f64, 17 | petal_width: f64, 18 | species: String, 19 | } 20 | 21 | /// Returns the second positional argument sent to this process (the first
/// selects the reader in main.rs). If it is missing, this returns an error.
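// Example invocation (hypothetical file path):
//   cargo run csv data/iris.csv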
23 | fn get_first_arg() -> Result> { 24 | match env::args_os().nth(2) { 25 | None => Err(From::from("expected 2 arguments, but got none")), 26 | Some(file_path) => Ok(file_path), 27 | } 28 | } 29 | pub fn run() -> Result<(), Box> { 30 | let file_path = get_first_arg()?; 31 | let file = File::open(file_path)?; 32 | let mut rdr = ReaderBuilder::new() 33 | .has_headers(true) 34 | .from_reader(file); 35 | let mut iris_matrix: Vec = vec![]; 36 | 37 | for result in rdr.deserialize() { 38 | let record: IrisRecord = result?; 39 | iris_matrix.push(record); 40 | } 41 | println!("{:#?}", iris_matrix); 42 | 43 | Ok(()) 44 | } -------------------------------------------------------------------------------- /chapter4/data_formats/src/jsonreading.rs: -------------------------------------------------------------------------------- 1 | use std::str::FromStr; 2 | use serde::{de, Deserialize, Deserializer}; 3 | use std::result::Result; 4 | use std::vec::Vec; 5 | use std::error::Error; 6 | use std::fs::File; 7 | 8 | #[derive(Debug, Serialize, Deserialize)] 9 | struct Prizes { 10 | prizes: Vec, 11 | } 12 | 13 | #[derive(Debug, Serialize, Deserialize)] 14 | #[allow(non_snake_case)] 15 | struct Prize { 16 | category: String, 17 | #[serde(default)] 18 | overallMotivation: Option, 19 | laureates: Vec, 20 | #[serde(deserialize_with = "de_u16_from_str")] 21 | year: u16, 22 | } 23 | 24 | #[derive(Debug, Serialize, Deserialize)] 25 | struct NobelLaureate { 26 | share: String, 27 | #[serde(default)] 28 | motivation: Option, 29 | surname: String, 30 | #[serde(deserialize_with = "de_u16_from_str")] 31 | id: u16, 32 | firstname: String, 33 | } 34 | 35 | fn de_u16_from_str<'a, D>(deserializer: D) -> Result 36 | where D: Deserializer<'a> 37 | { 38 | let s = String::deserialize(deserializer)?; 39 | u16::from_str(&s).map_err(de::Error::custom) 40 | } 41 | 42 | pub fn run() -> Result<(), Box> { 43 | let the_file = r#"{ 44 | "FirstName": "John", 45 | "LastName": "Doe", 46 | "Age": 43, 47 | "Address": { 48 | "Street": "Downing Street 10", 49 | "City": "London", 50 | "Country": "Great Britain" 51 | }, 52 | "PhoneNumbers": [ 53 | "+44 1234567", 54 | "+44 2345678" 55 | ] 56 | }"#; 57 | // random json string 58 | let person: serde_json::Value = serde_json::from_str(the_file).expect("JSON was not well-formatted"); 59 | let address = person.get("Address").unwrap(); 60 | println!("{:?}", address.get("City").unwrap()); 61 | 62 | println!("from prizes json file", ); 63 | let file = File::open("data/prize.json") 64 | .expect("file should open read only"); 65 | let prizes_data: Prizes = serde_json::from_reader(file) 66 | .expect("file should be proper JSON"); 67 | let prizes_0 = &prizes_data.prizes[0]; 68 | println!("category: {:?}", prizes_0.category); 69 | println!("laureates: {:?}", prizes_0.laureates); 70 | println!("overall motivation: {:?}", prizes_0.overallMotivation); 71 | println!("year: {:?}", prizes_0.year); 72 | 73 | Ok(()) 74 | } 75 | -------------------------------------------------------------------------------- /chapter4/data_formats/src/main.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate serde_derive; 3 | 4 | extern crate serde; 5 | extern crate serde_json; 6 | extern crate serde_xml_rs; 7 | 8 | use std::vec::Vec; 9 | use std::process::exit; 10 | use std::env::args; 11 | 12 | mod jsonreading; 13 | mod xmlreading; 14 | mod csvreading; 15 | 16 | fn main() { 17 | let args: Vec = args().collect(); 18 | let model = if args.len() < 2 { 19 | None 20 | } else 
{ 21 | Some(args[1].as_str()) 22 | }; 23 | let res = match model { 24 | None => {println!("nothing", ); Ok(())}, 25 | Some("json") => jsonreading::run(), 26 | Some("xml") => xmlreading::run(), 27 | Some(_) => csvreading::run(), 28 | }; 29 | // Putting the main code in another function serves two purposes: 30 | // 1. We can use the `?` operator. 31 | // 2. We can call exit safely, which does not run any destructors. 32 | exit(match res { 33 | Ok(_) => 0, 34 | Err(e) => { 35 | println!("{}", e); 36 | 1 37 | } 38 | }) 39 | } -------------------------------------------------------------------------------- /chapter4/data_formats/src/xmlreading.rs: -------------------------------------------------------------------------------- 1 | use std::fs::File; 2 | use std::result::Result; 3 | use std::error::Error; 4 | 5 | use serde_xml_rs; 6 | use serde_xml_rs::from_reader; 7 | 8 | #[derive(Deserialize, Debug)] 9 | struct Project { 10 | name: String, 11 | libraries: Vec, 12 | module: Vec, 13 | } 14 | 15 | #[derive(Deserialize, Debug)] 16 | struct Module { 17 | files: Vec, 18 | #[serde(default)] 19 | libraries: Vec, 20 | } 21 | 22 | #[derive(Deserialize, Debug)] 23 | struct Files { 24 | file: Vec, 25 | } 26 | 27 | #[derive(Deserialize, Debug)] 28 | struct FileName { 29 | name: String, 30 | #[serde(rename = "type")] 31 | lang: String, 32 | #[serde(rename = "$value")] 33 | body: String, 34 | } 35 | 36 | #[derive(Deserialize, Debug)] 37 | struct Libraries { 38 | library: Vec, 39 | } 40 | 41 | #[derive(Deserialize, Debug)] 42 | struct Library { 43 | #[serde(rename = "groupId")] 44 | group_id: String, 45 | #[serde(rename = "artifactId")] 46 | artifact_id: String, 47 | version: String, 48 | } 49 | 50 | pub fn run() -> Result<(), Box> { 51 | let file = File::open("data/sample_2.xml").unwrap(); 52 | let project: Project = from_reader(file).unwrap(); 53 | println!("{:#?}", project.libraries[0].library[0]); 54 | println!("{:#?}", project); 55 | Ok(()) 56 | } 57 | -------------------------------------------------------------------------------- /chapter4/data_transformations_datafusion/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "data_transformations_datafusion" 3 | version = "0.1.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | datafusion = "0.13.0" 9 | arrow = "0.13.0" -------------------------------------------------------------------------------- /chapter4/data_transformations_datafusion/src/main.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use arrow; 4 | use datafusion; 5 | use arrow::array::{BinaryArray, Float64Array, UInt16Array, ListArray}; 6 | use arrow::datatypes::{DataType, Field, Schema}; 7 | 8 | use datafusion::execution::context::ExecutionContext; 9 | 10 | fn main() { 11 | // create local execution context 12 | let mut ctx = ExecutionContext::new(); 13 | 14 | // define schema for data source (csv file) 15 | let schema = Arc::new(Schema::new(vec![ 16 | Field::new("PassengerId", DataType::Int32, false), 17 | Field::new("Survived", DataType::Int32, false), 18 | Field::new("Pclass", DataType::Int32, false), 19 | Field::new("Name", DataType::Utf8, false), 20 | Field::new("Sex", DataType::Utf8, false), 21 | Field::new("Age", DataType::Int32, true), 22 | Field::new("SibSp", DataType::Int32, false), 23 | Field::new("Parch", DataType::Int32, false), 24 | Field::new("Ticket", DataType::Utf8, false), 25 | Field::new("Fare", 
DataType::Float64, false), 26 | Field::new("Cabin", DataType::Utf8, true), 27 | Field::new("Embarked", DataType::Utf8, false), 28 | ])); 29 | 30 | // register csv file with the execution context 31 | ctx.register_csv( 32 | "titanic", 33 | "titanic/train.csv", 34 | &schema, 35 | true, 36 | ); 37 | 38 | // simple projection and selection 39 | let sql = "SELECT Name, Sex FROM titanic WHERE Fare > 8"; 40 | let sql1 = "SELECT MAX(Fare) FROM titanic WHERE Survived = 1"; 41 | 42 | // execute the query 43 | let relation = ctx.sql(&sql, 1024 * 1024).unwrap(); 44 | let relation1 = ctx.sql(&sql1, 1024 * 1024).unwrap(); 45 | 46 | // display the relation 47 | let mut results = relation.borrow_mut(); 48 | let mut results1 = relation1.borrow_mut(); 49 | 50 | while let Some(batch) = results.next().unwrap() { 51 | println!( 52 | "RecordBatch has {} rows and {} columns", 53 | batch.num_rows(), 54 | batch.num_columns() 55 | ); 56 | 57 | let name = batch 58 | .column(0) 59 | .as_any() 60 | .downcast_ref::() 61 | .unwrap(); 62 | 63 | let sex = batch 64 | .column(1) 65 | .as_any() 66 | // .downcast_ref::() 67 | .downcast_ref::() 68 | .unwrap(); 69 | 70 | for i in 0..batch.num_rows() { 71 | let name_value: String = String::from_utf8(name.value(i).to_vec()).unwrap(); 72 | let sex_value: String = String::from_utf8(sex.value(i).to_vec()).unwrap(); 73 | 74 | println!("name: {}, sex: {}", name_value, sex_value,); 75 | } 76 | } 77 | while let Some(batch) = results1.next().unwrap() { 78 | println!( 79 | "RecordBatch has {} rows and {} columns", 80 | batch.num_rows(), 81 | batch.num_columns() 82 | ); 83 | 84 | let name = batch 85 | .column(0) 86 | .as_any() 87 | .downcast_ref::() 88 | .unwrap(); 89 | 90 | for i in 0..batch.num_rows() { 91 | let name_value: f64 = name.value(i); 92 | 93 | println!("name: {}", name_value,); 94 | } 95 | } 96 | println!("Hello, world!"); 97 | } -------------------------------------------------------------------------------- /chapter4/databases/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "databases" 3 | version = "0.1.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | postgres = "0.15.2" 9 | rusted_cypher = "1.1.0" -------------------------------------------------------------------------------- /chapter4/databases/src/main.rs: -------------------------------------------------------------------------------- 1 | use std::vec::Vec; 2 | use std::process::exit; 3 | use std::env::args; 4 | 5 | mod neo4j_db; 6 | mod postgres_db; 7 | 8 | fn main() { 9 | let args: Vec = args().collect(); 10 | let model = if args.len() < 2 { 11 | None 12 | } else { 13 | Some(args[1].as_str()) 14 | }; 15 | let res = match model { 16 | None => {println!("nothing", ); Ok(())}, 17 | Some("postgres") => postgres_db::run(), 18 | Some("neo4j") => neo4j_db::run(), 19 | Some(_) => {println!("nothing", ); Ok(())}, 20 | }; 21 | // Putting the main code in another function serves two purposes: 22 | // 1. We can use the `?` operator. 23 | // 2. We can call exit safely, which does not run any destructors. 
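// Example invocations, matching the match arms above:
//   cargo run postgres
//   cargo run neo4j
// Any other argument (or none) just prints "nothing".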
24 | exit(match res { 25 | Ok(_) => 0, 26 | Err(e) => { 27 | println!("{}", e); 28 | 1 29 | } 30 | }) 31 | } -------------------------------------------------------------------------------- /chapter4/databases/src/neo4j_db.rs: -------------------------------------------------------------------------------- 1 | use std::error::Error; 2 | 3 | use rusted_cypher; 4 | use rusted_cypher::{GraphClient, Statement}; 5 | use std::iter::repeat; 6 | 7 | pub fn run() -> Result<(), Box> { 8 | // let graph = GraphClient::connect( 9 | // "http://neo4j:neo4j@localhost:7474/db/data"); 10 | let graph = GraphClient::connect( 11 | "http://localhost:7474/db/data")?; 12 | 13 | let mut query = graph.query(); 14 | 15 | // create index 16 | let statement1 = Statement::new( 17 | "CREATE CONSTRAINT ON (m:Movie) ASSERT m.id IS UNIQUE;"); 18 | let statement2 = Statement::new( 19 | " CREATE CONSTRAINT ON (u:User) ASSERT u.id IS UNIQUE;" 20 | ); 21 | let statement3 = Statement::new( 22 | " CREATE CONSTRAINT ON (g:Genre) ASSERT g.name IS UNIQUE;" 23 | ); 24 | 25 | query.add_statement(statement1); 26 | query.add_statement(statement2); 27 | query.add_statement(statement3); 28 | 29 | query.send()?; 30 | 31 | // import movies.csv 32 | graph.exec( 33 | "USING PERIODIC COMMIT LOAD CSV WITH HEADERS \ 34 | FROM \"http://10.0.1.43:8000/movies.csv\" AS line \ 35 | WITH line, SPLIT(line.genres, \"|\") AS Genres \ 36 | CREATE (m:Movie { id: TOINTEGER(line.`movieId`), title: line.`title` }) \ 37 | WITH Genres \ 38 | UNWIND RANGE(0, SIZE(Genres)-1) as i \ 39 | MERGE (g:Genre {name: UPPER(Genres[i])}) \ 40 | CREATE (m)-[r:GENRE {position:i+1}]->(g);" 41 | )?; 42 | 43 | // import ratings.csv 44 | graph.exec( 45 | " USING PERIODIC COMMIT LOAD CSV WITH HEADERS \ 46 | FROM \"http://10.0.1.43:8000/ratings.csv\" AS line \ 47 | WITH line \ 48 | MATCH (m:Movie { id: TOINTEGER(line.`movieId`) }) \ 49 | MATCH (u:User { id: TOINTEGER(line.`userId`) }) \ 50 | CREATE (u)-[r:RATING {rating: TOFLOAT(line.`rating`)}]->(m);" 51 | )?; 52 | 53 | // import tags 54 | graph.exec( 55 | " USING PERIODIC COMMIT LOAD CSV WITH HEADERS \ 56 | FROM \"http://10.0.1.43:8000/tags.csv\" AS line \ 57 | WITH line \ 58 | MATCH (m:Movie { id: TOINTEGER(line.`movieId`) }) \ 59 | MERGE (u:User { id: TOINTEGER(line.`userId`) }) \ 60 | CREATE (u)-[r:TAG {tag: line.`tag`}]->(m);" 61 | )?; 62 | 63 | let result = graph.exec( 64 | "MATCH (u:User {id: 119}) RETURN u.id")?; 65 | 66 | assert_eq!(result.data.len(), 1); 67 | 68 | for row in result.rows() { 69 | let id: u16 = row.get("u.id")?; 70 | println!("user id: {}", id); 71 | } 72 | 73 | // understand the shortest paths between all 74 | 75 | let all_users = graph.exec( 76 | "MATCH (u:User) RETURN u.id")?; 77 | let all_users: Vec = all_users.rows().map(|x| x.get("u.id").unwrap()).collect(); 78 | 79 | let mut length_of_paths = vec![]; 80 | for (u1, u2) in all_users.iter() 81 | .enumerate() 82 | .flat_map(|(i, val)| repeat(val).zip(all_users.iter().skip(i + 1))) { 83 | let statement = format!( 84 | "MATCH (n:User) where n.id IN [{user1}, {user2}] 85 | WITH collect(n) as nodes 86 | UNWIND nodes as n 87 | UNWIND nodes as m 88 | WITH * WHERE id(n) < id(m) 89 | MATCH path = allShortestPaths( (n)-[*..4]-(m) ) 90 | RETURN min(length(path))", user1=u1, user2=u2); 91 | let min_paths = graph.exec(statement)?; 92 | let min_paths: Vec> = min_paths.rows().map(|x| x.get("min(length(path))").unwrap()).collect(); 93 | match min_paths[0] { 94 | Some(mp) => {length_of_paths.push((u1, u2, mp)); ()}, 95 | _ => (), 96 | }; 97 | } 98 | println!("{:?}", 
length_of_paths); 99 | 100 | Ok(()) 101 | } -------------------------------------------------------------------------------- /chapter4/databases/src/postgres_db.rs: -------------------------------------------------------------------------------- 1 | use std::error::Error; 2 | 3 | use postgres; 4 | use postgres::{Connection, TlsMode}; 5 | 6 | #[derive(Debug)] 7 | struct Weather { 8 | id: i32, 9 | month: String, 10 | normal: f64, 11 | warmest: f64, 12 | coldest: f64 13 | } 14 | pub fn run() -> Result<(), Box> { 15 | let conn = Connection::connect("postgresql://postgres:postgres@localhost:5432/postgres", 16 | TlsMode::None)?; 17 | 18 | conn.execute("CREATE TABLE IF NOT EXISTS weather ( 19 | id SERIAL PRIMARY KEY, 20 | month VARCHAR NOT NULL, 21 | normal DOUBLE PRECISION NOT NULL, 22 | warmest DOUBLE PRECISION NOT NULL, 23 | coldest DOUBLE PRECISION NOT NULL 24 | )", &[])?; 25 | let weathers = vec![ 26 | ("January", 21.3, 27.3, 15.1), 27 | ("February", 23.6, 30.1, 17.0), 28 | ("March", 26.1, 32.7, 19.5), 29 | ("April", 28.0, 34.2, 21.8), 30 | ("May", 27.4, 33.2, 21.4), 31 | ("June", 24.6, 29.2, 20.1), 32 | ("July", 23.9, 28.1, 19.7), 33 | ("August", 23.5, 27.4, 19.5), 34 | ("September", 23.9, 28.2, 19.6), 35 | ("October", 23.7, 28.0, 19.3), 36 | ("November", 22.2, 27.0, 17.5), 37 | ("December", 21.1, 26.2, 16.0) 38 | ]; 39 | 40 | for weather in &weathers { 41 | conn.execute("INSERT INTO weather (month, normal, warmest, coldest) VALUES ($1, $2, $3, $4)", 42 | &[&weather.0, &weather.1, &weather.2, &weather.3])?; 43 | } 44 | 45 | for row in &conn.query("SELECT id, month, normal, warmest, coldest FROM weather", &[])? { 46 | let weather = Weather { 47 | id: row.get(0), 48 | month: row.get(1), 49 | normal: row.get(2), 50 | warmest: row.get(3), 51 | coldest: row.get(4) 52 | }; 53 | println!("{:?}", weather); 54 | } 55 | 56 | // get the average value 57 | for row in &conn.query("SELECT AVG(warmest) FROM weather;", &[])? 
{ 58 | let x: f64 = row.get(0); 59 | println!("{:?}", x); 60 | } 61 | 62 | Ok(()) 63 | } -------------------------------------------------------------------------------- /chapter4/graph_db/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "graph_db" 3 | version = "0.1.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | rusted_cypher = "1.1.0" 9 | -------------------------------------------------------------------------------- /chapter4/graph_db/src/main.rs: -------------------------------------------------------------------------------- 1 | use rusted_cypher; 2 | use rusted_cypher::{GraphClient, Statement, GraphError}; 3 | use std::iter::repeat; 4 | 5 | fn main() -> Result<(), Box> { 6 | // let graph = GraphClient::connect( 7 | // "http://neo4j:neo4j@localhost:7474/db/data"); 8 | let graph = GraphClient::connect( 9 | "http://localhost:7474/db/data")?; 10 | 11 | let mut query = graph.query(); 12 | 13 | // create index 14 | let statement1 = Statement::new( 15 | "CREATE CONSTRAINT ON (m:Movie) ASSERT m.id IS UNIQUE;"); 16 | let statement2 = Statement::new( 17 | " CREATE CONSTRAINT ON (u:User) ASSERT u.id IS UNIQUE;" 18 | ); 19 | let statement3 = Statement::new( 20 | " CREATE CONSTRAINT ON (g:Genre) ASSERT g.name IS UNIQUE;" 21 | ); 22 | 23 | query.add_statement(statement1); 24 | query.add_statement(statement2); 25 | query.add_statement(statement3); 26 | 27 | query.send()?; 28 | 29 | // import movies.csv 30 | graph.exec( 31 | "USING PERIODIC COMMIT LOAD CSV WITH HEADERS \ 32 | FROM \"http://10.0.1.43:8000/movies.csv\" AS line \ 33 | WITH line, SPLIT(line.genres, \"|\") AS Genres \ 34 | CREATE (m:Movie { id: TOINTEGER(line.`movieId`), title: line.`title` }) \ 35 | WITH Genres \ 36 | UNWIND RANGE(0, SIZE(Genres)-1) as i \ 37 | MERGE (g:Genre {name: UPPER(Genres[i])}) \ 38 | CREATE (m)-[r:GENRE {position:i+1}]->(g);" 39 | )?; 40 | 41 | // import ratings.csv 42 | graph.exec( 43 | " USING PERIODIC COMMIT LOAD CSV WITH HEADERS \ 44 | FROM \"http://10.0.1.43:8000/ratings.csv\" AS line \ 45 | WITH line \ 46 | MATCH (m:Movie { id: TOINTEGER(line.`movieId`) }) \ 47 | MATCH (u:User { id: TOINTEGER(line.`userId`) }) \ 48 | CREATE (u)-[r:RATING {rating: TOFLOAT(line.`rating`)}]->(m);" 49 | )?; 50 | 51 | // import tags 52 | graph.exec( 53 | " USING PERIODIC COMMIT LOAD CSV WITH HEADERS \ 54 | FROM \"http://10.0.1.43:8000/tags.csv\" AS line \ 55 | WITH line \ 56 | MATCH (m:Movie { id: TOINTEGER(line.`movieId`) }) \ 57 | MERGE (u:User { id: TOINTEGER(line.`userId`) }) \ 58 | CREATE (u)-[r:TAG {tag: line.`tag`}]->(m);" 59 | )?; 60 | 61 | let result = graph.exec( 62 | "MATCH (u:User {id: 119}) RETURN u.id")?; 63 | 64 | assert_eq!(result.data.len(), 1); 65 | 66 | for row in result.rows() { 67 | let id: u16 = row.get("u.id")?; 68 | println!("user id: {}", id); 69 | } 70 | 71 | // understand the shortest paths between all 72 | 73 | let all_users = graph.exec( 74 | "MATCH (u:User) RETURN u.id")?; 75 | let all_users: Vec = all_users.rows().map(|x| x.get("u.id").unwrap()).collect(); 76 | 77 | let mut length_of_paths = vec![]; 78 | for (u1, u2) in all_users.iter() 79 | .enumerate() 80 | .flat_map(|(i, val)| repeat(val).zip(all_users.iter().skip(i + 1))) { 81 | let statement = format!( 82 | "MATCH (n:User) where n.id IN [{user1}, {user2}] 83 | WITH collect(n) as nodes 84 | UNWIND nodes as n 85 | UNWIND nodes as m 86 | WITH * WHERE id(n) < id(m) 87 | MATCH path = allShortestPaths( (n)-[*..4]-(m) ) 88 | RETURN 
min(length(path))", user1=u1, user2=u2); 89 | let min_paths = graph.exec(statement)?; 90 | let min_paths: Vec> = min_paths.rows().map(|x| x.get("min(length(path))").unwrap()).collect(); 91 | match min_paths[0] { 92 | Some(mp) => {length_of_paths.push((u1, u2, mp)); ()}, 93 | _ => (), 94 | }; 95 | } 96 | println!("{:?}", length_of_paths); 97 | 98 | Ok(()) 99 | } -------------------------------------------------------------------------------- /chapter4/matrix_transformations/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "matrix_transformations" 3 | version = "0.1.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | ndarray-rand = "0.9.0" 9 | ndarray = "0.12.1" -------------------------------------------------------------------------------- /chapter4/matrix_transformations/src/main.rs: -------------------------------------------------------------------------------- 1 | use ndarray; 2 | use ndarray::prelude::*; 3 | use ndarray::stack; 4 | 5 | fn main() { 6 | let a1 = arr2(&[[0., 1., 2.], 7 | [3., 4., 5.]]); 8 | println!("{:?}", a1); 9 | println!("------------------------", ); 10 | 11 | let a2 = Array::from_shape_vec((2, 3).strides((3, 1)), 12 | vec![0., 1., 2., 3., 4., 5.]).unwrap(); 13 | assert!(a1 == a2); 14 | 15 | let a_T = a1.t(); 16 | println!("transposed matrix:"); 17 | println!("{:?}", a_T); 18 | println!("------------------------", ); 19 | 20 | let a_mm = a1.dot(&a_T); 21 | println!("dot product:"); 22 | println!("{:?}", a_mm); 23 | println!("{:?}", a_mm.shape()); // output [2, 2] 24 | println!("------------------------", ); 25 | } 26 | -------------------------------------------------------------------------------- /chapter4/s3_files/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "s3_files" 3 | version = "0.1.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | rusoto_s3 = "0.38.0" 9 | rusoto_core = "0.38.0" 10 | env_logger = "0.6.1" 11 | futures = "0.1.26" 12 | futures-fs = "0.0.5" 13 | rand = "0.6.5" 14 | csv = "1.0.7" 15 | ml-utils = { path = "../../ml-utils" } 16 | rustlearn = "0.5.0" -------------------------------------------------------------------------------- /chapter4/scraping/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "scraping" 3 | version = "0.1.0" 4 | authors = ["joydeep bhattacharjee"] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | reqwest = "0.9.15" 9 | scraper = "0.10.0" -------------------------------------------------------------------------------- /chapter4/scraping/src/main.rs: -------------------------------------------------------------------------------- 1 | use std::time::{SystemTime, UNIX_EPOCH}; 2 | use reqwest; 3 | use scraper::{Selector, Html}; 4 | 5 | 6 | fn main() -> Result<(), Box> { 7 | let start = SystemTime::now(); 8 | let since_the_epoch = start.duration_since(UNIX_EPOCH) 9 | .expect("Time went backwards"); 10 | 11 | let mut resp = reqwest::get( 12 | "https://www.moneycontrol.com/india/stockpricequote/power-generation-distribution/ntpc/NTP")?; 13 | assert!(resp.status().is_success()); 14 | 15 | let body = resp.text().unwrap(); 16 | let fragment = Html::parse_document(&body); 17 | // let stories = Selector::parse("#Bse_Prc_tick > strong:nth-child(1)").unwrap(); 18 | let stories = Selector::parse("#mktdet_2 > div:nth-child(2) > div:nth-child(3) > 
div.FR.gD_12").unwrap(); 19 | 20 | for price in fragment.select(&stories) { 21 | let price_txt = price.text().collect::>(); 22 | if price_txt.len() == 1 { 23 | println!("{:?}", (since_the_epoch, price_txt[0])); 24 | } 25 | } 26 | 27 | Ok(()) 28 | } 29 | 30 | -------------------------------------------------------------------------------- /chapter5/crfsuite-model/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "crfsuite-model" 3 | version = "0.1.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | # data reading and organisation 9 | csv = "1.0.7" 10 | serde = "1" 11 | serde_derive = "1" 12 | rand = "0.6.5" 13 | 14 | # machine learning 15 | crfsuite = "0.2.6" -------------------------------------------------------------------------------- /chapter5/crfsuite-model/src/main.rs: -------------------------------------------------------------------------------- 1 | extern crate serde; 2 | #[macro_use] 3 | extern crate serde_derive; 4 | 5 | use std::io; 6 | use std::vec::Vec; 7 | use std::error::Error; 8 | 9 | use csv; 10 | use rand; 11 | use rand::thread_rng; 12 | use rand::seq::SliceRandom; 13 | 14 | use crfsuite::{Model, Attribute, CrfError}; 15 | use crfsuite::{Trainer, Algorithm, GraphicalModel}; 16 | 17 | #[derive(Debug, Deserialize, Clone)] 18 | pub struct NER { 19 | // #[serde(rename = "")] 20 | // id: String, 21 | lemma: String, 22 | #[serde(rename = "next-lemma")] 23 | next_lemma: String, 24 | // next-next-lemma: String, 25 | // next-next-pos: String, 26 | // next-next-shape: String, 27 | // next-next-word: String, 28 | // next-pos: String, 29 | // next-shape: String, 30 | // next-word: String, 31 | // pos: String, 32 | // prev-iob: String, 33 | // prev-lemma: String, 34 | // prev-pos: String, 35 | // prev-prev-iob: String, 36 | // prev-prev-lemma: String, 37 | // prev-prev-pos: String, 38 | // prev-prev-shape: String, 39 | // prev-prev-word: String, 40 | // prev-shape: String, 41 | // prev-word: String, 42 | // sentence_idx: String, 43 | // shape: String, 44 | word: String, 45 | tag: String 46 | } 47 | 48 | fn get_data() -> Result, Box> { 49 | let mut rdr = csv::Reader::from_reader(io::stdin()); 50 | let mut data = Vec::new(); 51 | for result in rdr.deserialize() { 52 | let r: NER = result?; 53 | data.push(r); 54 | } 55 | // println!("{:?}", data.len()); 56 | data.shuffle(&mut thread_rng()); 57 | Ok(data) 58 | } 59 | 60 | fn split_test_train(data: &[NER], test_size: f32) -> (Vec, Vec) { 61 | let test_size: f32 = data.len() as f32 * test_size; 62 | let test_size = test_size.round() as usize; 63 | 64 | let (test_data, train_data) = data.split_at(test_size); 65 | (test_data.to_vec(), train_data.to_vec()) 66 | } 67 | 68 | fn create_xseq_yseq(data: &[NER]) 69 | -> (Vec>, Vec) { 70 | let mut xseq = vec![]; 71 | let mut yseq = vec![]; 72 | for item in data { 73 | let seq = vec![Attribute::new(item.lemma.clone(), 1.0), 74 | Attribute::new(item.next_lemma.clone(), 0.5)]; // higher weightage for the mainword. 75 | xseq.push(seq); 76 | yseq.push(item.tag.clone()); 77 | } 78 | (xseq, yseq) 79 | } 80 | 81 | fn check_accuracy(preds: &[String], actual: &[String]) { 82 | let mut hits = 0; 83 | let mut correct_hits = 0; 84 | for (predicted, actual) in preds.iter().zip(actual) { 85 | if actual != "O" { // will not consider the other category as it bloats the accuracy. 
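// (Note: the `actual != "O"` re-check on the next line is redundant, since
// this branch has already excluded the "O" tag; hits and correct_hits
// therefore count only the non-"O" labels.)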
86 | if predicted == actual && actual != "O" { 87 | correct_hits += 1; 88 | } 89 | hits += 1; 90 | } 91 | } 92 | println!("accuracy={} ({}/{} correct)", 93 | correct_hits as f32 / hits as f32, 94 | correct_hits, 95 | hits); 96 | } 97 | 98 | fn crfmodel_training(xseq: Vec>, 99 | yseq: Vec, 100 | model_name: &str) -> Result<(), Box> { 101 | let mut trainer = Trainer::new(true); 102 | trainer.select(Algorithm::AROW, GraphicalModel::CRF1D)?; 103 | trainer.append(&xseq, &yseq, 0i32)?; 104 | trainer.train(model_name, -1i32)?; // using all instances for training. 105 | Ok(()) 106 | } 107 | 108 | fn model_prediction(xtest: Vec>, 109 | model_name: &str) 110 | -> Result, Box>{ 111 | let model = Model::from_file(model_name)?; 112 | let mut tagger = model.tagger()?; 113 | let preds = tagger.tag(&xtest)?; 114 | Ok(preds) 115 | } 116 | 117 | fn main() { 118 | let data = get_data().unwrap(); 119 | let (test_data, train_data) = split_test_train(&data, 0.2); 120 | let (xseq_train, yseq_train) = create_xseq_yseq(&train_data); 121 | let (xseq_test, yseq_test) = create_xseq_yseq(&test_data); 122 | crfmodel_training(xseq_train, yseq_train, "rustml.crfsuite").unwrap(); 123 | let preds = model_prediction(xseq_test, "rustml.crfsuite").unwrap(); 124 | check_accuracy(&preds, &yseq_test); 125 | } 126 | -------------------------------------------------------------------------------- /chapter5/fasttext-model/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "fasttext-model" 3 | version = "0.1.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | # data reading and organisation 9 | csv = "1.0.7" 10 | serde = "1" 11 | serde_derive = "1" 12 | rand = "0.6.5" 13 | 14 | # machine learning 15 | fasttext = "0.4.1" 16 | 17 | # text normalisation 18 | stopwords = "0.1.0" 19 | vtext = "0.1.0-alpha.1" 20 | rust-stemmers = "1.1.0" 21 | 22 | # others 23 | itertools = "0.8.0" -------------------------------------------------------------------------------- /chapter5/fasttext-model/src/main.rs: -------------------------------------------------------------------------------- 1 | extern crate serde; 2 | // This lets us write `#[derive(Deserialize)]`. 
3 | #[macro_use] 4 | extern crate serde_derive; 5 | 6 | use std::io; 7 | use std::vec::Vec; 8 | use std::error::Error; 9 | use std::io::Write; 10 | use std::fs::File; 11 | 12 | use csv; 13 | use rand; 14 | use rand::thread_rng; 15 | use rand::seq::SliceRandom; 16 | 17 | use fasttext::{FastText, Args, ModelName, LossName}; 18 | use stopwords; 19 | use std::collections::HashSet; 20 | use stopwords::{Spark, Language, Stopwords}; 21 | use itertools::Itertools; 22 | use vtext::tokenize::VTextTokenizer; 23 | use rust_stemmers::{Algorithm, Stemmer}; 24 | 25 | const TRAIN_FILE: &str = "data.train"; 26 | const TEST_FILE: &str = "data.test"; 27 | const MODEL: &str = "model.bin"; 28 | 29 | #[derive(Debug, Deserialize)] 30 | pub struct SpookyAuthor { 31 | id: String, 32 | text: String, 33 | author: String 34 | } 35 | 36 | impl SpookyAuthor { 37 | pub fn into_tokens(&self) -> String { 38 | // convert all to lowercase 39 | let lc_text = self.text.to_lowercase(); 40 | 41 | // tokenise the words 42 | let tok = VTextTokenizer::new("en"); 43 | let tokens: Vec<&str> = tok.tokenize(lc_text.as_str()).collect(); 44 | 45 | // stem the words 46 | let en_stemmer = Stemmer::create(Algorithm::English); 47 | let tokens: Vec = tokens.iter().map(|x| en_stemmer.stem(x).into_owned()).collect(); 48 | let mut tokens: Vec<&str> = tokens.iter().map(|x| x.as_str()).collect(); 49 | 50 | // remove the stopwords 51 | let stops: HashSet<_> = Spark::stopwords(Language::English) 52 | .unwrap().iter().collect(); 53 | tokens.retain(|s| !stops.contains(s)); 54 | 55 | // join the tokens and return 56 | tokens.iter().join(" ") 57 | } 58 | 59 | fn into_labels(&self) -> String { 60 | match self.author.as_str() { 61 | "EAP" => String::from("__label__EAP"), 62 | "HPL" => String::from("__label__HPL"), 63 | "MWS" => String::from("__label__MWS"), 64 | l => panic!("Not able to parse the target. Some other target got passed. {:?}", l), 65 | } 66 | } 67 | } 68 | 69 | fn push_training_data_to_file(train_data: &[SpookyAuthor], filename: &str) -> Result<(), Box> { 70 | let mut f = File::create(filename)?; 71 | for item in train_data { 72 | writeln!(f, "{} {}", item.into_labels(), item.into_tokens())?; 73 | } 74 | Ok(()) 75 | } 76 | 77 | fn push_test_data_to_file(test_data: &[SpookyAuthor], filename: &str) -> Result<(), Box> { 78 | let mut f = File::create(filename)?; 79 | for item in test_data { 80 | writeln!(f, "{}", item.into_tokens())?; 81 | } 82 | Ok(()) 83 | } 84 | 85 | fn main() -> Result<(), Box> { 86 | let mut rdr = csv::Reader::from_reader(io::stdin()); 87 | let mut data = Vec::new(); 88 | for result in rdr.deserialize() { 89 | let r: SpookyAuthor = result?; 90 | data.push(r); 91 | } 92 | data.shuffle(&mut thread_rng()); 93 | 94 | // separate out to train and test datasets. 
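// Note: fastText's supervised trainer reads one example per line, with the
// label marked by a `__label__` prefix, e.g.
//   __label__EAP some stemmed tokens of the sentence ...
// which is the format push_training_data_to_file writes to data.train above;
// the test split below is written token-only to data.test.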
95 | let test_size: f32 = 0.2; 96 | let test_size: f32 = data.len() as f32 * test_size; 97 | let test_size = test_size.round() as usize; 98 | 99 | let (test_data, train_data) = data.split_at(test_size); 100 | push_training_data_to_file(train_data.to_owned(), TRAIN_FILE)?; 101 | push_test_data_to_file(test_data.to_owned(), TEST_FILE)?; 102 | 103 | // model initiation and training 104 | let mut args = Args::new(); 105 | args.set_input(TRAIN_FILE); 106 | args.set_model(ModelName::SUP); 107 | args.set_loss(LossName::SOFTMAX); 108 | let mut ft_model = FastText::new(); 109 | ft_model.train(&args).unwrap(); 110 | 111 | // accuracy 112 | let preds = test_data.iter().map(|x| ft_model.predict(x.text.as_str(), 1, 0.0)); 113 | let test_labels = test_data.iter().map(|x| x.into_labels()); 114 | let mut hits = 0; 115 | let mut correct_hits = 0; 116 | let preds_clone = preds.clone(); 117 | for (predicted, actual) in preds.zip(test_labels) { 118 | let predicted = predicted?; 119 | let predicted = &predicted[0]; // only taking the first value. 120 | if predicted.clone().label == actual { 121 | correct_hits += 1; 122 | } 123 | hits += 1; 124 | } 125 | assert_eq!(hits, preds_clone.len()); 126 | println!("accuracy={} ({}/{} correct)", correct_hits as f32 / hits as f32, correct_hits, preds_clone.len()); 127 | ft_model.save_model(MODEL)?; 128 | 129 | Ok(()) 130 | } 131 | 132 | #[cfg(test)] 133 | mod tests { 134 | use super::*; 135 | use csv; 136 | 137 | #[test] 138 | fn test_spooky_author() { 139 | let data = "\"id\",\"text\",\"author\"\n\"id26305\",\"This process, however, afforded me no means of ascertaining the dimensions of my dungeon; as I might make its circuit, and return to the point whence I set out, without being aware of the fact; so perfectly uniform seemed the wall.\",\"EAP\"\n\"id17569\",\"It never once occurred to me that the fumbling might be a mere mistake.\",\"HPL\""; 140 | let mut rdr = csv::Reader::from_reader(data.as_bytes()); 141 | let mut data = Vec::new(); 142 | for result in rdr.deserialize() { 143 | let r: SpookyAuthor = result.unwrap(); 144 | data.push(r); 145 | } 146 | assert_eq!(data[0].into_training_string(), "__label__EAP This process, however, afforded me no means of ascertaining the dimensions of my dungeon; as I might make its circuit, and return to the point whence I set out, without being aware of the fact; so perfectly uniform seemed the wall."); 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /chapter5/jigsaw/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "jigsaw" 3 | version = "0.1.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | # nlp related 9 | fasttext = "0.4.1" 10 | vtext = "0.1.0-alpha.1" 11 | stopwords = "0.1.0" 12 | 13 | # matrices 14 | sprs = "0.6.4" 15 | ndarray = "0.12.1" 16 | 17 | # data reading and organisation 18 | csv = "1.0.7" 19 | serde = "1" 20 | serde_derive = "1" 21 | rand = "0.6.5" 22 | 23 | # model building 24 | rustlearn = "0.5.0" -------------------------------------------------------------------------------- /chapter5/jigsaw/references.txt: -------------------------------------------------------------------------------- 1 | https://www.kaggle.com/sandeepkumar121995/keras-bi-gru-lstm-attention-fasttext 2 | https://www.kaggle.com/abhishek/approaching-almost-any-nlp-problem-on-kaggle 3 | 4 | // now we can probably load them to different vectors. 
5 | 
6 | // check out the rust repl https://github.com/google/evcxr
-------------------------------------------------------------------------------- /chapter5/snips-model/Cargo.toml: --------------------------------------------------------------------------------
1 | [package]
2 | name = "snips-model"
3 | version = "0.1.0"
4 | authors = ["Joydeep Bhattacharjee "]
5 | edition = "2018"
6 | 
7 | [dependencies]
8 | snips-nlu-lib = { git = "https://github.com/snipsco/snips-nlu-rs", branch = "master" }
9 | rocket = "0.4.0"
10 | rocket_contrib = "0.4.0"
11 | serde = "1.0"
12 | serde_json = "1.0"
13 | serde_derive = "1.0"
14 | 
-------------------------------------------------------------------------------- /chapter5/snips-model/snips_training.md: --------------------------------------------------------------------------------
1 | python3 -m venv venv
2 | source venv/bin/activate
3 | pip install snips-nlu
4 | python -m snips_nlu download en
5 | snips-nlu download-all-languages
6 | snips-nlu train dataset.json snips.model -v
7 | 
-------------------------------------------------------------------------------- /chapter5/snips-model/src/main.rs: --------------------------------------------------------------------------------
1 | #![feature(proc_macro_hygiene, decl_macro)]
2 | 
3 | #[macro_use] extern crate rocket;
4 | #[macro_use] extern crate rocket_contrib;
5 | #[macro_use] extern crate serde_derive;
6 | extern crate snips_nlu_lib;
7 | 
8 | #[cfg(test)] mod tests;
9 | 
10 | use std::sync::Mutex;
11 | 
12 | use snips_nlu_lib::SnipsNluEngine;
13 | use rocket::{Rocket, State};
14 | use rocket_contrib::json::Json;
15 | 
16 | type Engine = Mutex<SnipsNluEngine>;
17 | 
18 | #[derive(Serialize, Deserialize)]
19 | struct Message {
20 |     contents: String
21 | }
22 | 
23 | fn init_engine() -> SnipsNluEngine {
24 |     let engine_dir = "/home/saionee/opensource/programming-languages/rust-lang/chapter5/snips-nlu-rs/snips.model";
25 |     println!("\nLoading the nlu engine...");
26 |     let engine = SnipsNluEngine::from_path(engine_dir).unwrap();
27 |     engine
28 | }
29 | 
30 | #[get("/")]
31 | fn hello() -> &'static str {
32 |     "Hello, from snips model inference!"
33 | }
34 | 
35 | #[post("/infer", format = "json", data = "<message>")]
36 | fn infer(message: Json<Message>, engine: State<Engine>) -> String {
37 |     let query = message.0.contents;
38 |     let engine = engine.lock().unwrap();
39 |     let result = engine.get_intents(query.trim()).unwrap();
40 |     let result_json = serde_json::to_string_pretty(&result).unwrap();
41 |     result_json
42 | }
43 | 
44 | 
45 | fn rocket() -> Rocket {
46 |     // load the snips inference engine.
47 |     let engine = init_engine();
48 | 
49 |     // Have Rocket manage the engine to be passed to the functions.
50 |     rocket::ignite()
51 |         .manage(Mutex::new(engine))
52 |         .mount("/", routes![hello, infer])
53 | }
54 | 
55 | fn main() {
56 |     rocket().launch();
57 | }
58 | 
-------------------------------------------------------------------------------- /chapter6/adversarial/Cargo.toml: --------------------------------------------------------------------------------
1 | [package]
2 | name = "adversarial"
3 | version = "0.1.0"
4 | authors = ["Joydeep Bhattacharjee "]
5 | edition = "2018"
6 | 
7 | [dependencies]
8 | tch = { git = "https://github.com/LaurentMazare/tch-rs.git", rev = "e92fadc" }
9 | failure = "0.1.5"
-------------------------------------------------------------------------------- /chapter6/adversarial/src/main.rs: --------------------------------------------------------------------------------
1 | // CNN model. This should reach 99.1% accuracy.
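2 | //
3 | // FGSM (the fast gradient sign method) nudges every input pixel by
4 | // epsilon in the direction that increases the loss: x_adv = x + epsilon * sign(grad_x loss).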
5 | 
6 | use tch::{nn, nn::ModuleT, nn::OptimizerConfig, Device, Tensor};
7 | 
8 | #[derive(Debug)]
9 | struct Net {
10 |     conv1: nn::Conv2D,
11 |     conv2: nn::Conv2D,
12 |     fc1: nn::Linear,
13 |     fc2: nn::Linear,
14 | }
15 | 
16 | impl Net {
17 |     fn new(vs: &nn::Path) -> Net {
18 |         let conv1 = nn::conv2d(vs, 1, 32, 5, Default::default());
19 |         let conv2 = nn::conv2d(vs, 32, 64, 5, Default::default());
20 |         let fc1 = nn::linear(vs, 1024, 1024, Default::default());
21 |         let fc2 = nn::linear(vs, 1024, 10, Default::default());
22 |         Net {
23 |             conv1,
24 |             conv2,
25 |             fc1,
26 |             fc2,
27 |         }
28 |     }
29 | }
30 | 
31 | impl nn::ModuleT for Net {
32 |     fn forward_t(&self, xs: &Tensor, train: bool) -> Tensor {
33 |         xs.view(&[-1, 1, 28, 28])
34 |             .apply(&self.conv1)
35 |             .max_pool2d_default(2)
36 |             .apply(&self.conv2)
37 |             .max_pool2d_default(2)
38 |             .view(&[-1, 1024])
39 |             .apply(&self.fc1)
40 |             .relu()
41 |             .dropout_(0.5, train)
42 |             .apply(&self.fc2)
43 |     }
44 | }
45 | 
46 | // FGSM attack code
47 | fn fgsm_attack(image: &Tensor, epsilon: f64, data_grad: &Tensor) -> Tensor {
48 |     // collect the element-wise sign of the data gradient
49 |     let sign_data_grad = data_grad.sign();
50 |     // create the perturbed image by adjusting each pixel of the input image:
51 |     // perturbed_image = image + epsilon * sign_data_grad
52 |     let change = sign_data_grad * epsilon;
53 |     let mut perturbed_image = image + change;
54 |     // add clipping to maintain the [0, 1] pixel range
55 |     let perturbed_image = perturbed_image.clamp_(0., 1.);
56 |     // return the perturbed image
57 |     perturbed_image
58 | }
59 | 
60 | pub fn main() -> failure::Fallible<()> {
61 |     let m = tch::vision::mnist::load_dir("data")?;
62 |     let vs = nn::VarStore::new(Device::cuda_if_available());
63 |     let net = Net::new(&vs.root());
64 |     let opt = nn::Adam::default().build(&vs, 1e-4)?;
65 |     for epoch in 1..100 {
66 |         for (bimages, blabels) in m.train_iter(256).shuffle().to_device(vs.device()) {
67 |             let bimages = bimages.set_requires_grad(true);
68 |             println!("{:?}", bimages.requires_grad());
69 | 
70 |             let data_grad = bimages.grad();
71 |             // println!("{:?}", data_grad.sign());
72 | 
73 |             // call the FGSM attack
74 |             let epsilon = 0.5;
75 |             let perturbed_data = fgsm_attack(&bimages, epsilon, &data_grad);
76 | 
77 |             let loss = net
78 |                 .forward_t(&perturbed_data, true)
79 |                 .cross_entropy_for_logits(&blabels);
80 |             opt.backward_step(&loss);
81 |         }
82 |         let test_accuracy =
83 |             net.batch_accuracy_for_logits(&m.test_images, &m.test_labels, vs.device(), 1024);
84 |         println!("epoch: {:4} test acc: {:5.2}%", epoch, 100. * test_accuracy,);
85 |     }
86 |     Ok(())
87 | }
-------------------------------------------------------------------------------- /chapter6/face-detection-tf/Cargo.toml: --------------------------------------------------------------------------------
1 | [package]
2 | name = "face-detection-tf"
3 | version = "0.1.0"
4 | authors = ["Joydeep Bhattacharjee "]
5 | edition = "2018"
6 | 
7 | [dependencies]
8 | tensorflow = "0.13.0"
9 | structopt = "0.2.15"
10 | image = "0.21.1"
11 | imageproc = "0.18.0"
-------------------------------------------------------------------------------- /chapter6/face-detection-tf/mtcnn.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/practical-machine-learning-w-rust/b0fd379ee4f0f7bcd9276ae6d31576aa655b08d7/chapter6/face-detection-tf/mtcnn.pb -------------------------------------------------------------------------------- /chapter6/face-detection-tf/src/main.rs: --------------------------------------------------------------------------------
1 | use std::path::PathBuf;
2 | use std::error::Error;
3 | 
4 | use tensorflow::Graph;
5 | use tensorflow::ImportGraphDefOptions;
6 | use tensorflow::{Session, SessionOptions, SessionRunArgs, Tensor};
7 | use structopt::StructOpt;
8 | use image;
9 | use image::GenericImageView;
10 | use image::Rgba;
11 | use imageproc;
12 | use imageproc::rect::Rect;
13 | use imageproc::drawing::draw_hollow_rect_mut;
14 | 
15 | const LINE_COLOUR: Rgba<u8> = Rgba {
16 |     data: [0, 255, 0, 0],
17 | };
18 | 
19 | #[derive(Debug, StructOpt)]
20 | #[structopt(name = "face-detection-tf", about = "Face Identification")]
21 | struct Opt {
22 |     #[structopt(short = "i", long = "input", parse(from_os_str))]
23 |     input: PathBuf,
24 | 
25 |     #[structopt(short = "o", long = "output", parse(from_os_str))]
26 |     output: PathBuf
27 | }
28 | 
29 | #[derive(Copy, Clone, Debug)]
30 | pub struct BBox {
31 |     pub x1: f32,
32 |     pub y1: f32,
33 |     pub x2: f32,
34 |     pub y2: f32,
35 |     pub prob: f32,
36 | }
37 | 
38 | /// read the image, flatten the pixel values in BGR channel order (note the
39 | /// reversed rgb indexing below), and load them into a tensor for face prediction.
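40 | /// The resulting tensor has shape [height, width, 3], so the graph takes
41 | /// the whole image in a single feed, with no batch dimension.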
42 | fn get_input_image_tensor(opt: &Opt) -> Result<Tensor<f32>, Box<dyn Error>> {
43 |     let input_image = image::open(&opt.input)?;
44 | 
45 |     let mut flattened: Vec<f32> = Vec::new();
46 |     for (_x, _y, rgb) in input_image.pixels() {
47 |         flattened.push(rgb[2] as f32);
48 |         flattened.push(rgb[1] as f32);
49 |         flattened.push(rgb[0] as f32);
50 |     }
51 |     let input = Tensor::new(
52 |         &[input_image.height() as u64, input_image.width() as u64, 3])
53 |         .with_values(&flattened)?;
54 |     Ok(input)
55 | }
56 | 
57 | fn main() -> Result<(), Box<dyn Error>> {
58 |     let opt = Opt::from_args();
59 |     println!("{:?}", (opt.input.to_owned(), opt.output.to_owned()));
60 |     let input = get_input_image_tensor(&opt)?;
61 | 
62 |     // First, we load up the graph as a byte array
63 |     let model = include_bytes!("../mtcnn.pb");
64 | 
65 |     // Then we create a tensorflow graph from the model
66 |     let mut graph = Graph::new();
67 |     graph.import_graph_def(&*model, &ImportGraphDefOptions::new())?;
68 | 
69 |     let session = Session::new(&SessionOptions::new(), &graph)?;
70 |     let min_size = Tensor::new(&[]).with_values(&[40f32])?;
71 |     let thresholds = Tensor::new(&[3]).with_values(&[0.6f32, 0.7f32, 0.7f32])?;
72 |     let factor = Tensor::new(&[]).with_values(&[0.709f32])?;
73 | 
74 |     let mut args = SessionRunArgs::new();
75 | 
76 |     // Load our parameters for the model
77 |     args.add_feed(&graph.operation_by_name_required("min_size")?, 0, &min_size);
78 |     args.add_feed(&graph.operation_by_name_required("thresholds")?, 0, &thresholds);
79 |     args.add_feed(&graph.operation_by_name_required("factor")?, 0, &factor);
80 | 
81 |     // Load our input image
82 |     args.add_feed(&graph.operation_by_name_required("input")?, 0, &input);
83 | 
84 |     let bbox = args.request_fetch(&graph.operation_by_name_required("box")?, 0);
85 |     let prob = args.request_fetch(&graph.operation_by_name_required("prob")?, 0);
86 | 
87 |     session.run(&mut args)?;
88 | 
89 |     let bbox_res: Tensor<f32> = args.fetch(bbox)?;
90 |     let prob_res: Tensor<f32> = args.fetch(prob)?;
91 | 
92 |     println!("{:?}", bbox_res.dims()); // [120, 4]
93 |     println!("{:?}", prob_res.dims()); // [120]
94 | 
95 |     // Let's store the results as a Vec<BBox>
96 |     let bboxes: Vec<_> = bbox_res
97 |         .chunks_exact(4) // Split into chunks of 4
98 |         .zip(prob_res.iter()) // Combine it with prob_res
99 |         .map(|(bbox, &prob)| BBox {
100 |             y1: bbox[0],
101 |             x1: bbox[1],
102 |             y2: bbox[2],
103 |             x2: bbox[3],
104 |             prob,
105 |         }).collect();
106 |     println!("BBox Length: {}, Bboxes:{:#?}", bboxes.len(), bboxes);
107 | 
108 |     let mut output_image = image::open(&opt.input)?;
109 | 
110 |     for bbox in bboxes {
111 |         let rect = Rect::at(bbox.x1 as i32, bbox.y1 as i32)
112 |             .of_size((bbox.x2 - bbox.x1) as u32, (bbox.y2 - bbox.y1) as u32);
113 |         draw_hollow_rect_mut(&mut output_image, rect, LINE_COLOUR);
114 |     }
115 |     output_image.save(&opt.output)?;
116 | 
117 |     Ok(())
118 | }
-------------------------------------------------------------------------------- /chapter6/finetuning_pytorch_image_models/Cargo.toml: --------------------------------------------------------------------------------
1 | [package]
2 | name = "finetuning_pytorch_image_models"
3 | version = "0.1.0"
4 | authors = ["Joydeep Bhattacharjee "]
5 | edition = "2018"
6 | 
7 | [dependencies]
8 | # tch = "0.0.9"
9 | tch = { git = "https://github.com/LaurentMazare/tch-rs.git", rev = "e92fadc" }
10 | failure = "0.1.5"
-------------------------------------------------------------------------------- /chapter6/finetuning_pytorch_image_models/README.md: --------------------------------------------------------------------------------
1 | # Usage
2 | 
3 | cargo run resnet18.ot hymenoptera_data
4 | 
-------------------------------------------------------------------------------- /chapter6/finetuning_pytorch_image_models/resnet.py: --------------------------------------------------------------------------------
1 | import torch
2 | import torchvision
3 | 
4 | model = torchvision.models.resnet18(pretrained=True)
5 | # model = torch.load("model-best.pth", map_location='cpu')
6 | example = torch.rand(1, 3, 224, 224)
7 | traced_script_module = torch.jit.trace(model, example)
8 | traced_script_module.save("resnet18_model.pt")
-------------------------------------------------------------------------------- /chapter6/model_inference/Cargo.toml: --------------------------------------------------------------------------------
1 | [package]
2 | name = "model_inference"
3 | version = "0.1.0"
4 | authors = ["Joydeep Bhattacharjee "]
5 | edition = "2018"
6 | 
7 | [dependencies]
8 | tch = "0.0.9"
9 | failure = "0.1.5"
-------------------------------------------------------------------------------- /chapter6/model_inference/src/main.rs: --------------------------------------------------------------------------------
1 | #[macro_use]
2 | extern crate failure;
3 | extern crate tch;
4 | use tch::nn::ModuleT;
5 | use tch::vision::{alexnet, densenet, imagenet, inception, resnet, squeezenet, vgg};
6 | 
7 | pub fn main() -> failure::Fallible<()> {
8 |     let args: Vec<_> = std::env::args().collect();
9 |     let (weights, image) = match args.as_slice() {
10 |         [_, w, i] => (std::path::Path::new(w), i.to_owned()),
11 |         _ => bail!("usage: main resnet18.ot image.jpg"),
12 |     };
13 |     // Load the image file and resize it to the usual imagenet dimension of 224x224.
14 |     let image = imagenet::load_image_and_resize224(image)?;
15 | 
16 |     // Create the model and load the weights from the file.
17 |     let mut vs = tch::nn::VarStore::new(tch::Device::Cpu);
18 |     let net: Box<dyn ModuleT> = match weights.file_name().unwrap().to_str().unwrap() {
19 |         "resnet18.ot" => Box::new(resnet::resnet18(&vs.root(), imagenet::CLASS_COUNT)),
20 |         "resnet34.ot" => Box::new(resnet::resnet34(&vs.root(), imagenet::CLASS_COUNT)),
21 |         "densenet121.ot" => Box::new(densenet::densenet121(&vs.root(), imagenet::CLASS_COUNT)),
22 |         "vgg16.ot" => Box::new(vgg::vgg16(&vs.root(), imagenet::CLASS_COUNT)),
23 |         "squeezenet1_0.ot" => Box::new(squeezenet::v1_0(&vs.root(), imagenet::CLASS_COUNT)),
24 |         "squeezenet1_1.ot" => Box::new(squeezenet::v1_1(&vs.root(), imagenet::CLASS_COUNT)),
25 |         "alexnet.ot" => Box::new(alexnet::alexnet(&vs.root(), imagenet::CLASS_COUNT)),
26 |         "inception-v3.ot" => Box::new(inception::v3(&vs.root(), imagenet::CLASS_COUNT)),
27 |         _ => bail!("unknown model, use a weight file named e.g. resnet18.ot"),
28 |     };
29 |     vs.load(weights)?;
30 | 
31 |     // Apply the forward pass of the model to get the logits.
32 |     let output = net
33 |         .forward_t(&image.unsqueeze(0), /*train=*/ false)
34 |         .softmax(-1); // Convert to probability.
35 | 
36 |     // Print the top 5 categories for this image.
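37 |     // The output is one line per class, e.g. "Samoyed  87.21%"; that label
38 |     // and score are purely illustrative and depend on the image and weights.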
39 |     for (probability, class) in imagenet::top(&output, 5).iter() {
40 |         println!("{:50} {:5.2}%", class, 100.0 * probability)
41 |     }
42 |     Ok(())
43 | }
-------------------------------------------------------------------------------- /chapter6/pytorch-image-classification/Cargo.toml: --------------------------------------------------------------------------------
1 | [package]
2 | name = "pytorch-image-classification"
3 | version = "0.1.0"
4 | authors = ["Joydeep Bhattacharjee "]
5 | edition = "2018"
6 | 
7 | [dependencies]
8 | tch = { path = "../tch-rs" }
9 | # tch = { git = "https://github.com/LaurentMazare/tch-rs.git", rev="8110ee6" }
10 | # tch = { git = "https://github.com/LaurentMazare/tch-rs.git", rev="2ebebb808065de13495db8ff7b0ba18cb1a6fe92" }
11 | failure = "0.1.5"
-------------------------------------------------------------------------------- /chapter7/goodbooks-recommender/Cargo.toml: --------------------------------------------------------------------------------
1 | [package]
2 | name = "goodbooks-recommender"
3 | version = "0.1.0"
4 | authors = ["Joydeep Bhattacharjee "]
5 | edition = "2018"
6 | 
7 | [dependencies]
8 | reqwest = "0.9.17"
9 | failure = "0.1.5"
10 | serde = "1"
11 | serde_derive = "1"
12 | serde_json = "1"
13 | csv = "1"
14 | sbr = "0.4.0"
15 | rand = "0.6.5"
16 | structopt = "0.2.15"
-------------------------------------------------------------------------------- /chapter7/high-performance-computing/Cargo.toml: --------------------------------------------------------------------------------
1 | [package]
2 | name = "high-performance-computing"
3 | version = "0.1.0"
4 | authors = ["Joydeep Bhattacharjee "]
5 | edition = "2018"
6 | 
7 | [dependencies]
8 | # faster = "0.5.0"
9 | faster = { path = "../faster" }
10 | rblas = "0.0.13"
11 | # lapack = "0.16.0"
12 | # lapack = { git = "https://github.com/blas-lapack-rs/lapack.git", rev = "67554e6"}
-------------------------------------------------------------------------------- /chapter7/high-performance-computing/src/main.rs: --------------------------------------------------------------------------------
1 | use faster::*;
2 | use rblas::Dot;
3 | // use lapack::*;
4 | 
5 | fn main() {
6 |     let lots_of_3s = (&[-123.456f32; 128][..]).simd_iter(f32s(0.0))
7 |         .simd_map(|v| {
8 |             f32s(9.0) * v.abs().sqrt().rsqrt().ceil().sqrt() - f32s(4.0) - f32s(2.0)
9 |         })
10 |         .scalar_collect();
11 |     println!("{:?}", lots_of_3s);
12 | 
13 |     // making a parallel operation
14 |     let my_vector: Vec<f32> = (0..10).map(|v| v as f32).collect();
15 |     let power_of_3 = (&my_vector[..]).simd_iter(f32s(0.0))
16 |         .simd_map(|v| {
17 |             v * v * v
18 |         })
19 |         .scalar_collect();
20 |     println!("{:?}", power_of_3);
21 | 
22 |     // taking the sum
23 |     let reduced = (&power_of_3[..]).simd_iter(f32s(0.0))
24 |         .simd_reduce(f32s(0.0), |a, v| a + v ).sum();
25 |     println!("{:?}", reduced);
26 | 
27 |     let x = vec![1.0, -2.0, 3.0, 4.0];
28 |     let y = [1.0, 1.0, 1.0, 1.0, 7.0];
29 | 
30 |     let d = Dot::dot(&x, &y[..x.len()]);
31 |     println!("dot product {:?}", d);
32 | 
33 |     // let n = 3;
34 |     // let mut a = vec![3.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 1.0, 3.0];
35 |     // let mut w = vec![0.0; n as usize];
36 |     // let mut work = vec![0.0; 4 * n as usize];
37 |     // let lwork = 4 * n;
38 |     // let mut info = 0;
39 | 
40 |     // unsafe {
41 |     //     dsyev(b'V', b'U', n, &mut a, n, &mut w, &mut work, lwork, &mut info);
42 |     // }
43 | 
44 |     // assert!(info == 0);
45 |     // for (one, another) in w.iter().zip(&[2.0, 2.0, 5.0]) {
46 |     //     assert!((one - another).abs() < 1e-14);
47 |     // }
48 | 
49 | }
50 | 
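51 | // Working the first map through by hand: sqrt(|-123.456|) ~= 11.11, rsqrt(11.11) ~= 0.3,
52 | // ceil -> 1.0, sqrt -> 1.0, so 9.0 * 1.0 - 4.0 - 2.0 = 3.0 in every one of the 128 lanes;
53 | // the cubes of 0..10 sum to 2025.0; and the dot product is 1 - 2 + 3 + 4 = 6.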
-------------------------------------------------------------------------------- /chapter7/statistics/Cargo.toml: --------------------------------------------------------------------------------
1 | [package]
2 | name = "statistics"
3 | version = "0.1.0"
4 | authors = ["Joydeep Bhattacharjee "]
5 | edition = "2018"
6 | 
7 | [dependencies]
8 | ndarray = "0.12.1"
-------------------------------------------------------------------------------- /chapter8/cpp_demangle/Cargo.toml: --------------------------------------------------------------------------------
1 | [package]
2 | name = "rust2py"
3 | version = "0.1.0"
4 | edition = "2018"
5 | 
6 | [lib]
7 | name = "rust2py"
8 | crate-type = ["cdylib"]
9 | 
10 | [dependencies.pyo3]
11 | version = "0.7.0"
12 | features = ["extension-module"]
-------------------------------------------------------------------------------- /chapter8/cpp_demangle/mangle_ex.py: --------------------------------------------------------------------------------
1 | # import our rust library, no need for cffi
2 | from cpp_demangle import demangle
3 | 
4 | # run the demangle function, prints 'mangled::foo(double)'
5 | print(demangle("_ZN7mangled3fooEd"))
6 | 
-------------------------------------------------------------------------------- /chapter8/cpp_demangle/setup.py: --------------------------------------------------------------------------------
1 | from setuptools import setup
2 | from setuptools_rust import Binding, RustExtension
3 | 
4 | setup(name='cpp-demangle',
5 |       version="0.0.1",
6 |       rust_extensions=[RustExtension('cpp_demangle', 'Cargo.toml', binding=Binding.PyO3)],
7 |       test_suite="tests",
8 |       zip_safe=False)
9 | 
-------------------------------------------------------------------------------- /chapter8/cpp_demangle/src/lib.rs: --------------------------------------------------------------------------------
1 | use pyo3::prelude::*;
2 | use pyo3::wrap_pyfunction;
3 | 
4 | #[pymodule]
5 | fn rust2py(py: Python, m: &PyModule) -> PyResult<()> {
6 | 
7 |     // Note that the `#[pyfn()]` annotation automatically converts the arguments from
8 |     // Python objects to Rust values; and the Rust return value back into a Python object.
9 |     #[pyfn(m, "sum_as_string")]
10 |     fn sum_as_string_py(_py: Python, a: i64, b: i64) -> PyResult<String> {
11 |         Ok(format!("{}", a + b))
12 |     }
13 | 
14 |     Ok(())
15 | }
16 | 
17 | #[pyfunction]
18 | fn double(x: usize) -> usize {
19 |     x * 2
20 | }
21 | 
22 | #[pymodule]
23 | fn module_with_functions(py: Python, m: &PyModule) -> PyResult<()> {
24 |     m.add_wrapped(wrap_pyfunction!(double)).unwrap();
25 | 
26 |     Ok(())
27 | }
28 | 
29 | 
30 | /// add(a, b, /)
31 | /// --
32 | /// 
33 | /// This function adds two unsigned 64-bit integers.
34 | #[pyfunction] 35 | fn add(a: u64, b: u64) -> u64 { 36 | a + b 37 | } -------------------------------------------------------------------------------- /chapter8/crfsuite-model/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "crfsuite-model" 3 | version = "0.2.0" 4 | authors = ["Joydeep Bhattacharjee "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | # data reading and organisation 9 | csv = "1.0.7" 10 | serde = "1" 11 | serde_derive = "1" 12 | rand = "0.6.5" 13 | 14 | # machine learning 15 | crfsuite = "0.2.6" 16 | 17 | # to call from python 18 | pyo3 = { git = "https://github.com/PyO3/pyo3.git", rev = "99fdafbb880c181f4bce16bbbac03888b3cf85c8", features = ["extension-module"]} 19 | 20 | [lib] 21 | name = "crfsuite_model" 22 | crate-type = ["cdylib"] -------------------------------------------------------------------------------- /chapter8/crfsuite-model/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include pyproject.toml Cargo.toml 2 | recursive-include src * -------------------------------------------------------------------------------- /chapter8/crfsuite-model/crfsuite_model/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .crfsuite_model import CRFSuiteModel 3 | 4 | __all__ = ["CRFSuiteModel",] -------------------------------------------------------------------------------- /chapter8/crfsuite-model/crfsuite_model_prediction.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from crfsuite_model import CRFSuiteModel 3 | model = CRFSuiteModel("model.crfsuite") 4 | res = model.predict("data/ner_predict.csv") 5 | print(res) 6 | -------------------------------------------------------------------------------- /chapter8/crfsuite-model/crfsuite_model_training.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from crfsuite_model import CRFSuiteModel 3 | model = CRFSuiteModel("model.crfsuite") 4 | res = model.fit("data/ner.csv") 5 | print(res) 6 | -------------------------------------------------------------------------------- /chapter8/crfsuite-model/data/ner_predict.csv: -------------------------------------------------------------------------------- 1 | ,lemma,next-lemma,next-next-lemma,next-next-pos,next-next-shape,next-next-word,next-pos,next-shape,next-word,pos,prev-iob,prev-lemma,prev-pos,prev-prev-iob,prev-prev-lemma,prev-prev-pos,prev-prev-shape,prev-prev-word,prev-shape,prev-word,sentence_idx,shape,word 2 | 0,thousand,of,demonstr,NNS,lowercase,demonstrators,IN,lowercase,of,NNS,__START1__,__start1__,__START1__,__START2__,__start2__,__START2__,wildcard,__START2__,wildcard,__START1__,1,capitalized,Thousands 3 | 1,of,demonstr,have,VBP,lowercase,have,NNS,lowercase,demonstrators,IN,O,thousand,NNS,__START1__,__start1__,__START1__,wildcard,__START1__,capitalized,Thousands,1,lowercase,of 4 | 2,demonstr,have,march,VBN,lowercase,marched,VBP,lowercase,have,NNS,O,of,IN,O,thousand,NNS,capitalized,Thousands,lowercase,of,1,lowercase,demonstrators 5 | 3,have,march,through,IN,lowercase,through,VBN,lowercase,marched,VBP,O,demonstr,NNS,O,of,IN,lowercase,of,lowercase,demonstrators,1,lowercase,have 6 | 4,march,through,london,NNP,capitalized,London,IN,lowercase,through,VBN,O,have,VBP,O,demonstr,NNS,lowercase,demonstrators,lowercase,have,1,lowercase,marched 7 | 
5,through,london,to,TO,lowercase,to,NNP,capitalized,London,IN,O,march,VBN,O,have,VBP,lowercase,have,lowercase,marched,1,lowercase,through 8 | 6,london,to,protest,VB,lowercase,protest,TO,lowercase,to,NNP,O,through,IN,O,march,VBN,lowercase,marched,lowercase,through,1,capitalized,London 9 | 7,to,protest,the,DT,lowercase,the,VB,lowercase,protest,TO,B-geo,london,NNP,O,through,IN,lowercase,through,capitalized,London,1,lowercase,to 10 | 8,protest,the,war,NN,lowercase,war,DT,lowercase,the,VB,O,to,TO,B-geo,london,NNP,capitalized,London,lowercase,to,1,lowercase,protest 11 | -------------------------------------------------------------------------------- /chapter8/crfsuite-model/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=41.0.0", "wheel", "setuptools_rust>=0.10.2", "toml"] 3 | build-backend = "setuptools.build_meta" 4 | -------------------------------------------------------------------------------- /chapter8/crfsuite-model/requirements-dev.txt: -------------------------------------------------------------------------------- 1 | pip>=19.1 2 | pytest>=3.5.0 3 | setuptools-rust>=0.10.2 4 | pytest-benchmark>=3.1.1 5 | -------------------------------------------------------------------------------- /chapter8/crfsuite-model/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | from setuptools import setup 5 | from setuptools.command.test import test as TestCommand 6 | from setuptools.command.sdist import sdist as SdistCommand 7 | 8 | try: 9 | from setuptools_rust import RustExtension 10 | except ImportError: 11 | import subprocess 12 | 13 | errno = subprocess.call([sys.executable, "-m", "pip", "install", "setuptools-rust"]) 14 | if errno: 15 | print("Please install setuptools-rust package") 16 | raise SystemExit(errno) 17 | else: 18 | from setuptools_rust import RustExtension 19 | 20 | 21 | class CargoModifiedSdist(SdistCommand): 22 | """Modifies Cargo.toml to use an absolute rather than a relative path 23 | 24 | The current implementation of PEP 517 in pip always does builds in an 25 | isolated temporary directory. This causes problems with the build, because 26 | Cargo.toml necessarily refers to the current version of pyo3 by a relative 27 | path. 28 | 29 | Since these sdists are never meant to be used for anything other than 30 | tox / pip installs, at sdist build time, we will modify the Cargo.toml 31 | in the sdist archive to include an *absolute* path to pyo3. 
32 |     """
33 | 
34 |     def make_release_tree(self, base_dir, files):
35 |         """Stages the files to be included in archives"""
36 |         super().make_release_tree(base_dir, files)
37 | 
38 |         import toml
39 |         # Cargo.toml is now staged and ready to be modified
40 |         cargo_loc = os.path.join(base_dir, 'Cargo.toml')
41 |         assert os.path.exists(cargo_loc)
42 | 
43 |         with open(cargo_loc, 'r') as f:
44 |             cargo_toml = toml.load(f)
45 | 
46 |         rel_pyo3_path = cargo_toml['dependencies']['pyo3']['path']
47 |         base_path = os.path.dirname(__file__)
48 |         abs_pyo3_path = os.path.abspath(os.path.join(base_path, rel_pyo3_path))
49 | 
50 |         cargo_toml['dependencies']['pyo3']['path'] = abs_pyo3_path
51 | 
52 |         with open(cargo_loc, 'w') as f:
53 |             toml.dump(cargo_toml, f)
54 | 
55 | 
56 | class PyTest(TestCommand):
57 |     user_options = []
58 | 
59 |     def run(self):
60 |         self.run_command("test_rust")
61 | 
62 |         import subprocess
63 | 
64 |         subprocess.check_call(["pytest", "tests"])
65 | 
66 | 
67 | setup_requires = ["setuptools-rust>=0.10.1", "wheel"]
68 | install_requires = []
69 | tests_require = install_requires + ["pytest", "pytest-benchmark"]
70 | 
71 | setup(
72 |     name="crfsuite-model",
73 |     version="0.1.0",
74 |     classifiers=[
75 |         "License :: OSI Approved :: MIT License",
76 |         "Development Status :: 3 - Alpha",
77 |         "Intended Audience :: Developers",
78 |         "Programming Language :: Python",
79 |         "Programming Language :: Rust",
80 |         "Operating System :: POSIX",
81 |         "Operating System :: MacOS :: MacOS X",
82 |     ],
83 |     packages=["crfsuite_model"],
84 |     rust_extensions=[RustExtension("crfsuite_model.crfsuite_model", "Cargo.toml")],
85 |     install_requires=install_requires,
86 |     tests_require=tests_require,
87 |     setup_requires=setup_requires,
88 |     include_package_data=True,
89 |     zip_safe=False,
90 |     cmdclass={
91 |         'test': PyTest,
92 |         'sdist': CargoModifiedSdist,
93 |     },
94 | )
95 | 
-------------------------------------------------------------------------------- /chapter8/crfsuite-model/src/lib.rs: --------------------------------------------------------------------------------
1 | extern crate serde;
2 | #[macro_use]
3 | extern crate serde_derive;
4 | 
5 | use pyo3::prelude::*;
6 | use pyo3::wrap_pyfunction;
7 | use std::fs;
8 | use std::path::PathBuf;
9 | 
10 | use std::vec::Vec;
11 | use std::error::Error;
12 | 
13 | use csv;
14 | use rand;
15 | use rand::thread_rng;
16 | use rand::seq::SliceRandom;
17 | 
18 | use crfsuite::{Model, Attribute, CrfError};
19 | use crfsuite::{Trainer, Algorithm, GraphicalModel};
20 | 
21 | #[pyclass(module = "crfsuite_model")]
22 | pub struct CRFSuiteModel {
23 |     model_name: String,
24 | }
25 | 
26 | #[pymethods]
27 | impl CRFSuiteModel {
28 |     #[new]
29 |     fn new(obj: &PyRawObject, path: String) {
30 |         obj.init(CRFSuiteModel {
31 |             model_name: path,
32 |         });
33 |     }
34 | 
35 |     fn fit(&self, py: Python<'_>, path: String) -> PyResult<String> {
36 |         let data_file = PathBuf::from(&path[..]);
37 |         let data = get_data(&data_file).unwrap();
38 |         let (test_data, train_data) = split_test_train(&data, 0.2);
39 |         let (xseq_train, yseq_train) = create_xseq_yseq(&train_data);
40 |         let (xseq_test, yseq_test) = create_xseq_yseq(&test_data);
41 |         crfmodel_training(xseq_train, yseq_train, self.model_name.as_ref()).unwrap();
42 |         let preds = model_prediction(xseq_test, self.model_name.as_ref()).unwrap();
43 |         check_accuracy(&preds, &yseq_test);
44 |         Ok("model fit done".to_string())
45 |     }
46 | 
47 |     fn predict(&self, predict_filename: String) -> PyResult<Vec<String>> {
48 |         let predict_data_file = PathBuf::from(predict_filename);
49 |         let data = get_data_no_y(&predict_data_file).unwrap();
50 |         let xseq_test = create_xseq_for_predict(&data[..]);
51 |         let preds = model_prediction(xseq_test, self.model_name.as_ref()).unwrap();
52 |         Ok(preds)
53 |     }
54 | }
55 | 
56 | #[derive(Debug, Deserialize, Clone)]
57 | pub struct NER {
58 |     lemma: String,
59 |     #[serde(rename = "next-lemma")]
60 |     next_lemma: String,
61 |     word: String,
62 |     tag: String
63 | }
64 | 
65 | #[derive(Debug, Deserialize, Clone)]
66 | pub struct NER_Only_X {
67 |     lemma: String,
68 |     #[serde(rename = "next-lemma")]
69 |     next_lemma: String,
70 |     word: String,
71 | }
72 | 
73 | fn get_data_no_y(path: &PathBuf) -> Result<Vec<NER_Only_X>, Box<dyn Error>> {
74 |     let csvfile = fs::File::open(path)?;
75 |     let mut rdr = csv::Reader::from_reader(csvfile);
76 |     let mut data = Vec::new();
77 |     for result in rdr.deserialize() {
78 |         let r: NER_Only_X = result?;
79 |         data.push(r);
80 |     }
81 |     Ok(data)
82 | }
83 | 
84 | fn get_data(path: &PathBuf) -> Result<Vec<NER>, Box<dyn Error>> {
85 |     let csvfile = fs::File::open(path)?;
86 |     let mut rdr = csv::Reader::from_reader(csvfile);
87 |     let mut data = Vec::new();
88 |     for result in rdr.deserialize() {
89 |         let r: NER = result?;
90 |         data.push(r);
91 |     }
92 |     data.shuffle(&mut thread_rng());
93 |     Ok(data)
94 | }
95 | 
96 | fn split_test_train(data: &[NER], test_size: f32) -> (Vec<NER>, Vec<NER>) {
97 |     let test_size: f32 = data.len() as f32 * test_size;
98 |     let test_size = test_size.round() as usize;
99 | 
100 |     let (test_data, train_data) = data.split_at(test_size);
101 |     (test_data.to_vec(), train_data.to_vec())
102 | }
103 | 
104 | fn create_xseq_yseq(data: &[NER])
105 |     -> (Vec<Vec<Attribute>>, Vec<String>) {
106 |     let mut xseq = vec![];
107 |     let mut yseq = vec![];
108 |     for item in data {
109 |         let seq = vec![Attribute::new(item.lemma.clone(), 1.0),
110 |             Attribute::new(item.next_lemma.clone(), 0.5)]; // higher weightage for the main word.
111 |         xseq.push(seq);
112 |         yseq.push(item.tag.clone());
113 |     }
114 |     (xseq, yseq)
115 | }
116 | 
117 | fn create_xseq_for_predict(data: &[NER_Only_X])
118 |     -> Vec<Vec<Attribute>> {
119 |     let mut xseq = vec![];
120 |     for item in data {
121 |         let seq = vec![Attribute::new(item.lemma.clone(), 1.0),
122 |             Attribute::new(item.next_lemma.clone(), 0.5)]; // higher weightage for the main word.
123 |         xseq.push(seq);
124 |     }
125 |     xseq
126 | }
127 | 
128 | fn check_accuracy(preds: &[String], actual: &[String]) {
129 |     let mut hits = 0;
130 |     let mut correct_hits = 0;
131 |     for (predicted, actual) in preds.iter().zip(actual) {
132 |         if actual != "O" { // will not consider the other category as it bloats the accuracy.
133 |             if predicted == actual && actual != "O" {
134 |                 correct_hits += 1;
135 |             }
136 |             hits += 1;
137 |         }
138 |     }
139 |     println!("accuracy={} ({}/{} correct)",
140 |         correct_hits as f32 / hits as f32,
141 |         correct_hits,
142 |         hits);
143 | }
144 | 
145 | fn crfmodel_training(xseq: Vec<Vec<Attribute>>,
146 |     yseq: Vec<String>,
147 |     model_name: &str) -> Result<(), Box<dyn Error>> {
148 |     let mut trainer = Trainer::new(true);
149 |     trainer.select(Algorithm::AROW, GraphicalModel::CRF1D)?;
150 |     trainer.append(&xseq, &yseq, 0i32)?;
151 |     trainer.train(model_name, -1i32)?; // using all instances for training.
152 |     Ok(())
153 | }
154 | 
155 | fn model_prediction(xtest: Vec<Vec<Attribute>>,
156 |     model_name: &str)
157 |     -> Result<Vec<String>, Box<dyn Error>> {
158 |     let model = Model::from_file(model_name)?;
159 |     let mut tagger = model.tagger()?;
160 |     let preds = tagger.tag(&xtest)?;
161 |     Ok(preds)
162 | }
163 | 
164 | #[pymodule]
165 | fn crfsuite_model(_py: Python<'_>, m: &PyModule) -> PyResult<()> {
166 |     m.add_class::<CRFSuiteModel>()?;
167 | 
168 |     Ok(())
169 | }
-------------------------------------------------------------------------------- /chapter8/iris_classification_xgboost/IrisClassificationXgboost.java: --------------------------------------------------------------------------------
1 | class IrisClassificationXgboost {
2 |     private static native void fit();
3 |     private static native String predict();
4 | 
5 |     static {
6 |         // This actually loads the shared object that we'll be creating.
7 |         // The actual location of the .so or .dll may differ based on your
8 |         // platform.
9 |         System.loadLibrary("iris_classification_xgboost");
10 |     }
11 | 
12 |     // The rest is just regular ol' Java!
13 |     public static void main(String[] args) {
14 |         IrisClassificationXgboost.fit();
15 |         String predictions = IrisClassificationXgboost.predict();
16 |         System.out.println(predictions);
17 |     }
18 | }
-------------------------------------------------------------------------------- /chapter8/iris_classification_xgboost/Makefile: --------------------------------------------------------------------------------
1 | java_run: lib
2 | 	javac IrisClassificationXgboost.java && java -Djava.library.path=iris_classification_library/target/debug/ IrisClassificationXgboost
3 | 
4 | .PHONY: lib
5 | 
6 | javah:
7 | 	javah IrisClassificationXgboost
8 | 
9 | lib:
10 | 	cd iris_classification_library && cargo build
11 | 
-------------------------------------------------------------------------------- /chapter8/iris_classification_xgboost/data/iris.csv: --------------------------------------------------------------------------------
1 | sepal_length,sepal_width,petal_length,petal_width,species
2 | 5.1,3.5,1.4,0.2,setosa
3 | 4.9,3.0,1.4,0.2,setosa
4 | 4.7,3.2,1.3,0.2,setosa
5 | 4.6,3.1,1.5,0.2,setosa
6 | 5.0,3.6,1.4,0.2,setosa
7 | 5.4,3.9,1.7,0.4,setosa
8 | 4.6,3.4,1.4,0.3,setosa
9 | 5.0,3.4,1.5,0.2,setosa
10 | 4.4,2.9,1.4,0.2,setosa
11 | 4.9,3.1,1.5,0.1,setosa
12 | 5.4,3.7,1.5,0.2,setosa
13 | 4.8,3.4,1.6,0.2,setosa
14 | 4.8,3.0,1.4,0.1,setosa
15 | 4.3,3.0,1.1,0.1,setosa
16 | 5.8,4.0,1.2,0.2,setosa
17 | 5.7,4.4,1.5,0.4,setosa
18 | 5.4,3.9,1.3,0.4,setosa
19 | 5.1,3.5,1.4,0.3,setosa
20 | 5.7,3.8,1.7,0.3,setosa
21 | 5.1,3.8,1.5,0.3,setosa
22 | 5.4,3.4,1.7,0.2,setosa
23 | 5.1,3.7,1.5,0.4,setosa
24 | 4.6,3.6,1.0,0.2,setosa
25 | 5.1,3.3,1.7,0.5,setosa
26 | 4.8,3.4,1.9,0.2,setosa
27 | 5.0,3.0,1.6,0.2,setosa
28 | 5.0,3.4,1.6,0.4,setosa
29 | 5.2,3.5,1.5,0.2,setosa
30 | 5.2,3.4,1.4,0.2,setosa
31 | 4.7,3.2,1.6,0.2,setosa
32 | 4.8,3.1,1.6,0.2,setosa
33 | 5.4,3.4,1.5,0.4,setosa
34 | 5.2,4.1,1.5,0.1,setosa
35 | 5.5,4.2,1.4,0.2,setosa
36 | 4.9,3.1,1.5,0.1,setosa
37 | 5.0,3.2,1.2,0.2,setosa
38 | 5.5,3.5,1.3,0.2,setosa
39 | 4.9,3.1,1.5,0.1,setosa
40 | 4.4,3.0,1.3,0.2,setosa
41 | 5.1,3.4,1.5,0.2,setosa
42 | 5.0,3.5,1.3,0.3,setosa
43 | 4.5,2.3,1.3,0.3,setosa
44 | 4.4,3.2,1.3,0.2,setosa
45 | 5.0,3.5,1.6,0.6,setosa
46 | 5.1,3.8,1.9,0.4,setosa
47 | 4.8,3.0,1.4,0.3,setosa
48 | 5.1,3.8,1.6,0.2,setosa
49 | 4.6,3.2,1.4,0.2,setosa
50 | 5.3,3.7,1.5,0.2,setosa
51 | 5.0,3.3,1.4,0.2,setosa
52 | 7.0,3.2,4.7,1.4,versicolor
53 | 6.4,3.2,4.5,1.5,versicolor
54 | 6.9,3.1,4.9,1.5,versicolor
55 | 5.5,2.3,4.0,1.3,versicolor
56 | 
6.5,2.8,4.6,1.5,versicolor 57 | 5.7,2.8,4.5,1.3,versicolor 58 | 6.3,3.3,4.7,1.6,versicolor 59 | 4.9,2.4,3.3,1.0,versicolor 60 | 6.6,2.9,4.6,1.3,versicolor 61 | 5.2,2.7,3.9,1.4,versicolor 62 | 5.0,2.0,3.5,1.0,versicolor 63 | 5.9,3.0,4.2,1.5,versicolor 64 | 6.0,2.2,4.0,1.0,versicolor 65 | 6.1,2.9,4.7,1.4,versicolor 66 | 5.6,2.9,3.6,1.3,versicolor 67 | 6.7,3.1,4.4,1.4,versicolor 68 | 5.6,3.0,4.5,1.5,versicolor 69 | 5.8,2.7,4.1,1.0,versicolor 70 | 6.2,2.2,4.5,1.5,versicolor 71 | 5.6,2.5,3.9,1.1,versicolor 72 | 5.9,3.2,4.8,1.8,versicolor 73 | 6.1,2.8,4.0,1.3,versicolor 74 | 6.3,2.5,4.9,1.5,versicolor 75 | 6.1,2.8,4.7,1.2,versicolor 76 | 6.4,2.9,4.3,1.3,versicolor 77 | 6.6,3.0,4.4,1.4,versicolor 78 | 6.8,2.8,4.8,1.4,versicolor 79 | 6.7,3.0,5.0,1.7,versicolor 80 | 6.0,2.9,4.5,1.5,versicolor 81 | 5.7,2.6,3.5,1.0,versicolor 82 | 5.5,2.4,3.8,1.1,versicolor 83 | 5.5,2.4,3.7,1.0,versicolor 84 | 5.8,2.7,3.9,1.2,versicolor 85 | 6.0,2.7,5.1,1.6,versicolor 86 | 5.4,3.0,4.5,1.5,versicolor 87 | 6.0,3.4,4.5,1.6,versicolor 88 | 6.7,3.1,4.7,1.5,versicolor 89 | 6.3,2.3,4.4,1.3,versicolor 90 | 5.6,3.0,4.1,1.3,versicolor 91 | 5.5,2.5,4.0,1.3,versicolor 92 | 5.5,2.6,4.4,1.2,versicolor 93 | 6.1,3.0,4.6,1.4,versicolor 94 | 5.8,2.6,4.0,1.2,versicolor 95 | 5.0,2.3,3.3,1.0,versicolor 96 | 5.6,2.7,4.2,1.3,versicolor 97 | 5.7,3.0,4.2,1.2,versicolor 98 | 5.7,2.9,4.2,1.3,versicolor 99 | 6.2,2.9,4.3,1.3,versicolor 100 | 5.1,2.5,3.0,1.1,versicolor 101 | 5.7,2.8,4.1,1.3,versicolor 102 | 6.3,3.3,6.0,2.5,virginica 103 | 5.8,2.7,5.1,1.9,virginica 104 | 7.1,3.0,5.9,2.1,virginica 105 | 6.3,2.9,5.6,1.8,virginica 106 | 6.5,3.0,5.8,2.2,virginica 107 | 7.6,3.0,6.6,2.1,virginica 108 | 4.9,2.5,4.5,1.7,virginica 109 | 7.3,2.9,6.3,1.8,virginica 110 | 6.7,2.5,5.8,1.8,virginica 111 | 7.2,3.6,6.1,2.5,virginica 112 | 6.5,3.2,5.1,2.0,virginica 113 | 6.4,2.7,5.3,1.9,virginica 114 | 6.8,3.0,5.5,2.1,virginica 115 | 5.7,2.5,5.0,2.0,virginica 116 | 5.8,2.8,5.1,2.4,virginica 117 | 6.4,3.2,5.3,2.3,virginica 118 | 6.5,3.0,5.5,1.8,virginica 119 | 7.7,3.8,6.7,2.2,virginica 120 | 7.7,2.6,6.9,2.3,virginica 121 | 6.0,2.2,5.0,1.5,virginica 122 | 6.9,3.2,5.7,2.3,virginica 123 | 5.6,2.8,4.9,2.0,virginica 124 | 7.7,2.8,6.7,2.0,virginica 125 | 6.3,2.7,4.9,1.8,virginica 126 | 6.7,3.3,5.7,2.1,virginica 127 | 7.2,3.2,6.0,1.8,virginica 128 | 6.2,2.8,4.8,1.8,virginica 129 | 6.1,3.0,4.9,1.8,virginica 130 | 6.4,2.8,5.6,2.1,virginica 131 | 7.2,3.0,5.8,1.6,virginica 132 | 7.4,2.8,6.1,1.9,virginica 133 | 7.9,3.8,6.4,2.0,virginica 134 | 6.4,2.8,5.6,2.2,virginica 135 | 6.3,2.8,5.1,1.5,virginica 136 | 6.1,2.6,5.6,1.4,virginica 137 | 7.7,3.0,6.1,2.3,virginica 138 | 6.3,3.4,5.6,2.4,virginica 139 | 6.4,3.1,5.5,1.8,virginica 140 | 6.0,3.0,4.8,1.8,virginica 141 | 6.9,3.1,5.4,2.1,virginica 142 | 6.7,3.1,5.6,2.4,virginica 143 | 6.9,3.1,5.1,2.3,virginica 144 | 5.8,2.7,5.1,1.9,virginica 145 | 6.8,3.2,5.9,2.3,virginica 146 | 6.7,3.3,5.7,2.5,virginica 147 | 6.7,3.0,5.2,2.3,virginica 148 | 6.3,2.5,5.0,1.9,virginica 149 | 6.5,3.0,5.2,2.0,virginica 150 | 6.2,3.4,5.4,2.3,virginica 151 | 5.9,3.0,5.1,1.8,virginica 152 | -------------------------------------------------------------------------------- /chapter8/iris_classification_xgboost/data/predict.csv: -------------------------------------------------------------------------------- 1 | sepal_length,sepal_width,petal_length,petal_width,species 2 | 5.1,3.5,1.4,0.2,setosa 3 | -------------------------------------------------------------------------------- /chapter8/iris_classification_xgboost/iris_classification_library/Cargo.toml: 
--------------------------------------------------------------------------------
1 | [package]
2 | name = "iris_classification_xgboost"
3 | version = "0.1.0"
4 | authors = ["joydeep bhattacharjee "]
5 | edition = "2018"
6 | 
7 | [dependencies]
8 | csv = "1.0.7"
9 | serde = "1.0.92"
10 | serde_derive = "1.0.92"
11 | rand = "0.6"
12 | xgboost = "0.1.4"
13 | ml-utils = { path = "../../../chapter2/ml-utils" }
14 | jni = "0.12.3"
15 | 
16 | 
17 | [lib]
18 | name = "iris_classification_xgboost"
19 | crate-type = ["cdylib"]
-------------------------------------------------------------------------------- /chapter8/my_lambda_function/.cargo/config: --------------------------------------------------------------------------------
1 | [target.x86_64-unknown-linux-musl]
2 | linker = "x86_64-linux-musl-gcc"
3 | 
-------------------------------------------------------------------------------- /chapter8/my_lambda_function/Cargo.toml: --------------------------------------------------------------------------------
1 | [package]
2 | name = "my_lambda_function"
3 | version = "0.1.0"
4 | authors = ["joydeep bhattacharjee"]
5 | autobins = false
6 | edition = "2018"
7 | 
8 | [dependencies]
9 | lambda_runtime = "0.1"
10 | serde = "^1"
11 | serde_derive = "^1"
12 | serde_json = "^1"
13 | log = "0.4"
14 | simple_logger = "^1"
15 | regex = "1"
16 | 
17 | [[bin]]
18 | name = "bootstrap"
19 | path = "src/main.rs"
-------------------------------------------------------------------------------- /chapter8/my_lambda_function/buildthis.sh: --------------------------------------------------------------------------------
1 | rm -f rust.zip
2 | cargo build --release --target x86_64-unknown-linux-musl
3 | zip -j rust.zip target/x86_64-unknown-linux-musl/release/bootstrap
-------------------------------------------------------------------------------- /chapter8/my_lambda_function/src/main.rs: --------------------------------------------------------------------------------
1 | use serde_derive;
2 | use serde_derive::{Serialize, Deserialize};
3 | use lambda_runtime;
4 | use lambda_runtime::{lambda, Context, error::HandlerError};
5 | use log;
6 | use log::error;
7 | use std::error::Error;
8 | use std::collections;
9 | use std::collections::hash_map::Entry::{Occupied, Vacant};
10 | use regex;
11 | use regex::Regex;
12 | 
13 | #[derive(Serialize, Deserialize)]
14 | struct CustomEvent {
15 |     string: String,
16 | }
17 | 
18 | fn main() -> Result<(), Box<dyn Error>> {
19 |     simple_logger::init_with_level(log::Level::Debug).unwrap();
20 |     lambda!(my_handler);
21 | 
22 |     Ok(())
23 | }
24 | 
25 | fn my_handler(event: CustomEvent, ctx: Context) -> Result<String, HandlerError> {
26 |     if event.string == "" {
27 |         error!("Empty string in request {}", ctx.aws_request_id);
28 |         return Err(ctx.new_error("Empty input string"));
29 |     }
30 |     let mut map = collections::HashMap::<String, u32>::new();
31 |     let re = Regex::new(r"\w+").unwrap();
32 |     for caps in re.captures_iter(&event.string) {
33 |         if let Some(cap) = caps.get(0) {
34 |             let word = cap.as_str();
35 |             match map.entry(word.to_string()) {
36 |                 Occupied(mut view) => { *view.get_mut() += 1; }
37 |                 Vacant(view) => { view.insert(1); }
38 |             }
39 |         }
40 |     }
41 | 
42 |     // Serialise to a json string
43 |     let j = serde_json::to_string(&map).unwrap();
44 | 
45 |     Ok(j)
46 | }
47 | 
-------------------------------------------------------------------------------- /errata.md: --------------------------------------------------------------------------------
1 | # Errata for *Practical Machine Learning with Rust*
2 | 
3 | On **page xx** [Summary of error]:
4 | 
5 | Details of error here.
Highlight key pieces in **bold**. 6 | 7 | *** 8 | 9 | On **page xx** [Summary of error]: 10 | 11 | Details of error here. Highlight key pieces in **bold**. 12 | 13 | *** --------------------------------------------------------------------------------