├── .gitignore ├── src ├── globals.rs ├── lib.rs ├── flexdatapoint.rs ├── datapoint.rs ├── flexdatavector.rs ├── helper.rs ├── series.rs ├── flexdata.rs ├── flexseries.rs └── flextable.rs ├── .github └── workflows │ └── rust.yml ├── Cargo.toml ├── tests ├── datapoint_test.rs ├── series_test.rs ├── flexdata_test.rs ├── timeseries_test.rs ├── flextable_test.rs └── flexseries_test.rs └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | Cargo.lock 3 | -------------------------------------------------------------------------------- /src/globals.rs: -------------------------------------------------------------------------------- 1 | #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] 2 | pub enum FlexIndex { 3 | Str(String), 4 | Uint(usize) 5 | } 6 | 7 | #[derive(Debug, Serialize, Deserialize, Clone)] 8 | pub enum FlexIndexType { 9 | Str, 10 | Uint 11 | } 12 | -------------------------------------------------------------------------------- /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: [ "master" ] 6 | pull_request: 7 | branches: [ "master" ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | - name: Build 20 | run: cargo build --verbose 21 | - name: Run tests 22 | run: cargo test --verbose 23 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "datatoolkit" 3 | version = "0.1.0" 4 | authors = ["Vegapit "] 5 | edition = "2018" 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | 9 | [dependencies] 10 | prettytable-rs = "0.10" 11 | rayon = "1.10" 12 
/// Exercises `DataPoint` equality, the index-guarded arithmetic operators
/// and in-place `apply`.
#[test]
fn datapoint_operations() {
    let late_march = Utc.with_ymd_and_hms(2007, 3, 31, 23, 59, 59).unwrap();
    let early_april = Utc.with_ymd_and_hms(2007, 4, 1, 0, 0, 0).unwrap();

    let ten = DataPoint::new(late_march, 10f64);
    let five = DataPoint::new(late_march, 5f64);
    let mut five_later = DataPoint::new(early_april, 5f64);

    // Same index with different data, then same data with a different index:
    // `assert_ne!` prints both operands if the expectation fails.
    assert_ne!( ten, five );
    assert_ne!( five, five_later );

    // Operands that share an index combine into `Some(..)`.
    assert_eq!( (&ten + &five).unwrap().get(), &15f64 );
    assert_eq!( (&ten - &five).unwrap().get(), &5f64 );
    assert_eq!( (&ten * &five).unwrap().get(), &50f64 );
    assert_eq!( (&ten / &five).unwrap().get(), &2f64 );
    // Operands with different indices do not combine.
    assert_eq!( &ten + &five_later, None );

    five_later.apply(|&x: &f64| x.powf(2f64));
    assert_eq!( five_later.get(), &25f64 );
}
-------------------------------------------------------------------------------- /tests/series_test.rs: -------------------------------------------------------------------------------- 1 | extern crate datatoolkit; 2 | 3 | use datatoolkit::{DataPoint,Series}; 4 | 5 | fn build_series() -> Series { 6 | let dps = vec![ 7 | DataPoint::new(0, 'v'), 8 | DataPoint::new(1, 'e'), 9 | DataPoint::new(2, 'g'), 10 | DataPoint::new(3, 'a'), 11 | DataPoint::new(4, 'p'), 12 | DataPoint::new(5, 'i'), 13 | DataPoint::new(6, 't') 14 | ]; 15 | Series::from_vec( "Test", dps ) 16 | } 17 | 18 | #[test] 19 | fn getters() { 20 | let ts = build_series(); 21 | // Get method 22 | assert_eq!( ts.at(&2, 0).unwrap().get(), &'g' ); 23 | assert_eq!( ts.at(&2, -1).unwrap().get(), &'e' ); 24 | assert_eq!( ts.at(&3, 1).unwrap().get(), &'p' ); 25 | assert_eq!( ts.at(&7, 0), None ); 26 | // Latest range method 27 | let res = ts.range(-3, -1); 28 | assert_eq!( res[0].get(), &'p' ); 29 | assert_eq!( res[1].get(), &'i' ); 30 | assert_eq!( res[2].get(), &'t' ); 31 | // Range method 32 | let res = ts.range_at(&5, 2, -2); 33 | assert_eq!( res[0].get(), &'a' ); 34 | assert_eq!( res[1].get(), &'p' ); 35 | // Index 36 | assert_eq!( ts[-1i32].get(), &'t' ); // Last element 37 | assert_eq!( ts[0i32].get(), &'v' ); // First element 38 | } 39 | 40 | #[test] 41 | fn iterator() { 42 | let ts = build_series(); 43 | let res : Vec> = ts.into_iter().collect(); 44 | assert_eq!(res.len(), 7); 45 | assert_eq!(res[2].get(), &'g'); 46 | } -------------------------------------------------------------------------------- /tests/flexdata_test.rs: -------------------------------------------------------------------------------- 1 | extern crate datatoolkit; 2 | 3 | use datatoolkit::FlexData; 4 | use std::convert::TryFrom; 5 | 6 | #[test] 7 | fn into_from() { 8 | let flexdata_string = FlexData::from( String::from("datatoolkit") ); 9 | let flexdata_float = FlexData::from( 1f64 ); 10 | 11 | let s = String::try_from( &flexdata_string 
).unwrap(); 12 | let d = f64::try_from( &flexdata_float ).unwrap(); 13 | 14 | assert_eq!( d, 1f64); 15 | assert_eq!( s, String::from("datatoolkit")); 16 | } 17 | 18 | #[test] 19 | fn operators() { 20 | // Int 21 | assert_eq!( FlexData::from( 3 ), &FlexData::from( -5 ) + &FlexData::from( 8 ) ); 22 | assert_eq!( FlexData::from( 3 ), &FlexData::from( 8 ) - &FlexData::from( 5 ) ); 23 | assert_eq!( FlexData::from( 15 ), &FlexData::from( 5 ) * &FlexData::from( 3 ) ); 24 | assert_eq!( FlexData::from( 3 ), &FlexData::from( 15 ) / &FlexData::from( 5 ) ); 25 | 26 | let mut i = FlexData::from( 1 ); 27 | i += FlexData::from( 5 ); 28 | assert_eq!( i, FlexData::from( 6 ) ); 29 | 30 | // Uint 31 | assert_eq!( FlexData::from( 8 ), &FlexData::from( 5 ) + &FlexData::from( 3 ) ); 32 | assert_eq!( FlexData::from( 8 ), &FlexData::from( 15 ) - &FlexData::from( 7 ) ); 33 | assert_eq!( FlexData::from( 8 ), &FlexData::from( 2 ) * &FlexData::from( 4 ) ); 34 | assert_eq!( FlexData::from( 8 ), &FlexData::from( 32 ) / &FlexData::from( 4 ) ); 35 | 36 | let mut u = FlexData::from( 1 ); 37 | u += FlexData::from( 5 ); 38 | assert_eq!( u, FlexData::from( 6 ) ); 39 | } -------------------------------------------------------------------------------- /src/flexdatapoint.rs: -------------------------------------------------------------------------------- 1 | use crate::{FlexData, FlexDataType, FlexIndex}; 2 | use crate::helper::{convert, derive_datatype}; 3 | 4 | #[derive(Debug, Serialize, Deserialize, Clone)] 5 | pub struct FlexDataPoint { 6 | index: FlexIndex, 7 | data: FlexData 8 | } 9 | 10 | impl FlexDataPoint { 11 | 12 | pub fn new(index: FlexIndex, data: FlexData) -> Self { 13 | Self { index, data } 14 | } 15 | 16 | pub fn get_data(&self) -> &FlexData { 17 | &self.data 18 | } 19 | 20 | pub fn set_data(&mut self, data: FlexData) { 21 | self.data = data 22 | } 23 | 24 | pub fn get_index(&self) -> &FlexIndex { 25 | &self.index 26 | } 27 | 28 | pub fn set_index(&mut self, index: FlexIndex) { 29 | 
self.index = index; 30 | } 31 | 32 | pub fn get_datatype(&self) -> FlexDataType { 33 | derive_datatype( &self.data ) 34 | } 35 | 36 | // Inspection 37 | 38 | pub fn verify(&self, f: impl Fn(&FlexData) -> bool) -> bool { 39 | f(&self.data) 40 | } 41 | 42 | pub fn has_na(&self) -> bool { 43 | !self.verify(|x: &FlexData| x != &FlexData::NA) 44 | } 45 | 46 | // Transformation 47 | 48 | pub fn as_type(&self, datatype: &FlexDataType) -> Self { 49 | Self::new( self.index.clone(), convert(&self.data, datatype)) 50 | } 51 | 52 | pub fn apply(&self, f: impl Fn(&FlexData) -> FlexData) -> Self { 53 | Self::new( self.index.clone(), f(&self.data)) 54 | } 55 | 56 | } 57 | 58 | impl PartialEq for FlexDataPoint { 59 | fn eq(&self, other: &FlexDataPoint) -> bool { 60 | self.index == other.index && self.data == other.data 61 | } 62 | } 63 | 64 | impl Eq for FlexDataPoint{} 65 | 66 | impl PartialOrd for FlexDataPoint { 67 | fn partial_cmp(&self, other: &FlexDataPoint) -> Option { 68 | self.data.partial_cmp(&other.data) 69 | } 70 | } -------------------------------------------------------------------------------- /tests/timeseries_test.rs: -------------------------------------------------------------------------------- 1 | extern crate datatoolkit; 2 | extern crate chrono; 3 | 4 | use datatoolkit::{DataPoint,Series}; 5 | use chrono::{DateTime, Utc, TimeZone}; 6 | 7 | fn build_series() -> Series, usize> { 8 | let dps = vec![ 9 | DataPoint::new(Utc.with_ymd_and_hms(2008, 1, 1,0, 0, 0).unwrap(), 122), 10 | DataPoint::new(Utc.with_ymd_and_hms(2008, 1, 1,0, 1, 0).unwrap(), 120), 11 | DataPoint::new(Utc.with_ymd_and_hms(2008, 1, 1,0, 2, 0).unwrap(), 118), 12 | DataPoint::new(Utc.with_ymd_and_hms(2008, 1, 1,0, 3, 0).unwrap(), 114), 13 | DataPoint::new(Utc.with_ymd_and_hms(2008, 1, 1,0, 5, 0).unwrap(), 116), 14 | DataPoint::new(Utc.with_ymd_and_hms(2008, 1, 1,0, 4, 0).unwrap(), 117) 15 | ]; 16 | Series::from_vec( "Test", dps ) 17 | } 18 | 19 | #[test] 20 | fn getters() { 21 | let ts = 
/// Collecting the series yields all six points of the fixture.
#[test]
fn iterator() {
    let points : Vec<DataPoint<DateTime<Utc>, usize>> = build_series().into_iter().collect();
    assert_eq!(points.len(), 6);
    assert_eq!(points[2].get(), &118);
}

/// The last element of the cumulative sum is the total of all values
/// (122 + 120 + 118 + 114 + 117 + 116 = 707).
#[test]
fn cumsum() {
    let totals = build_series().cumsum();
    assert_eq!( totals[-1i32].get(), &707 );
}

/// `insert_add` on an index that already exists adds to the stored value
/// (118 + 5 = 123).
#[test]
fn insert() {
    let mut ts = build_series();
    let date = Utc.with_ymd_and_hms(2008, 1, 1, 0, 2, 0).unwrap();
    // `DateTime<Utc>` is `Copy`, so no explicit clone is needed.
    ts.insert_add( DataPoint::new( date, 5 ) );
    assert_eq!( ts.at( &date, 0 ).unwrap().get(), &123 );
}
= df[["Div","Date","Time","HomeTeam","AwayTeam","FTHG","FTAG","B365H","B365D","B365A"]] 11 | 12 | let headers = vec!["Div","Date","Time","HomeTeam","AwayTeam","FTHG","FTAG","B365H","B365D","B365A"]; 13 | let datatypes = vec![ 14 | FlexDataType::Str, 15 | FlexDataType::Str, 16 | FlexDataType::Str, 17 | FlexDataType::Str, 18 | FlexDataType::Str, 19 | FlexDataType::Uint, 20 | FlexDataType::Uint, 21 | FlexDataType::Dbl, 22 | FlexDataType::Dbl, 23 | FlexDataType::Dbl 24 | ]; 25 | 26 | let text = read_to_string("./tests/E3.csv").expect("File Not Found"); 27 | FlexTable::from_csv(text.as_str(), headers.into_iter().map(String::from).collect(), datatypes) 28 | } 29 | 30 | #[test] 31 | fn csv_import() { 32 | 33 | let mut table = create_table(); 34 | assert!( table.has_na() ); 35 | 36 | // All games where one team scored more than 3 goals 37 | // Pandas equivalent: df.where((df['FTHG'] > 3) | (df['FTAG'] > 3)) 38 | let f = |x: &FlexData| x > &FlexData::Uint(3); 39 | table.filter_any(&["FTHG","FTAG"], f).print( Some(20) ); 40 | 41 | // All games where no goals were scored 42 | // Pandas equivalent: df.where((df['FTHG'] == 0) & (df['FTAG'] == 0)) 43 | let f = |x: &FlexData| x == &FlexData::Uint(0); 44 | table.filter_all(&["FTHG","FTAG"], f).print( Some(20) ); 45 | 46 | // Create new series as function of others 47 | // using helper functions to condense the code 48 | // Pandas equivalent: df['GoalDiff'] = df['FTHG'] - df['FTAG'] 49 | let series = table.extract_series(&["FTHG","FTAG"]); 50 | let gd_series = series[0].sub( "GoalDiff", &FlexDataType::Int, &series[1] ); 51 | table.add_series( gd_series ); 52 | 53 | // Pandas equivalent: print( df.head(10) ) 54 | table.print( Some(10) ); // print first 10 records only 55 | 56 | // Pandas equivalent: print( df.iloc[24,:] ) 57 | table[24].print(); 58 | 59 | // Subset selection 60 | table.get_subset( vec![FlexIndex::Uint(12), FlexIndex::Uint(30)]).print( None ); 61 | 62 | // Group by Hometeams 63 | for (k,v) in 
FlexTable::group_by(&table, "HomeTeam") { 64 | println!("{}", k); 65 | v.print( Some(5) ); 66 | break; 67 | } 68 | 69 | let filtered_table = table.drop_na(); 70 | assert!( filtered_table.has_na() == false ); 71 | 72 | let filtered_series = filtered_table.extract_series(&["B365H", "B365A"]); 73 | let corr = filtered_series[0].pearson_correlation(&filtered_series[1]).unwrap(); 74 | assert!( corr < 0.0 ); 75 | 76 | //table.to_csv("test.csv"); 77 | } -------------------------------------------------------------------------------- /src/datapoint.rs: -------------------------------------------------------------------------------- 1 | use std::cmp::Ordering; 2 | use std::ops::*; 3 | 4 | #[derive(Debug, Serialize, Deserialize, Clone)] 5 | pub struct DataPoint { 6 | index: T, 7 | data: U 8 | } 9 | 10 | impl DataPoint { 11 | pub fn new(index: T, data: U) -> DataPoint { 12 | DataPoint{ data, index } 13 | } 14 | 15 | pub fn get_index(&self) -> &T { 16 | &self.index 17 | } 18 | 19 | pub fn get(&self) -> &U { 20 | &self.data 21 | } 22 | 23 | pub fn set(&mut self, value: U) { 24 | self.data = value; 25 | } 26 | 27 | pub fn apply(&mut self, f: impl Fn(&U) -> U) { 28 | self.data = f(&self.data); 29 | } 30 | } 31 | 32 | impl Ord for DataPoint { 33 | fn cmp(&self, other: &DataPoint) -> Ordering { 34 | self.index.cmp(&other.index) 35 | } 36 | } 37 | 38 | impl PartialOrd for DataPoint { 39 | fn partial_cmp(&self, other: &DataPoint) -> Option { 40 | Some( self.index.cmp(&other.index) ) 41 | } 42 | } 43 | 44 | impl PartialEq for DataPoint { 45 | fn eq(&self, other: &DataPoint) -> bool { 46 | self.index == other.index && self.data == other.data 47 | } 48 | } 49 | 50 | impl Eq for DataPoint {} 51 | 52 | impl Add<&DataPoint> for &DataPoint { 53 | type Output = Option>; 54 | 55 | fn add(self, other: &DataPoint) -> Self::Output { 56 | if self.index == other.index { 57 | let mut dp = self.clone(); 58 | dp.data += other.data; 59 | Some( dp ) 60 | } else { 61 | None 62 | } 63 | } 64 | } 65 | 66 
| impl Sub<&DataPoint> for &DataPoint { 67 | type Output = Option>; 68 | 69 | fn sub(self, other: &DataPoint) -> Self::Output { 70 | if self.index == other.index { 71 | let mut dp = self.clone(); 72 | dp.data -= other.data; 73 | Some( dp ) 74 | } else { 75 | None 76 | } 77 | } 78 | } 79 | 80 | impl Mul<&DataPoint> for &DataPoint { 81 | type Output = Option>; 82 | 83 | fn mul(self, other: &DataPoint) -> Self::Output { 84 | if self.index == other.index { 85 | let mut dp = self.clone(); 86 | dp.data *= other.data; 87 | Some( dp ) 88 | } else { 89 | None 90 | } 91 | } 92 | } 93 | 94 | impl Div<&DataPoint> for &DataPoint { 95 | type Output = Option>; 96 | 97 | fn div(self, other: &DataPoint) -> Self::Output { 98 | if self.index == other.index { 99 | let mut dp = self.clone(); 100 | dp.data /= other.data; 101 | Some( dp ) 102 | } else { 103 | None 104 | } 105 | } 106 | } -------------------------------------------------------------------------------- /tests/flexseries_test.rs: -------------------------------------------------------------------------------- 1 | extern crate datatoolkit; 2 | 3 | use datatoolkit::{FlexDataType, FlexSeries, FlexDataPoint, FlexData, FlexIndex}; 4 | 5 | fn make_double_series1() -> FlexSeries { 6 | let datapoints = vec![ 7 | FlexDataPoint::new(FlexIndex::Uint(1), FlexData::Dbl(2.5)), 8 | FlexDataPoint::new(FlexIndex::Uint(2), FlexData::Dbl(1.2)), 9 | FlexDataPoint::new(FlexIndex::Uint(3), FlexData::Dbl(3.6)), 10 | FlexDataPoint::new(FlexIndex::Uint(4), FlexData::Dbl(0.1)), 11 | FlexDataPoint::new(FlexIndex::Uint(5), FlexData::Dbl(0.7)), 12 | FlexDataPoint::new(FlexIndex::Uint(6), FlexData::Dbl(1.8)), 13 | FlexDataPoint::new(FlexIndex::Uint(7), FlexData::Dbl(2.7)), 14 | FlexDataPoint::new(FlexIndex::Uint(8), FlexData::Dbl(2.9)), 15 | FlexDataPoint::new(FlexIndex::Uint(9), FlexData::Dbl(1.4)), 16 | FlexDataPoint::new(FlexIndex::Uint(10), FlexData::Dbl(0.3)) 17 | ]; 18 | FlexSeries::from_vec("dummy1", FlexDataType::Dbl, datapoints) 19 | } 20 | 21 
/// Checks index lookup, membership tests and subset extraction.
#[test]
fn selectors() {
    let series1 = make_double_series1();
    let present = FlexIndex::Uint(3);
    let absent = FlexIndex::Uint(12);

    assert_eq!( series1.at( &present ), Some( &FlexDataPoint::new(FlexIndex::Uint(3), FlexData::Dbl(3.6)) ) );
    assert_eq!( series1.at( &absent ), None );
    // Boolean results are asserted directly instead of compared to literals.
    assert!( series1.contains( &present ) );
    assert!( !series1.contains( &absent ) );

    // Index 12 is not in the fixture, so only one of the two requested
    // indices ends up in the subset.
    let subset = series1.get_subset( vec![present, absent] );
    assert_eq!( subset.get_size(), 1 );
}
(1.3951f64 - v1).abs() < 1e-4 ); 70 | 71 | let m2 = series2.mean().unwrap(); 72 | assert!( (2.32f64 - m2).abs() < 1e-5 ); 73 | 74 | let v2 = series2.variance(true).unwrap(); 75 | assert!( (0.8795f64 - v2).abs() < 1e-4 ); 76 | 77 | let cov = series1.covariance( &series2, true ).unwrap(); 78 | assert!( (-0.996f64 - cov).abs() < 1e-4 ); 79 | 80 | let corr = series1.pearson_correlation( &series2 ).unwrap(); 81 | println!("{:?}", corr); 82 | assert!( (-0.8991f64 - corr).abs() < 1e-4 ); 83 | } -------------------------------------------------------------------------------- /src/flexdatavector.rs: -------------------------------------------------------------------------------- 1 | use std::ops::*; 2 | use crate::{FlexIndex, FlexData, FlexDataType}; 3 | use crate::helper::{convert, derive_datatype}; 4 | use prettytable::{Table, Row, Cell}; 5 | 6 | #[derive(Debug, Serialize, Deserialize, Clone)] 7 | pub struct FlexDataVector { 8 | index: FlexIndex, 9 | data: Vec 10 | } 11 | 12 | impl FlexDataVector { 13 | 14 | pub fn new(index: FlexIndex, data: Vec) -> Self { 15 | Self { index, data } 16 | } 17 | 18 | // Getters and Setters 19 | 20 | pub fn get_index(&self) -> &FlexIndex { 21 | &self.index 22 | } 23 | 24 | pub fn set_index(&mut self, index: FlexIndex) { 25 | self.index = index; 26 | } 27 | 28 | pub fn get_data(&self) -> &Vec { 29 | &self.data 30 | } 31 | 32 | pub fn set_data(&mut self, data: Vec) { 33 | self.data = data; 34 | } 35 | 36 | pub fn get_datatypes(&self) -> Vec { 37 | self.data.iter() 38 | .map(derive_datatype) 39 | .collect() 40 | } 41 | 42 | pub fn get_size(&self) -> usize { 43 | self.data.len() 44 | } 45 | 46 | // Inspection 47 | 48 | pub fn verify(&self, f: impl Fn(&FlexData) -> bool) -> bool { 49 | self.data.iter() 50 | .all(f) 51 | } 52 | 53 | pub fn has_na(&self) -> bool { 54 | !self.verify(|x: &FlexData| x != &FlexData::NA) 55 | } 56 | 57 | // Transformation 58 | 59 | pub fn as_types(&self, datatypes: &Vec) -> Self { 60 | let mod_data : Vec = 
self.data.iter() 61 | .zip(datatypes) 62 | .map(|(d,t)| convert(d, t)) 63 | .collect(); 64 | Self::new( self.index.clone(), mod_data ) 65 | } 66 | 67 | 68 | // Print 69 | 70 | pub fn print(&self) { 71 | let mut table = Table::new(); 72 | let mut types_cells : Vec = self.get_datatypes().iter() 73 | .map(|datatype| { 74 | match datatype { 75 | FlexDataType::Dbl => Cell::new("f64"), 76 | FlexDataType::Uint => Cell::new("u32"), 77 | FlexDataType::Int => Cell::new("i32"), 78 | FlexDataType::Char => Cell::new("char"), 79 | FlexDataType::Str => Cell::new("str"), 80 | FlexDataType::NA => Cell::new("n/a") 81 | } 82 | }) 83 | .collect(); 84 | types_cells.insert(0, Cell::new("")); 85 | table.add_row(Row::new(types_cells)); 86 | let mut record_cells : Vec = Vec::new(); 87 | let index_cell = match &self.index { 88 | FlexIndex::Uint(val) => Cell::new( format!("{}", val).as_str() ), 89 | FlexIndex::Str(val) => Cell::new( val.as_str() ) 90 | }; 91 | record_cells.push(index_cell); 92 | for k in 0..self.get_size() { 93 | let cell = match &self.data[k] { 94 | FlexData::Str(val) => Cell::new( val.as_str() ), 95 | FlexData::Dbl(val) => Cell::new( format!("{:.5}", val).as_str() ), 96 | FlexData::Uint(val) => Cell::new( format!("{}", val).as_str() ), 97 | FlexData::Int(val) => Cell::new( format!("{}", val).as_str() ), 98 | FlexData::Char(val) => Cell::new( format!("{}", val).as_str() ), 99 | FlexData::NA => Cell::new( "N/A" ) 100 | }; 101 | record_cells.push(cell); 102 | } 103 | table.add_row(Row::new(record_cells)); 104 | // Print the table to stdout 105 | table.printstd(); 106 | } 107 | } 108 | 109 | // Implement [] operator 110 | impl Index for FlexDataVector { 111 | type Output = FlexData; 112 | fn index(&self, index: usize) -> &FlexData { 113 | &self.data[index] 114 | } 115 | } 116 | 117 | impl PartialEq for FlexDataVector { 118 | fn eq(&self, other: &FlexDataVector) -> bool { 119 | self.index == other.index && self.data.iter().zip( other.data.iter() ).all(|(a,b)| a == b) 120 | } 
121 | } 122 | 123 | impl Eq for FlexDataVector{} -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DataToolkit 2 | 3 | Pure Rust crate allowing the manipulation of indexed data structures like timeseries: 4 | 5 | ```rust 6 | extern crate datatoolkit; 7 | extern crate chrono; 8 | 9 | use datatoolkit::{DataPoint,TimeSeries}; 10 | use chrono::{Utc, TimeZone}; 11 | 12 | let dps = vec![ 13 | DataPoint::new(Utc.ymd(2008, 1, 1).and_hms(0, 0, 0), 122), 14 | DataPoint::new(Utc.ymd(2008, 1, 1).and_hms(0, 1, 0), 120), 15 | DataPoint::new(Utc.ymd(2008, 1, 1).and_hms(0, 2, 0), 118), 16 | DataPoint::new(Utc.ymd(2008, 1, 1).and_hms(0, 3, 0), 114), 17 | DataPoint::new(Utc.ymd(2008, 1, 1).and_hms(0, 5, 0), 116), 18 | DataPoint::new(Utc.ymd(2008, 1, 1).and_hms(0, 4, 0), 117) 19 | ]; 20 | Series::from_vec( "Test", dps ) 21 | 22 | // Get method 23 | assert_eq!( ts.at(&Utc.ymd(2008, 1, 1).and_hms(0, 2, 0), 0).unwrap().get(), &118 ); 24 | assert_eq!( ts.at(&Utc.ymd(2008, 1, 1).and_hms(0, 2, 0), -1).unwrap().get(), &120 ); 25 | assert_eq!( ts.at(&Utc.ymd(2008, 1, 1).and_hms(0, 3, 0), 1).unwrap().get(), &117 ); 26 | assert_eq!( ts.at(&Utc.ymd(2008, 1, 1).and_hms(0, 6, 0), 0), None ); 27 | // Latest range method 28 | let res = ts.range(-3, -1); 29 | assert_eq!( res[0].get(), &114 ); 30 | assert_eq!( res[1].get(), &117 ); 31 | assert_eq!( res[2].get(), &116 ); 32 | // Range method 33 | let res = ts.range_at(&Utc.ymd(2008, 1, 1).and_hms(0, 5, 0), 2, -2); 34 | assert_eq!( res[0].get(), &114 ); 35 | assert_eq!( res[1].get(), &117 ); 36 | // Index 37 | assert_eq!( ts[-1].get(), &116 ); // Last element 38 | assert_eq!( ts[0].get(), &122 ); // First element 39 | ``` 40 | 41 | Similarly to Pandas in Python, it also handles data from multiple types thanks to flexible data structures like `FlexTable`. 
This example uses the 2019/20 season data from the *English League 2* division from [football-data.co.uk](https://football-data.co.uk): 42 | 43 | ```rust 44 | // Pandas Equivalent: 45 | // df = pd.read_csv('./tests/E3.csv') 46 | // df = df[["Div","Date","Time","HomeTeam","AwayTeam","FTHG","FTAG","B365H","B365D","B365A"]] 47 | 48 | let headers = vec!["Div","Date","Time","HomeTeam","AwayTeam","FTHG","FTAG","B365H","B365D","B365A"]; 49 | let datatypes = vec![ 50 | FlexDataType::Str, 51 | FlexDataType::Str, 52 | FlexDataType::Str, 53 | FlexDataType::Str, 54 | FlexDataType::Str, 55 | FlexDataType::Uint, 56 | FlexDataType::Uint, 57 | FlexDataType::Dbl, 58 | FlexDataType::Dbl, 59 | FlexDataType::Dbl 60 | ]; 61 | let table = FlexTable::from_csv("./tests/E3.csv", headers, datatypes); 62 | ``` 63 | 64 | All data that is missing or does not fit the type requirements is assigned a type of `FlexDataType::NA`. 65 | Here are some examples of generating new series from series in the `FlexTable`. 66 | 67 | ```rust 68 | // All games where one team scored more than 3 goals 69 | // Pandas equivalent: df.where((df['FTHG'] > 3) | (df['FTAG'] > 3)) 70 | let f = |x: &FlexData| x > &FlexData::Uint(3); 71 | table.filter_any(&["FTHG","FTAG"], f).print( Some(20) ); 72 | 73 | // All games where no goals were scored 74 | // Pandas equivalent: df.where((df['FTHG'] == 0) & (df['FTAG'] == 0)) 75 | let f = |x: &FlexData| x == &FlexData::Uint(0); 76 | table.filter_all(&["FTHG","FTAG"], f).print( Some(20) ); 77 | 78 | // Create new series as function of others 79 | // using helper functions to condense the code 80 | // Pandas equivalent: df['GoalDiff'] = df['FTHG'] - df['FTAG'] 81 | let series = table.extract_series(&["FTHG","FTAG"]); 82 | let gd_series = series[0].sub( "GoalDiff", &FlexDataType::Int, &series[1] ); 83 | table.add_series( gd_series ); 84 | 85 | // Pandas equivalent: print( df.head(10) ) 86 | table.print( Some(10) ); // print first 10 records only 87 | 88 | // Pandas equivalent: print(
/// Returns the comma-separated fields of the first line of `text`.
///
/// Fields are returned verbatim: nothing is trimmed or unquoted. When `text`
/// contains no line at all (for example the empty string) the result is an
/// empty vector instead of a panic.
pub fn extract_csv_headers(text: &str) -> Vec<String> {
    text.lines()
        .next()
        .map(|header| header.split(',').map(String::from).collect::<Vec<String>>())
        .unwrap_or_default()
}
FlexData::Str(val.to_string()) 36 | } 37 | } 38 | 39 | pub fn make_index_from_data(data: &FlexData) -> FlexIndex { 40 | match data { 41 | FlexData::Uint(val) => FlexIndex::Uint(*val as usize), 42 | FlexData::Int(val) => FlexIndex::Uint(*val as usize), 43 | FlexData::Char(val) => FlexIndex::Str(format!("{}", val)), 44 | FlexData::Str(val) => FlexIndex::Str(val.to_string()), 45 | _ => panic!("FlexData::NA and FlexData::Dbl can not be indices") 46 | } 47 | } 48 | 49 | pub fn index_intersection(first: Vec<&FlexIndex>, other: Vec<&FlexIndex>) -> Vec { 50 | let set1 : HashSet = first.into_iter().cloned().collect(); 51 | let set2 : HashSet = other.into_iter().cloned().collect(); 52 | set1.intersection(&set2).cloned().collect() 53 | } 54 | 55 | pub fn convert(x: &FlexData, datatype: &FlexDataType) -> FlexData { 56 | match x { 57 | FlexData::Dbl( val ) => { 58 | match datatype { 59 | FlexDataType::Str => FlexData::Str( format!("{}", val) ), 60 | FlexDataType::Dbl => FlexData::Dbl( *val ), 61 | FlexDataType::Int => FlexData::Int( *val as i32 ), 62 | FlexDataType::Uint => FlexData::Uint( *val as u32 ), 63 | _ => FlexData::NA 64 | } 65 | }, 66 | FlexData::Uint( val ) => { 67 | match datatype { 68 | FlexDataType::Str => FlexData::Str( format!("{}", val) ), 69 | FlexDataType::Dbl => FlexData::Dbl( *val as f64 ), 70 | FlexDataType::Int => FlexData::Int( *val as i32 ), 71 | FlexDataType::Uint => FlexData::Uint( *val ), 72 | _ => FlexData::NA 73 | } 74 | }, 75 | FlexData::Int( val ) => { 76 | match datatype { 77 | FlexDataType::Str => FlexData::Str( format!("{}", val) ), 78 | FlexDataType::Dbl => FlexData::Dbl( *val as f64 ), 79 | FlexDataType::Int => FlexData::Int( *val ), 80 | FlexDataType::Uint => FlexData::Uint( *val as u32 ), 81 | _ => FlexData::NA 82 | } 83 | }, 84 | FlexData::Str( val ) => { 85 | match datatype { 86 | FlexDataType::Str => FlexData::Str(val.to_string()), 87 | _ => FlexData::NA 88 | } 89 | }, 90 | FlexData::Char( val ) => { 91 | match datatype { 92 | 
FlexDataType::Str => FlexData::Str( format!("{}", val) ), 93 | FlexDataType::Char => FlexData::Char( *val ), 94 | _ => FlexData::NA 95 | } 96 | }, 97 | _ => FlexData::NA 98 | } 99 | } 100 | 101 | pub fn inverse(x: &FlexData) -> FlexData { 102 | match x { 103 | FlexData::Dbl(val) => { 104 | if val != &0f64 { 105 | FlexData::Dbl(1.0 / val) 106 | } else { 107 | FlexData::NA 108 | } 109 | }, 110 | _ => FlexData::NA 111 | } 112 | } 113 | 114 | pub fn ln(x: &FlexData) -> FlexData { 115 | match x { 116 | FlexData::Dbl(val) => { 117 | if val > &0f64 { 118 | FlexData::Dbl( val.ln() ) 119 | } else { 120 | FlexData::NA 121 | } 122 | }, 123 | _ => FlexData::NA 124 | } 125 | } 126 | 127 | pub fn exp(x: &FlexData) -> FlexData { 128 | match x { 129 | FlexData::Dbl(val) => FlexData::Dbl( val.exp() ), 130 | _ => FlexData::NA 131 | } 132 | } 133 | 134 | pub fn sum(v: Vec) -> FlexData { 135 | let mut total = v[0].clone(); 136 | for elt in v.iter().skip(1) { 137 | total += elt.clone(); 138 | } 139 | total 140 | } -------------------------------------------------------------------------------- /src/series.rs: -------------------------------------------------------------------------------- 1 | use crate::DataPoint; 2 | use std::ops::*; 3 | use std::convert::From; 4 | 5 | #[derive(Debug, Clone)] 6 | pub struct Series { 7 | id: String, 8 | data: Vec>, 9 | opt_max_size: Option, 10 | counter: usize 11 | } 12 | 13 | impl Series { 14 | 15 | pub fn new(id: &str, opt_max_size: Option) -> Series { 16 | Series{ 17 | id: id.to_string(), 18 | data: Vec::>::new(), 19 | opt_max_size, 20 | counter: 0 21 | } 22 | } 23 | 24 | pub fn from_vec(id: &str, items: Vec>) -> Series { 25 | let mut ts = Series::new( id, None ); 26 | for item in items { 27 | ts.insert_update( item ); 28 | } 29 | ts 30 | } 31 | 32 | pub fn get_id(&self) -> &str { 33 | self.id.as_str() 34 | } 35 | 36 | /// Insert item or update if it already exists 37 | pub fn insert_update(&mut self, item: DataPoint) { 38 | if let Some(k) = 
self.data.iter().position(|x| x.get_index() == item.get_index() ) { 39 | self.data[k] = item; 40 | } else { 41 | self.data.push(item); 42 | self.data.sort(); 43 | if let Some( maxsize ) = self.opt_max_size { 44 | if self.data.len() > maxsize { 45 | self.data.remove(0); 46 | } 47 | } 48 | } 49 | } 50 | 51 | /// Get item corresponding to timesignature 52 | pub fn at(&self, index: &T, offset: i32) -> Option> { 53 | let optpos = self.data.iter().rposition(|x| x.get_index() == index); 54 | match optpos { 55 | Some( pos ) => { 56 | if offset > 0{ 57 | let newpos = pos + (offset as usize); 58 | if newpos < self.data.len() { 59 | Some( self.data[newpos].clone() ) 60 | } else { 61 | None 62 | } 63 | } else if pos >= (-offset as usize) { 64 | let newpos = pos - (-offset as usize); 65 | Some( self.data[newpos].clone() ) 66 | } else { 67 | None 68 | } 69 | }, 70 | None => None 71 | } 72 | } 73 | 74 | /// Get range with reference timesignature and startoffset 75 | pub fn range_at(&self, index: &T, size: usize, offset: i32) -> Vec> { 76 | let mut res : Vec> = Vec::new(); 77 | for i in 0..size { 78 | if let Some( data ) = self.at(index, offset + (i as i32)) { 79 | res.push( data ); 80 | } 81 | } 82 | res 83 | } 84 | 85 | /// Get range from start to end inclusive 86 | pub fn range(&self, start: i32, end: i32) -> Vec> { 87 | let is : usize = if start >= 0 { start as usize } else { self.data.len() - start.unsigned_abs() as usize }; 88 | let ie : usize = if end >= 0 { end as usize } else { self.data.len() - end.unsigned_abs() as usize }; 89 | let mut res : Vec> = Vec::new(); 90 | for i in is..=ie { 91 | res.push( self[i].clone() ) 92 | } 93 | res 94 | } 95 | 96 | } 97 | 98 | impl + Copy> Series { 99 | 100 | /// Insert item or add data if it exists 101 | pub fn insert_add(&mut self, item: DataPoint) { 102 | if let Some(k) = self.data.iter().position(|x| x.get_index() == item.get_index() ) { 103 | let val = self.data[k].get().to_owned(); 104 | self.data[k].set( val + *item.get() ); 105 
| } else { 106 | self.data.push(item); 107 | self.data.sort(); 108 | if let Some( maxsize ) = self.opt_max_size { 109 | if self.data.len() > maxsize { 110 | self.data.remove(0); 111 | } 112 | } 113 | } 114 | } 115 | 116 | } 117 | 118 | impl + Copy> Series { 119 | 120 | /// Create the Series of its cumulated sum 121 | pub fn cumsum(&self) -> Series { 122 | let mut running_total : U = 0.into(); 123 | let mut ts = Series::::new( self.id.as_str(), self.opt_max_size ); 124 | for mut dp in self.clone() { 125 | running_total += *dp.get(); 126 | dp.set( running_total ); 127 | ts.insert_update( dp ); 128 | } 129 | ts 130 | } 131 | 132 | } 133 | 134 | // Implement [] operator 135 | impl Index for Series { 136 | type Output = DataPoint; 137 | fn index(&self, index: i32) -> &DataPoint { 138 | if index >= 0 { 139 | &self.data[index as usize] 140 | } else { 141 | &self.data[self.data.len() - (-index as usize)] 142 | } 143 | } 144 | } 145 | 146 | impl Index for Series { 147 | type Output = DataPoint; 148 | fn index(&self, index: usize) -> &DataPoint { 149 | &self.data[index] 150 | } 151 | } 152 | 153 | 154 | impl Iterator for Series { 155 | type Item = DataPoint; 156 | 157 | fn next(&mut self) -> Option { 158 | if self.counter < self.data.len() { 159 | self.counter += 1; 160 | Some( self.data[self.counter - 1].clone() ) 161 | } else { 162 | None 163 | } 164 | } 165 | } -------------------------------------------------------------------------------- /src/flexdata.rs: -------------------------------------------------------------------------------- 1 | use std::ops::*; 2 | use std::convert::TryFrom; 3 | use std::iter::Sum; 4 | use crate::helper::derive_datatype; 5 | 6 | #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)] 7 | pub enum FlexDataType { 8 | Str, 9 | Uint, 10 | Int, 11 | Dbl, 12 | Char, 13 | NA 14 | } 15 | 16 | #[derive(Debug, Serialize, Deserialize, PartialEq, PartialOrd, Clone)] 17 | pub enum FlexData { 18 | Str(String), 19 | Uint(u32), 20 | Int(i32), 21 | 
Dbl(f64), 22 | Char(char), 23 | NA 24 | } 25 | 26 | impl From for FlexData { 27 | fn from(value: String) -> FlexData { 28 | FlexData::Str(value) 29 | } 30 | } 31 | 32 | impl From for FlexData { 33 | fn from(value: u32) -> FlexData { 34 | FlexData::Uint(value) 35 | } 36 | } 37 | 38 | impl From for FlexData { 39 | fn from(value: i32) -> FlexData { 40 | FlexData::Int(value) 41 | } 42 | } 43 | 44 | impl From for FlexData { 45 | fn from(value: f64) -> FlexData { 46 | FlexData::Dbl(value) 47 | } 48 | } 49 | 50 | impl From for FlexData { 51 | fn from(value: char) -> FlexData { 52 | FlexData::Char(value) 53 | } 54 | } 55 | 56 | // Into Implementation 57 | 58 | impl TryFrom<&FlexData> for String { 59 | type Error = &'static str; 60 | fn try_from(value: &FlexData) -> Result { 61 | match value { 62 | FlexData::Str(v) => Ok(v.to_string()), 63 | _ => Err("Only FlexData::Str can be extracted to String") 64 | } 65 | } 66 | } 67 | 68 | impl TryFrom<&FlexData> for f64 { 69 | type Error = &'static str; 70 | fn try_from(value: &FlexData) -> Result { 71 | match value { 72 | FlexData::Dbl(v) => Ok(*v), 73 | _ => Err("Only FlexData::Dbl can be extracted to f64") 74 | } 75 | } 76 | } 77 | 78 | impl TryFrom<&FlexData> for u32 { 79 | type Error = &'static str; 80 | fn try_from(value: &FlexData) -> Result { 81 | match value { 82 | FlexData::Uint(v) => Ok(*v), 83 | _ => Err("Only FlexData::Uint can be extracted to u32") 84 | } 85 | } 86 | } 87 | 88 | impl TryFrom<&FlexData> for i32 { 89 | type Error = &'static str; 90 | fn try_from(value: &FlexData) -> Result { 91 | match value { 92 | FlexData::Int(v) => Ok(*v), 93 | _ => Err("Only FlexData::Int can be extracted to i32") 94 | } 95 | } 96 | } 97 | 98 | // Operators 99 | 100 | impl Add for &FlexData { 101 | type Output = FlexData; 102 | fn add(self, other: &FlexData) -> Self::Output { 103 | match self { 104 | FlexData::Dbl(val) => { 105 | match other { 106 | FlexData::Dbl(other_val) => FlexData::Dbl(val + other_val), 107 | _ => FlexData::NA 
108 | } 109 | }, 110 | FlexData::Uint(val) => { 111 | match other { 112 | FlexData::Uint(other_val) => FlexData::Uint(val + other_val), 113 | _ => FlexData::NA 114 | } 115 | }, 116 | FlexData::Int(val) => { 117 | match other { 118 | FlexData::Int(other_val) => FlexData::Int(val + other_val), 119 | _ => FlexData::NA 120 | } 121 | }, 122 | _ => FlexData::NA 123 | } 124 | } 125 | } 126 | 127 | impl Sub for &FlexData { 128 | type Output = FlexData; 129 | fn sub(self, other: &FlexData) -> Self::Output { 130 | match self { 131 | FlexData::Dbl(val) => { 132 | match other { 133 | FlexData::Dbl(other_val) => FlexData::Dbl(val - other_val), 134 | _ => FlexData::NA 135 | } 136 | }, 137 | FlexData::Uint(val) => { 138 | match other { 139 | FlexData::Uint(other_val) => FlexData::Uint(val - other_val), 140 | _ => FlexData::NA 141 | } 142 | }, 143 | FlexData::Int(val) => { 144 | match other { 145 | FlexData::Int(other_val) => FlexData::Int(val - other_val), 146 | _ => FlexData::NA 147 | } 148 | }, 149 | _ => FlexData::NA 150 | } 151 | } 152 | } 153 | 154 | impl Mul for &FlexData { 155 | type Output = FlexData; 156 | fn mul(self, other: &FlexData) -> Self::Output { 157 | match self { 158 | FlexData::Dbl(val) => { 159 | match other { 160 | FlexData::Dbl(other_val) => FlexData::Dbl(val * other_val), 161 | _ => FlexData::NA 162 | } 163 | }, 164 | FlexData::Uint(val) => { 165 | match other { 166 | FlexData::Uint(other_val) => FlexData::Uint(val * other_val), 167 | _ => FlexData::NA 168 | } 169 | }, 170 | FlexData::Int(val) => { 171 | match other { 172 | FlexData::Int(other_val) => FlexData::Int(val * other_val), 173 | _ => FlexData::NA 174 | } 175 | }, 176 | _ => FlexData::NA 177 | } 178 | } 179 | } 180 | 181 | impl Div for &FlexData { 182 | type Output = FlexData; 183 | fn div(self, other: &FlexData) -> Self::Output { 184 | match self { 185 | FlexData::Dbl(val) => { 186 | match other { 187 | FlexData::Dbl(other_val) => { 188 | if other_val != &0f64 { 189 | FlexData::Dbl(val / 
other_val) 190 | } else { 191 | FlexData::NA 192 | } 193 | }, 194 | _ => FlexData::NA 195 | } 196 | }, 197 | FlexData::Int(val) => { 198 | match other { 199 | FlexData::Int(other_val) => { 200 | if other_val != &0 { 201 | FlexData::Int(val / other_val) 202 | } else { 203 | FlexData::NA 204 | } 205 | }, 206 | _ => FlexData::NA 207 | } 208 | }, 209 | FlexData::Uint(val) => { 210 | match other { 211 | FlexData::Uint(other_val) => { 212 | if other_val != &0 { 213 | FlexData::Uint(val / other_val) 214 | } else { 215 | FlexData::NA 216 | } 217 | }, 218 | _ => FlexData::NA 219 | } 220 | }, 221 | _ => FlexData::NA 222 | } 223 | } 224 | } 225 | 226 | impl AddAssign for FlexData { 227 | fn add_assign(&mut self, other: FlexData) { 228 | *self = match self { 229 | FlexData::Dbl(val) => { 230 | match other { 231 | FlexData::Dbl(other_val) => FlexData::Dbl(*val + other_val), 232 | _ => FlexData::NA 233 | } 234 | }, 235 | FlexData::Uint(val) => { 236 | match other { 237 | FlexData::Uint(other_val) => FlexData::Uint(*val + other_val), 238 | _ => FlexData::NA 239 | } 240 | }, 241 | FlexData::Int(val) => { 242 | match other { 243 | FlexData::Int(other_val) => FlexData::Int(*val + other_val), 244 | _ => FlexData::NA 245 | } 246 | }, 247 | _ => FlexData::NA 248 | } 249 | } 250 | } 251 | 252 | impl SubAssign for FlexData { 253 | fn sub_assign(&mut self, other: FlexData) { 254 | *self = match self { 255 | FlexData::Dbl(val) => { 256 | match other { 257 | FlexData::Dbl(other_val) => FlexData::Dbl(*val - other_val), 258 | _ => FlexData::NA 259 | } 260 | }, 261 | FlexData::Uint(val) => { 262 | match other { 263 | FlexData::Uint(other_val) => FlexData::Uint(*val - other_val), 264 | _ => FlexData::NA 265 | } 266 | }, 267 | FlexData::Int(val) => { 268 | match other { 269 | FlexData::Int(other_val) => FlexData::Int(*val - other_val), 270 | _ => FlexData::NA 271 | } 272 | }, 273 | _ => FlexData::NA 274 | } 275 | } 276 | } 277 | 278 | impl Sum for FlexData { 279 | fn sum(iter: I) -> FlexData 280 
| where I: Iterator { 281 | let mut total = FlexData::NA; 282 | for d in iter { 283 | if derive_datatype( &total ) == FlexDataType::NA { 284 | total = d; 285 | } else { 286 | total += d; 287 | } 288 | } 289 | total 290 | } 291 | } -------------------------------------------------------------------------------- /src/flexseries.rs: -------------------------------------------------------------------------------- 1 | use crate::{FlexDataType, FlexDataPoint, FlexData, FlexIndex}; 2 | use crate::helper::{convert, index_intersection}; 3 | use std::collections::HashMap; 4 | use std::convert::TryFrom; 5 | use std::ops::*; 6 | use prettytable::{Table, Row, Cell}; 7 | 8 | #[derive(Debug, Serialize, Deserialize, Clone)] 9 | pub struct FlexSeries { 10 | iter_counter: usize, 11 | label: String, 12 | datatype: FlexDataType, 13 | data: Vec, 14 | index_to_pos: HashMap 15 | } 16 | 17 | impl FlexSeries { 18 | 19 | pub fn new(label: &str, datatype: FlexDataType) -> Self { 20 | Self { 21 | iter_counter: 0, 22 | label: label.to_string(), 23 | datatype, 24 | data: Vec::new(), 25 | index_to_pos: HashMap::new() 26 | } 27 | } 28 | 29 | pub fn from_vec(label: &str, datatype: FlexDataType, data: Vec) -> Self { 30 | let mod_data : Vec = data.into_iter() 31 | .map(|d| d.as_type(&datatype) ) 32 | .collect(); 33 | let mut index_to_pos : HashMap = HashMap::new(); 34 | for (i,fdp) in mod_data.iter().enumerate() { 35 | index_to_pos.insert( fdp.get_index().clone(), i); 36 | } 37 | Self { 38 | iter_counter: 0, 39 | label: label.to_string(), 40 | datatype, 41 | data: mod_data, 42 | index_to_pos 43 | } 44 | } 45 | 46 | // Getters and setters 47 | 48 | pub fn get_label(&self) -> &str { 49 | self.label.as_str() 50 | } 51 | 52 | pub fn set_label(&mut self, label: &str) { 53 | self.label = label.to_string(); 54 | } 55 | 56 | pub fn get_datatype(&self) -> &FlexDataType { 57 | &self.datatype 58 | } 59 | 60 | pub fn set_datatype(&mut self, datatype: FlexDataType) { 61 | self.datatype = datatype; 62 | } 63 | 64 
| pub fn get_size(&self) -> usize { 65 | self.data.len() 66 | } 67 | 68 | pub fn get_indices(&self) -> Vec<&FlexIndex> { 69 | self.data.iter() 70 | .map(|fdp| fdp.get_index()) 71 | .collect() 72 | } 73 | 74 | pub fn get_data(&self) -> Vec<&FlexData> { 75 | self.data.iter() 76 | .map(|fdp| fdp.get_data()) 77 | .collect() 78 | } 79 | 80 | // Selecting 81 | 82 | pub fn at(&self, index: &FlexIndex) -> Option<&FlexDataPoint> { 83 | self.index_to_pos.get( index ) 84 | .map(|&pos| &self.data[pos] ) 85 | } 86 | 87 | pub fn contains(&self, index: &FlexIndex) -> bool { 88 | self.index_to_pos.contains_key( index ) 89 | } 90 | 91 | pub fn get_subset(&self, indices: Vec) -> Self { 92 | let records : Vec = indices.into_iter() 93 | .filter_map(|index| self.at( &index )) 94 | .cloned() 95 | .collect(); 96 | Self::from_vec( self.get_label(), self.get_datatype().clone(), records ) 97 | } 98 | 99 | // Data operations 100 | 101 | pub fn update(&mut self, data: FlexDataPoint) { 102 | if let Some( &i ) = self.index_to_pos.get( data.get_index() ) { 103 | self.data[i] = data.as_type(&self.datatype); 104 | } 105 | } 106 | 107 | pub fn insert(&mut self, data: FlexDataPoint) { 108 | self.index_to_pos.insert(data.get_index().clone(), self.data.len()); 109 | self.data.push(data.as_type(&self.datatype)); 110 | } 111 | 112 | pub fn insert_update(&mut self, data: FlexDataPoint) { 113 | if let Some( &i ) = self.index_to_pos.get( data.get_index() ) { 114 | self.data[i] = data.as_type(&self.datatype); 115 | } else { 116 | self.index_to_pos.insert(data.get_index().clone(), self.data.len()); 117 | self.data.push(data.as_type(&self.datatype)); 118 | } 119 | } 120 | 121 | pub fn remove(&mut self, k: usize) { 122 | self.index_to_pos.remove( self.data[k].get_index() ); 123 | self.data.remove(k); 124 | } 125 | 126 | pub fn remove_at(&mut self, index: &FlexIndex) { 127 | if let Some( &i ) = self.index_to_pos.get( index ) { 128 | self.index_to_pos.remove( index ); 129 | self.data.remove(i); 130 | } 131 | } 
132 | 133 | // Transformation 134 | 135 | pub fn as_type(&self, datatype: &FlexDataType) -> Self { 136 | let data : Vec = self.data.iter() 137 | .map(|d| d.as_type(datatype)) 138 | .collect(); 139 | Self::from_vec(self.label.as_str(), self.datatype.clone(), data) 140 | } 141 | 142 | pub fn align_to(&self, indices: &[FlexIndex]) -> Self { 143 | let mut series = self.clone(); 144 | for index in indices.iter() { 145 | if !series.contains( index ) { 146 | series.insert( FlexDataPoint::new((*index).clone(), FlexData::NA) ); 147 | } 148 | } 149 | series 150 | } 151 | 152 | pub fn apply(&self, f: impl Fn(&FlexData) -> FlexData) -> Self { 153 | let data = self.data.iter() 154 | .map(|dp| dp.apply(&f)) 155 | .collect(); 156 | Self::from_vec(self.label.as_str(), self.datatype.clone(), data) 157 | } 158 | 159 | // Filtering 160 | 161 | pub fn filter_any(&self, f: impl Fn(&FlexData) -> bool) -> Self { 162 | let data : Vec = self.data.iter() 163 | .filter(|d| f(d.get_data())) 164 | .cloned() 165 | .collect(); 166 | Self::from_vec(self.label.as_str(), self.datatype.clone(), data) 167 | } 168 | 169 | // NA management 170 | 171 | pub fn has_na(&self) -> bool { 172 | self.data.iter() 173 | .any(|fdp| fdp.get_data() == &FlexData::NA ) 174 | } 175 | 176 | pub fn get_na(&self) -> Self { 177 | self.filter_any(|x: &FlexData| x == &FlexData::NA) 178 | } 179 | 180 | pub fn drop_na(&self) -> Self { 181 | self.filter_any(|x: &FlexData| x != &FlexData::NA) 182 | } 183 | 184 | // Statistics 185 | 186 | pub fn mean(&self) -> Option { 187 | if self.get_size() == 0 { 188 | None 189 | } else { 190 | let n = self.get_size() as f64; 191 | match self.clone().map(|dp| dp.get_data().clone()).sum() { 192 | FlexData::Int(val) => Some( (val as f64) / n ), 193 | FlexData::Uint(val) => Some( (val as f64) / n ), 194 | FlexData::Dbl(val) => Some( val / n ), 195 | _ => None 196 | } 197 | } 198 | } 199 | 200 | pub fn covariance(&self, other: &Self, is_sample: bool) -> Option { 201 | let intersect = 
index_intersection(self.get_indices(), other.get_indices()); 202 | if intersect.len() <= 1 { 203 | None 204 | } else { 205 | let float_series1 = self.as_type(&FlexDataType::Dbl); 206 | let float_series2 = other.as_type(&FlexDataType::Dbl); 207 | let mut m1 = 0.0f64; 208 | let mut m2 = 0.0f64; 209 | let mut res = 0.0f64; 210 | let mut n = 0.0f64; 211 | for idx in intersect.into_iter() { 212 | let x1 = f64::try_from( float_series1.at(&idx).unwrap().get_data() ).unwrap(); 213 | let x2 = f64::try_from( float_series2.at(&idx).unwrap().get_data() ).unwrap(); 214 | n += 1.0; 215 | let dx1 = x1 - m1; 216 | m1 += dx1 / n; 217 | m2 += (x2 - m2) / n; 218 | res += dx1 * (x2 - m2); 219 | } 220 | if is_sample { 221 | Some( res / (n - 1.0) ) 222 | } else { 223 | Some( res / n) 224 | } 225 | } 226 | } 227 | 228 | pub fn variance(&self, is_sample: bool) -> Option { 229 | if self.get_size() <= 1 { 230 | None 231 | } else { 232 | let float_series = self.as_type(&FlexDataType::Dbl); 233 | let mut m = 0.0f64; 234 | let mut res = 0.0f64; 235 | let mut n = 0.0f64; 236 | for idx in self.get_indices() { 237 | let x = f64::try_from( float_series.at(idx).unwrap().get_data() ).unwrap(); 238 | n += 1.0; 239 | let dx1 = x - m; 240 | m += dx1 / n; 241 | res += dx1 * (x - m); 242 | } 243 | if is_sample { 244 | Some( res / (n - 1.0) ) 245 | } else { 246 | Some( res / n) 247 | } 248 | } 249 | } 250 | 251 | pub fn pearson_correlation(&self, other: &Self) -> Option { 252 | let intersect = index_intersection(self.get_indices(), other.get_indices()); 253 | if intersect.len() <= 1 { 254 | None 255 | } else { 256 | let float_series1 = self.as_type(&FlexDataType::Dbl); 257 | let float_series2 = other.as_type(&FlexDataType::Dbl); 258 | let mut m1 = 0.0f64; 259 | let mut m2 = 0.0f64; 260 | let mut m12 = 0.0f64; 261 | let mut cov = 0.0f64; 262 | let mut v1 = 0.0f64; 263 | let mut v2 = 0.0f64; 264 | let mut n = 0.0f64; 265 | for idx in intersect.into_iter() { 266 | let x1 = f64::try_from( 
float_series1.at(&idx).unwrap().get_data() ).unwrap(); 267 | let x2 = f64::try_from( float_series2.at(&idx).unwrap().get_data() ).unwrap(); 268 | let dx1 = x1 - m1; 269 | let dx2 = x2 - m2; 270 | n += 1.0; 271 | m1 += dx1 / n; 272 | m2 += dx2 / n; 273 | m12 += (x2 - m12) / n; 274 | cov += dx1 * (x2 - m12); 275 | v1 += dx1 * (x1 - m1); 276 | v2 += dx2 * (x2 - m2); 277 | } 278 | Some( cov / (v1.sqrt() * v2.sqrt()) ) 279 | } 280 | } 281 | 282 | // Sorting 283 | 284 | pub fn sort(&self, ascending: bool) -> Self { 285 | let mut data = self.data.clone(); 286 | if ascending { 287 | data.sort_by(|a,b| a.partial_cmp(b).unwrap() ); 288 | } else { 289 | data.sort_by(|a,b| b.partial_cmp(a).unwrap() ); 290 | } 291 | FlexSeries::from_vec(self.label.as_str(), self.datatype.clone(), data) 292 | } 293 | 294 | // Pretty print 295 | 296 | pub fn print(&self, max_size: Option) { 297 | let size = max_size.map(|val| val.min(self.get_size()) ).unwrap_or( self.get_size() ); 298 | let mut table = Table::new(); 299 | table.add_row(Row::new(vec![ 300 | Cell::new(""), 301 | Cell::new(self.label.as_str()) 302 | ])); 303 | let type_cell = match self.datatype { 304 | FlexDataType::Dbl => Cell::new("f64"), 305 | FlexDataType::Uint => Cell::new("usize"), 306 | FlexDataType::Int => Cell::new("isize"), 307 | FlexDataType::Char => Cell::new("char"), 308 | FlexDataType::Str => Cell::new("str"), 309 | FlexDataType::NA => Cell::new("n/a") 310 | }; 311 | table.add_row(Row::new(vec![Cell::new(""), type_cell])); 312 | for i in 0..size { 313 | let index_cell = match self[i].get_index() { 314 | FlexIndex::Uint(val) => Cell::new( format!("{}", val).as_str() ), 315 | FlexIndex::Str(val) => Cell::new( val.as_str() ) 316 | }; 317 | let data_cell = match self[i].get_data() { 318 | FlexData::Str(val) => Cell::new( val.as_str() ), 319 | FlexData::Dbl(val) => Cell::new( format!("{:.5}", val).as_str() ), 320 | FlexData::Uint(val) => Cell::new( format!("{}", val).as_str() ), 321 | FlexData::Int(val) => Cell::new( 
format!("{}", val).as_str() ), 322 | FlexData::Char(val) => Cell::new( format!("{}", val).as_str() ), 323 | FlexData::NA => Cell::new( "N/A" ) 324 | }; 325 | table.add_row(Row::new(vec![index_cell,data_cell])); 326 | } 327 | // Print the table to stdout 328 | table.printstd(); 329 | } 330 | 331 | // Operations 332 | 333 | pub fn add(&self, label: &str, datatype: &FlexDataType, other: &Self) -> Self { 334 | let mut data : Vec = Vec::new(); 335 | for idx in index_intersection(self.get_indices().clone(), other.get_indices().clone()).into_iter() { 336 | let fdv1 = self.at( &idx ).unwrap(); 337 | let fdv2 = other.at( &idx ).unwrap(); 338 | let val = &convert( fdv1.get_data(), datatype ) + &convert( fdv2.get_data(), datatype ); 339 | data.push( FlexDataPoint::new(idx, val) ); 340 | } 341 | Self::from_vec(label, datatype.clone(), data) 342 | } 343 | 344 | pub fn sub(&self, label: &str, datatype: &FlexDataType, other: &Self) -> Self { 345 | let mut data : Vec = Vec::new(); 346 | for idx in index_intersection(self.get_indices().clone(), other.get_indices().clone()).into_iter() { 347 | let fdv1 = self.at( &idx ).unwrap(); 348 | let fdv2 = other.at( &idx ).unwrap(); 349 | let val = &convert( fdv1.get_data(), datatype ) - &convert( fdv2.get_data(), datatype ); 350 | data.push( FlexDataPoint::new(idx, val) ); 351 | } 352 | Self::from_vec(label, datatype.clone(), data) 353 | } 354 | 355 | pub fn prod(&self, label: &str, datatype: &FlexDataType, other: &Self) -> Self { 356 | let mut data : Vec = Vec::new(); 357 | for idx in index_intersection(self.get_indices().clone(), other.get_indices().clone()).into_iter() { 358 | let fdv1 = self.at( &idx ).unwrap(); 359 | let fdv2 = other.at( &idx ).unwrap(); 360 | let val = &convert( fdv1.get_data(), datatype ) * &convert( fdv2.get_data(), datatype ); 361 | data.push( FlexDataPoint::new(idx, val) ); 362 | } 363 | Self::from_vec(label, datatype.clone(), data) 364 | } 365 | } 366 | 367 | // Implement [] operator 368 | 369 | impl Index for 
FlexSeries { 370 | type Output = FlexDataPoint; 371 | fn index(&self, index: usize) -> &FlexDataPoint { 372 | &self.data[index] 373 | } 374 | } 375 | 376 | impl Index for FlexSeries { 377 | type Output = FlexDataPoint; 378 | fn index(&self, index: i32) -> &FlexDataPoint { 379 | if index >= 0 { 380 | &self.data[index as usize] 381 | } else { 382 | &self.data[self.data.len() - (-index as usize)] 383 | } 384 | } 385 | } 386 | 387 | impl Index for FlexSeries { 388 | type Output = FlexDataPoint; 389 | fn index(&self, index: u32) -> &FlexDataPoint { 390 | &self.data[index as usize] 391 | } 392 | } 393 | 394 | impl Iterator for FlexSeries { 395 | type Item = FlexDataPoint; 396 | 397 | fn next(&mut self) -> Option { 398 | if self.iter_counter < self.get_size() { 399 | let dv = self.data[self.iter_counter].clone(); 400 | self.iter_counter += 1; 401 | Some( dv ) 402 | } else { 403 | self.iter_counter = 0; 404 | None 405 | } 406 | } 407 | } -------------------------------------------------------------------------------- /src/flextable.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | use std::io::Write; 3 | use std::ops::*; 4 | use std::convert::TryFrom; 5 | use std::iter::Iterator; 6 | use rayon::prelude::*; 7 | use prettytable::{Table, Row, Cell}; 8 | 9 | use std::sync::{Arc, Mutex}; 10 | 11 | use crate::helper::{convert, generate_flexdata_from_str, extract_csv_headers, make_index_from_data}; 12 | use crate::{FlexDataType, FlexData, FlexIndex, FlexDataPoint, FlexDataVector, FlexSeries}; 13 | 14 | #[derive(Debug, Serialize, Deserialize, Clone)] 15 | pub struct FlexTable { 16 | iter_counter: usize, 17 | labels: Vec, 18 | datatypes: Vec, 19 | data: Vec, 20 | label_to_pos: HashMap, 21 | index_to_pos: HashMap 22 | } 23 | 24 | impl FlexTable { 25 | 26 | // Constructors 27 | 28 | pub fn new( series: Vec ) -> Self { 29 | assert!( series.iter().map(|s| s.get_size()).min() == series.iter().map(|s| 
s.get_size()).max() ); 30 | let mut data : Vec = Vec::new(); 31 | for i in 0..series[0].get_size() { 32 | let index = series[0][i].get_index().clone(); 33 | let fds : Vec = series.iter() 34 | .map(|s| s[i].get_data().clone() ) 35 | .collect(); 36 | data.push( FlexDataVector::new( index, fds ) ); 37 | } 38 | let mut index_to_pos : HashMap = HashMap::new(); 39 | for (i,fdp) in data.iter().enumerate() { 40 | index_to_pos.insert( fdp.get_index().clone(), i); 41 | } 42 | let mut label_to_pos : HashMap = HashMap::new(); 43 | for (i,s) in series.iter().enumerate() { 44 | label_to_pos.insert( s.get_label().to_string(), i); 45 | } 46 | Self { 47 | iter_counter: 0, 48 | labels: series.iter().map(|s| s.get_label().to_string()).collect(), 49 | datatypes: series.iter().map(|s| s.get_datatype().clone()).collect(), 50 | data, 51 | label_to_pos, 52 | index_to_pos 53 | } 54 | } 55 | 56 | pub fn from_vecs( labels: Vec, datatypes: Vec, data: Vec ) -> Self { 57 | let mod_data : Vec = data.into_iter() 58 | .map(|d| d.as_types(&datatypes)) 59 | .collect(); 60 | let mut index_to_pos : HashMap = HashMap::new(); 61 | for (i,fdp) in mod_data.iter().enumerate() { 62 | index_to_pos.insert( fdp.get_index().clone(), i); 63 | } 64 | let mut label_to_pos : HashMap = HashMap::new(); 65 | for (i,l) in labels.iter().enumerate() { 66 | label_to_pos.insert( l.to_string(), i); 67 | } 68 | Self{ 69 | iter_counter: 0, 70 | labels, 71 | datatypes, 72 | data: mod_data, 73 | label_to_pos, 74 | index_to_pos 75 | } 76 | } 77 | 78 | pub fn from_csv(text: &str, headers: Vec, datatypes: Vec) -> Self { 79 | let mut filtered_text = text.to_string(); 80 | filtered_text.retain(|c| c != '"'); 81 | 82 | // Define header positions and series 83 | let raw_headers = extract_csv_headers(filtered_text.as_str()); 84 | 85 | let header_positions : Vec = headers.iter() 86 | .filter_map(|header| raw_headers.iter().position(|token| token == header)) 87 | .collect(); 88 | 89 | let mut datavectors : Vec = Vec::new(); 90 | let mut 
counter = 0; 91 | for line in filtered_text.lines().skip(1) { 92 | let tokens : Vec<&str> = line.split(',').collect(); 93 | let data : Vec = header_positions.iter() 94 | .enumerate() 95 | .map(|(i,&k)| generate_flexdata_from_str( tokens[k], &datatypes[i] ) ) 96 | .collect(); 97 | datavectors.push( FlexDataVector::new( FlexIndex::Uint(counter), data ) ); 98 | counter += 1; 99 | } 100 | Self::from_vecs( headers, datatypes, datavectors ) 101 | } 102 | 103 | pub fn to_csv(&self, filepath: &str) { 104 | let mut file = std::fs::File::create(filepath).expect("File creation failed"); 105 | file.write_all(",".as_bytes()).expect("Writing failed"); 106 | file.write_all(self.labels.join(",").as_bytes()).expect("Writing failed"); 107 | file.write_all("\n".to_string().as_bytes()).expect("Writing failed"); 108 | for i in 0..self.num_records() { 109 | let mut row : Vec = Vec::new(); 110 | let cell = match self.data[i].get_index() { 111 | FlexIndex::Uint(val) => format!("{}", val), 112 | FlexIndex::Str(val) => val.clone() 113 | }; 114 | row.push(cell); 115 | for d in self.data[i].get_data() { 116 | let cell = match d { 117 | FlexData::Str(val) => val.clone(), 118 | FlexData::Dbl(val) => format!("{:.5}", val), 119 | FlexData::Uint(val) => format!("{}", val), 120 | FlexData::Int(val) => format!("{}", val), 121 | FlexData::Char(val) => format!("{}", val), 122 | FlexData::NA => "N/A".to_string() 123 | }; 124 | row.push(cell); 125 | } 126 | file.write_all(row.join(",").as_bytes()).expect("Writing failed"); 127 | file.write_all("\n".to_string().as_bytes()).expect("Writing failed"); 128 | } 129 | } 130 | 131 | // Getters 132 | 133 | pub fn get_labels(&self) -> &Vec { 134 | &self.labels 135 | } 136 | 137 | pub fn get_datatypes(&self) -> &Vec { 138 | &self.datatypes 139 | } 140 | 141 | pub fn get_indices(&self) -> Vec { 142 | self.index_to_pos.keys().cloned().collect() 143 | } 144 | 145 | pub fn num_records(&self) -> usize { 146 | self.data.len() 147 | } 148 | 149 | pub fn num_series(&self) 
-> usize { 150 | self.datatypes.len() 151 | } 152 | 153 | // Selecting 154 | 155 | pub fn at(&self, index: &FlexIndex) -> Option { 156 | self.index_to_pos.get( index ).map(|pos| self.data[*pos].clone()) 157 | } 158 | 159 | pub fn contains(&self, index: &FlexIndex) -> bool { 160 | self.index_to_pos.contains_key( index ) 161 | } 162 | 163 | pub fn get_subset(&self, indices: Vec) -> Self { 164 | let records : Vec = indices.into_iter() 165 | .filter_map(|index| self.at(&index)) 166 | .collect(); 167 | Self::from_vecs( self.labels.clone(), self.datatypes.clone(), records ) 168 | } 169 | 170 | pub fn extract_series(&self, labels: &[&str]) -> Vec { 171 | let res : Vec = labels.iter() 172 | .map(|&label| { 173 | let pos = self.label_to_pos.get(label).unwrap(); 174 | let data : Vec = self.data.par_iter() 175 | .map(|v| { 176 | FlexDataPoint::new( v.get_index().clone(), v.get_data()[*pos].clone() ) 177 | }) 178 | .collect(); 179 | FlexSeries::from_vec(label, self.datatypes[*pos].clone(), data) 180 | }) 181 | .collect(); 182 | res 183 | } 184 | 185 | pub fn extract_all_series(&self) -> HashMap { 186 | let mut res : HashMap = HashMap::new(); 187 | self.labels.iter() 188 | .for_each(|label| { 189 | let pos = self.label_to_pos.get(label).unwrap(); 190 | let data : Vec = self.data.par_iter() 191 | .map(|v| FlexDataPoint::new( v.get_index().clone(), v.get_data()[*pos].clone() )) 192 | .collect(); 193 | res.insert( label.clone(), FlexSeries::from_vec(label, self.datatypes[*pos].clone(), data) ); 194 | }); 195 | res 196 | } 197 | 198 | // Modifiers 199 | 200 | pub fn add_series(&mut self, series: FlexSeries) { 201 | let adj_series = series.align_to( &self.get_indices() ); 202 | self.labels.push( adj_series.get_label().to_string() ); 203 | self.label_to_pos.insert( adj_series.get_label().to_string(), self.labels.len() - 1); 204 | self.datatypes.push( adj_series.get_datatype().clone() ); 205 | let mod_data : Vec = self.data.iter() 206 | .zip(adj_series.get_data()) 207 | .map(|(dv,dp)| 
{
                let mut v = dv.get_data().clone();
                v.push( dp.clone() );
                FlexDataVector::new(dv.get_index().clone(), v)
            })
            .collect();
        self.data = mod_data;
    }

    /// Removes the record stored at position `k`.
    ///
    /// The index-to-position lookup is kept consistent: the removed record's
    /// entry is dropped and every record that followed it is shifted down by one.
    ///
    /// # Panics
    /// Panics if `k` is out of bounds.
    pub fn remove_record(&mut self, k: usize) {
        self.index_to_pos.remove( self.data[k].get_index() );
        self.data.remove(k);
        self.shift_positions_after(k);
    }

    /// Removes the record whose index equals `index`; does nothing if no such
    /// record is registered in the index-to-position lookup.
    pub fn remove_record_at(&mut self, index: &FlexIndex) {
        if let Some( i ) = self.index_to_pos.remove( index ) {
            self.data.remove(i);
            self.shift_positions_after(i);
        }
    }

    /// Decrements every stored record position greater than `removed`.
    ///
    /// `Vec::remove` shifts all later records one slot to the left, so without
    /// this adjustment the lookup would point one past the intended record.
    fn shift_positions_after(&mut self, removed: usize) {
        for pos in self.index_to_pos.values_mut() {
            if *pos > removed {
                *pos -= 1;
            }
        }
    }

    /// Promotes the column `label` to record index.
    ///
    /// For every record, the value held in that column is turned into an index
    /// with `make_index_from_data` and removed from the record's data.
    ///
    /// # Panics
    /// Panics if `label` is not a known column.
    pub fn set_index(&mut self, label: &str) {
        let pos = *self.label_to_pos.get(label).expect("Label not found");
        let mod_data : Vec<FlexDataVector> = self.data.iter()
            .map(|v| {
                let mut data = v.get_data().clone();
                let index = make_index_from_data( &data[pos] );
                data.remove( pos );
                FlexDataVector::new(index, data)
            })
            .collect();
        self.data = mod_data;

        // The column is gone from every record: drop it from the column
        // metadata too and shift the columns that followed it.
        // NOTE(review): assumes `labels` and `datatypes` are column-aligned
        // Vecs (as `from_vecs` and `print` suggest) - confirm against the struct.
        self.label_to_pos.remove(label);
        for p in self.label_to_pos.values_mut() {
            if *p > pos {
                *p -= 1;
            }
        }
        self.labels.remove(pos);
        self.datatypes.remove(pos);

        // Every record has a new index, so the lookup must be rebuilt.
        self.index_to_pos = self.data.iter()
            .enumerate()
            .map(|(i, v)| ( v.get_index().clone(), i ))
            .collect();
    }

    // Filtering

    /// Returns a new table holding the records for which `f` is true on the
    /// value of *every* column named in `labels`.
    ///
    /// An unknown label counts as a failed test, so no record passes when
    /// `labels` contains one.
    pub fn filter_all(&self, labels: &[&str], f: impl Fn(&FlexData) -> bool) -> Self {
        // Resolve the column positions once instead of once per record.
        let positions : Vec<Option<usize>> = labels.iter()
            .map(|&l| self.label_to_pos.get( l ).copied())
            .collect();
        let records : Vec<FlexDataVector> = self.data.iter()
            .filter(|rec| {
                positions.iter()
                    .all(|&p| p.map_or(false, |pos| f( &rec.get_data()[pos] )))
            })
            .cloned()
            .collect();
        Self::from_vecs( self.labels.clone(), self.datatypes.clone(), records )
    }

    /// Returns a new table holding the records for which `f` is true on the
    /// value of *at least one* column named in `labels`.
    ///
    /// Unknown labels never match.
    pub fn filter_any(&self, labels: &[&str], f: impl Fn(&FlexData) -> bool) -> Self {
        // Resolve the column positions once instead of once per record.
        let positions : Vec<Option<usize>> = labels.iter()
            .map(|&l| self.label_to_pos.get( l ).copied())
            .collect();
        let records : Vec<FlexDataVector> = self.data.iter()
            .filter(|rec| {
                positions.iter()
                    .any(|&p| p.map_or(false, |pos| f( &rec.get_data()[pos] )))
            })
            .cloned()
            .collect();
        Self::from_vecs( self.labels.clone(), self.datatypes.clone(), records )
    }

    // NA Management

    /// Returns `true` if any record reports an NA value.
    pub fn has_na(&self) -> bool {
        self.data.iter()
            .any(|s| s.has_na())
    }

    /// Returns the records that hold `FlexData::NA` in at least one column.
    pub fn get_na(&self) -> Self {
        let labels : Vec<&str> = self.get_labels().iter()
            .map(|s| s.as_str())
            .collect();
        self.filter_any( labels.as_slice(), |x: &FlexData| x == &FlexData::NA )
    }

    /// Returns the records that hold no `FlexData::NA` in any column.
    pub fn drop_na(&self) -> Self {
        let labels : Vec<&str> = self.get_labels().iter()
            .map(|s| s.as_str())
            .collect();
        self.filter_all( labels.as_slice(), |x: &FlexData| x != &FlexData::NA )
    }

    // n-ary operation

    /// Builds a series named `label` by applying `f`, record by record, to the
    /// values of the columns listed in `labels` (passed to `f` in that order).
    /// Each resulting data point keeps the index of its source record.
    ///
    /// # Panics
    /// Panics with "Label not found" if a record is processed and one of
    /// `labels` is not a known column.
    pub fn nary_apply(&self, label: &str, datatype: FlexDataType, labels: &[&str], f: impl Fn(&[&FlexData]) -> FlexData) -> FlexSeries {
        let mut data : Vec<FlexDataPoint> = Vec::new();
        for k in 0..self.num_records() {
            let inputs : Vec<&FlexData> = labels.iter()
                .map(|&l| {
                    let pos = self.label_to_pos.get(l).expect("Label not found");
                    &self.data[k][*pos]
                })
                .collect();
            data.push( FlexDataPoint::new( self.data[k].get_index().clone(), f( inputs.as_slice() ) ) );
        }
        FlexSeries::from_vec(label, datatype, data)
    }

    /// Returns a copy of the table with its records sorted on column `label`,
    /// ascending or descending.
    ///
    /// # Panics
    /// Panics if `label` is not a known column, or if two values of that column
    /// cannot be ordered (`partial_cmp` returns `None`).
    pub fn sort(&self, label: &str, ascending: bool) -> Self {
        let pos = *self.label_to_pos.get(label).unwrap();
        let mut data = self.data.clone();
        if ascending {
            data.sort_by(|a,b| a[pos].partial_cmp(&b[pos]).unwrap() );
        } else {
            data.sort_by(|a,b| b[pos].partial_cmp(&a[pos]).unwrap() );
        }
        FlexTable::from_vecs(self.labels.clone(), self.datatypes.clone(), data)
    }

    // grouping

    /// Splits `table` into sub-tables keyed by the string form of the values
    /// found in column `label`.
    ///
    /// The subsets are extracted on one thread per distinct value. An empty map
    /// is returned when `extract_series` does not yield exactly one series for
    /// `label`.
    ///
    /// # Panics
    /// Panics with "Value not convertible to String" if a value of the column
    /// cannot be converted.
    pub fn group_by(table: &Self, label: &str) -> HashMap<String, FlexTable> {

        let groups : Arc<Mutex<HashMap<String, FlexTable>>> = Arc::new( Mutex::new( HashMap::new() ) );
        let mut thread_handles : Vec<_> = Vec::new();

        let series = table.extract_series( &[label] );
        if series.len() == 1 {
            // Define value set
            let mut value_set : HashMap<String, Vec<FlexIndex>> = HashMap::new();
            for k in 0..series[0].get_size() {
                let fdp = &series[0][k];
                let val : String = String::try_from( &convert(fdp.get_data(), &FlexDataType::Str) )
                    .expect("Value not convertible to String");
                // `entry` records the index whether or not the value was seen
                // before; inserting an empty Vec on first sight used to drop
                // the first record of every group.
                value_set.entry( val )
                    .or_default()
                    .push( fdp.get_index().clone() );
            }

            // Build subsets
            let arc_table = Arc::new( table.clone() );
            for (k,v) in value_set.into_iter() {
                let cloned_groups = groups.clone();
                let cloned_table = arc_table.clone();
                let handle = std::thread::spawn(move || {
                    // Extract before locking so the workers only serialise on the insert.
                    let subset = cloned_table.get_subset(v);
                    let mut local_groups = cloned_groups.lock().unwrap();
                    local_groups.insert(k, subset);
                });
                thread_handles.push( handle );
            }
        }

        for handle in thread_handles {
            let _ = handle.join();
        }

        // All workers are done: move the map out rather than deep-cloning it.
        let res = std::mem::take( &mut *groups.lock().unwrap() );
        res
    }

    // pretty print

    /// Prints the table to stdout: a header row with the labels, a row with the
    /// column types, then one row per record (index first). At most `max_size`
    /// records are shown; `None` prints them all.
    pub fn print(&self, max_size: Option<usize>) {
        let size = max_size.map(|val| val.min(self.num_records()) ).unwrap_or( self.num_records() );
        let mut table = Table::new();

        // The leading empty cell sits above the index column.
        let mut headers_cells : Vec<Cell> = self.labels.iter()
            .map(|h| Cell::new(h))
            .collect();
        headers_cells.insert(0, Cell::new(""));
        table.add_row(Row::new(headers_cells));

        let mut types_cells : Vec<Cell> = self.datatypes.iter()
            .map(|datatype| {
                match datatype {
                    FlexDataType::Dbl => Cell::new("f64"),
                    FlexDataType::Uint => Cell::new("usize"),
                    FlexDataType::Int => Cell::new("isize"),
                    FlexDataType::Char => Cell::new("char"),
                    FlexDataType::Str => Cell::new("str"),
                    FlexDataType::NA => Cell::new("n/a")
                }
            })
            .collect();
        types_cells.insert(0, Cell::new(""));
        table.add_row(Row::new(types_cells));

        for record in self.data.iter().take(size) {
            let mut record_cells : Vec<Cell> = Vec::with_capacity( self.num_series() + 1 );
            // Index cell first, matching the empty leading header cell.
            record_cells.push( match record.get_index() {
                FlexIndex::Uint(val) => Cell::new( val.to_string().as_str() ),
                FlexIndex::Str(val) => Cell::new( val.as_str() )
            });
            for j in 0..self.num_series() {
                let cell = match &record.get_data()[j] {
                    FlexData::Str(val) => Cell::new( val.as_str() ),
                    FlexData::Dbl(val) => Cell::new( format!("{:.5}", val).as_str() ),
                    FlexData::Uint(val) => Cell::new( val.to_string().as_str() ),
                    FlexData::Int(val) => Cell::new( val.to_string().as_str() ),
                    FlexData::Char(val) => Cell::new( val.to_string().as_str() ),
                    FlexData::NA => Cell::new( "N/A" )
                };
                record_cells.push(cell);
            }
            table.add_row(Row::new(record_cells));
        }
        // Print the table to stdout
        table.printstd();
    }
}

// Implement the [] operator: `table[k]` borrows the record at position `k`.

impl Index<usize> for FlexTable {
    type Output = FlexDataVector;
    fn index(&self, index: usize) -> &FlexDataVector {
        &self.data[index]
    }
}

/// Iterates over clones of the records, in storage order.
///
/// The cursor lives in the table itself (`iter_counter`) and is reset to zero
/// once the end is reached, so a fully consumed table can be iterated again.
impl Iterator for FlexTable {
    type Item = FlexDataVector;

    fn next(&mut self) -> Option<Self::Item> {
        if self.iter_counter < self.num_records() {
            let dv = self.data[self.iter_counter].clone();
            self.iter_counter += 1;
            Some( dv )
        } else {
            self.iter_counter = 0;
            None
        }
    }
}
--------------------------------------------------------------------------------