├── .github └── workflows │ └── ci_pipeline.yml ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── analysis_options.yaml ├── build.yaml ├── example └── main.dart ├── lib ├── ml_dataframe.dart └── src │ ├── data_frame │ ├── data_frame.dart │ ├── data_frame_impl.dart │ ├── data_frame_impl.g.dart │ ├── data_frame_json_keys.dart │ ├── exceptions │ │ └── wrong_series_shape_exception.dart │ ├── factories │ │ ├── from_csv.dart │ │ ├── from_json.dart │ │ ├── from_matrix.dart │ │ ├── from_raw_csv.dart │ │ ├── from_raw_data.dart │ │ └── prefilled_dataframes │ │ │ ├── _deprecated │ │ │ ├── load_iris_dataset.dart │ │ │ └── load_pima_indians_diabetes_dataset.dart │ │ │ ├── get_housing_data_frame.dart │ │ │ ├── get_iris_data_frame.dart │ │ │ ├── get_pima_indians_diabetes_data_frame.dart │ │ │ └── get_wine_quality_data_frame.dart │ ├── helpers │ │ ├── convert_rows_to_series.dart │ │ ├── convert_series_to_rows.dart │ │ ├── data_frame_to_string.dart │ │ ├── generate_unordered_indices.dart │ │ └── get_header.dart │ ├── series.dart │ ├── series_impl.dart │ ├── series_impl.g.dart │ └── series_json_keys.dart │ ├── data_reader │ ├── csv_data_reader.dart │ ├── data_reader.dart │ └── file_factory │ │ └── file_factory.dart │ ├── data_selector │ └── data_selector.dart │ ├── numerical_converter │ ├── helpers │ │ ├── from_numerical_converter_json.dart │ │ └── numerical_converter_to_json.dart │ ├── numerical_converter.dart │ ├── numerical_converter_impl.dart │ └── numerical_converter_json_keys.dart │ └── serializable │ ├── serializable.dart │ └── serializable_mixin.dart ├── pubspec.yaml └── test ├── data_frame ├── data │ └── raw_csv_with_header.txt ├── data_frame_from_matrix_test.dart ├── data_frame_from_raw_csv_test.dart ├── data_frame_from_raw_data_test.dart ├── data_frame_from_series_test.dart ├── data_frame_helpers_test.dart ├── data_frame_test.dart ├── data_frame_to_string_test.dart ├── factories │ ├── data_frame.json │ ├── elo_blatter.csv │ ├── fake_data_headless.csv │ ├── 
from_csv_test.dart │ ├── from_json_test.dart │ └── prefilled_dataframes │ │ ├── get_housing_data_frame_test.dart │ │ ├── get_iris_data_frame_test.dart │ │ ├── get_pima_indians_diabetes_data_frame_test.dart │ │ └── get_wine_quality_dataframe_test.dart └── series_test.dart └── numerical_converter └── numerical_converter_impl_test.dart /.github/workflows/ci_pipeline.yml: -------------------------------------------------------------------------------- 1 | name: CI pipeline 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | 13 | container: 14 | image: google/dart:latest 15 | 16 | steps: 17 | - uses: actions/checkout@v2 18 | 19 | - name: Print Dart SDK version 20 | run: dart --version 21 | 22 | - name: Install dependencies 23 | run: dart pub get 24 | 25 | - name: Verify formatting 26 | run: dart format --output=none --set-exit-if-changed . 27 | 28 | - name: Analyze project source 29 | run: dart analyze --no-fatal-warnings 30 | 31 | - name: Run tests 32 | run: dart test 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Files and directories created by pub 2 | .packages 3 | .pub/ 4 | .idea 5 | build/ 6 | 7 | # Directory created by dartdoc 8 | doc/api/ 9 | /.dart_tool/ 10 | pubspec.lock 11 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 1.6.0 4 | - `getHousingDataFrame` function added 5 | 6 | ## 1.5.1 7 | - Corrected typos and examples in README.md 8 | 9 | ## 1.5.0 10 | - `getWineQualityDataFrame` function added 11 | - `loadIrisDataset`, `loadPimaIndiansDiabetesDataset` deprecated, `getIrisDataFrame`, `getPimaIndiansDiabetesDataFrame` added instead 12 | 13 | ## 1.4.2 14 | - 
`loadIrisDataset`, `loadPimaIndiansDiabetesDataset`: 15 | - raw CSV data used instead of files 16 | 17 | ## 1.4.1 18 | - `loadPimaIndiansDiabetesDataset` function exported 19 | 20 | ## 1.4.0 21 | - Added `Pima Indians diabetes` dataset 22 | 23 | ## 1.3.0 24 | - Added `Iris` dataset 25 | 26 | ## 1.2.2 27 | - Fixed markup in `fromCsv` documentation 28 | - Added documentation for `Series` 29 | 30 | ## 1.2.1 31 | - Added documentation for `fromCsv`, `fromJson` 32 | 33 | ## 1.2.0 34 | - `DataFrame`: 35 | - Added `mapSeries` function 36 | 37 | ## 1.1.0 38 | - `DataFrame`: 39 | - Added `map` function 40 | 41 | ## 1.0.0 42 | 43 | - Stable release 44 | - `DataFrame`: 45 | - `dropSeries`: 46 | - `seriesIndices` renamed to `indices` 47 | - `seriesNames` renamed to `names` 48 | 49 | ## 0.6.0 50 | 51 | - `DataFrame`: `toString` gets nicely formatted printing of the data to get a quick overview 52 | 53 | ## 0.5.1 54 | 55 | - `pubspec.yaml`: `json_serializable` package moved to dev_dependencies section 56 | 57 | ## 0.5.0 58 | 59 | - Null-safety supported (stable) 60 | 61 | ## 0.5.0-nullsafety.0 62 | 63 | - Null-safety supported (beta) 64 | 65 | ## 0.4.1 66 | 67 | - `DataFrame`: series-to-rows and rows-to-series converter issues fixed 68 | 69 | ## 0.4.0 70 | 71 | - `DataFrame`: `fromRawCsv` constructor added 72 | 73 | ## 0.3.0 74 | 75 | - `DataFrame`: `sampleFromRows` method added 76 | - `CI`: GitHub Actions set up 77 | 78 | ## 0.2.0 79 | 80 | - `DataFrame`: `shuffle` method added 81 | 82 | ## 0.1.1 83 | 84 | - `DataFrame`: `addSeries` method added 85 | 86 | ## 0.1.0 87 | 88 | - `DataFrame`, `Series`: Serialization/deserialization supported 89 | 90 | ## 0.0.11 91 | 92 | - `dtype` parameter removed from the DataFrame's constructor 93 | - `dtype` parameter added to `toMatrix` method 94 | 95 | ## 0.0.10 96 | 97 | - `DataFrame.sampleFromSeries` method's signature changed 98 | 99 | ## 0.0.9 100 | 101 | - `dtype` field added to `DataFrame` interface 102 | 103 | ## 0.0.8 104 | 105 | - 
`xrange` dependency removed 106 | - `ml_linalg 12.0.*` supported 107 | - dart sdk constraint changed to `>=2.2.0 <3.0.0` 108 | 109 | ## 0.0.7 110 | 111 | - `xrange` version locked 112 | 113 | ## 0.0.6 114 | 115 | - `DataFrame`: `sampleFromSeries` method added 116 | 117 | ## 0.0.5 118 | 119 | - `DataFrame`: `dropSeries` method added 120 | 121 | ## 0.0.4 122 | 123 | - `DataFrame`: `fromMatrix` constructor added 124 | 125 | ## 0.0.3 126 | 127 | - DataFrame helpers: series data is not `cold` iterable while series creating 128 | - DataFrame unit tests: redundant constructor parameters removed for some test cases 129 | 130 | ## 0.0.2 131 | 132 | - Redundant dependencies removed from `dev_dependencies` section 133 | 134 | ## 0.0.1 135 | 136 | - `DataFrame`: `DataFrame` entity with basic functionality added 137 | - `Series`: `Series` class added - the entity that is representing a column with its header 138 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2019 Ilia Gyrdymov 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, 7 | this list of conditions and the following disclaimer. 8 | 9 | 2. Redistributions in binary form must reproduce the above copyright 10 | notice, this list of conditions and the following disclaimer in the 11 | documentation and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 17 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 19 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 20 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 21 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 22 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | -------------------------------------------------------------------------------- /analysis_options.yaml: -------------------------------------------------------------------------------- 1 | include: package:pedantic/analysis_options.yaml 2 | 3 | analyzer: 4 | strong-mode: 5 | implicit-casts: false 6 | implicit-dynamic: true 7 | errors: 8 | unnecessary_import: error 9 | unnecessary_type_check: error 10 | missing_return: error 11 | dead_code: error 12 | duplicate_import: error 13 | unused_import: error 14 | deprecated_member_use_from_same_package: warning 15 | -------------------------------------------------------------------------------- /build.yaml: -------------------------------------------------------------------------------- 1 | targets: 2 | $default: 3 | builders: 4 | json_serializable: 5 | options: 6 | any_map: false 7 | checked: true 8 | create_to_json: true 9 | disallow_unrecognized_keys: true 10 | explicit_to_json: false 11 | field_rename: none 12 | ignore_unannotated: false 13 | include_if_null: true 14 | -------------------------------------------------------------------------------- /example/main.dart: -------------------------------------------------------------------------------- 1 | import 'package:ml_dataframe/ml_dataframe.dart'; 2 | 3 | void dataframeWithHeaderDemo() { 4 | final dataframe = DataFrame([ 5 | ['Age', 'City', 'Blood Group', 'is_married'], 6 | [33, 'Larnaca', 'A', true], 7 | [17, 'Limassol', 'A', 
false], 8 | [29, 'Nicosia', 'B', false], 9 | [45, 'Larnaca', 'AB', true], 10 | ]); 11 | 12 | print('\nDataframe with the header row: '); 13 | print(dataframe); 14 | } 15 | 16 | void headlessDataframeDemo() { 17 | final dataframe = DataFrame([ 18 | [33, 'Larnaca', 'A', true], 19 | [17, 'Limassol', 'A', false], 20 | [29, 'Nicosia', 'B', false], 21 | [45, 'Larnaca', 'AB', true], 22 | ], headerExists: false); 23 | 24 | print('\nHeadless dataframe: '); 25 | print(dataframe); 26 | } 27 | 28 | void headlessDataframeWithCustomPrefixDemo() { 29 | final dataframe = DataFrame([ 30 | [33, 'Larnaca', 'A', true], 31 | [17, 'Limassol', 'A', false], 32 | [29, 'Nicosia', 'B', false], 33 | [45, 'Larnaca', 'AB', true], 34 | ], headerExists: false, autoHeaderPrefix: 'SERIES_'); 35 | 36 | print('\nHeadless dataframe with custom prefix: '); 37 | print(dataframe); 38 | } 39 | 40 | void predefinedHeaderDataframeDemo() { 41 | final dataframe = DataFrame( 42 | [ 43 | [33, 'Larnaca', 'A', true], 44 | [17, 'Limassol', 'A', false], 45 | [29, 'Nicosia', 'B', false], 46 | [45, 'Larnaca', 'AB', true], 47 | ], 48 | headerExists: false, 49 | header: ['AGE', 'TOWN', 'Blood', 'MARRIED']); 50 | 51 | print('\nDataframe with predefined header: '); 52 | print(dataframe); 53 | } 54 | 55 | void dataframeWithSpecificColumnsDemo() { 56 | final dataframe = DataFrame([ 57 | ['Age', 'City', 'Blood Group', 'is_married'], 58 | [33, 'Larnaca', 'A', true], 59 | [17, 'Limassol', 'A', false], 60 | [29, 'Nicosia', 'B', false], 61 | [45, 'Larnaca', 'AB', true], 62 | ], columnNames: [ 63 | 'Age', 64 | 'is_married' 65 | ]); 66 | 67 | print('\nDataframe with specific columns: '); 68 | print(dataframe); 69 | } 70 | 71 | void main() { 72 | dataframeWithHeaderDemo(); 73 | headlessDataframeDemo(); 74 | headlessDataframeWithCustomPrefixDemo(); 75 | predefinedHeaderDataframeDemo(); 76 | dataframeWithSpecificColumnsDemo(); 77 | } 78 | -------------------------------------------------------------------------------- 
/lib/ml_dataframe.dart: -------------------------------------------------------------------------------- 1 | export 'package:ml_dataframe/src/data_frame/data_frame.dart'; 2 | export 'package:ml_dataframe/src/data_frame/factories/from_csv.dart'; 3 | export 'package:ml_dataframe/src/data_frame/factories/from_json.dart'; 4 | export 'package:ml_dataframe/src/data_frame/factories/prefilled_dataframes/_deprecated/load_iris_dataset.dart'; 5 | export 'package:ml_dataframe/src/data_frame/factories/prefilled_dataframes/_deprecated/load_pima_indians_diabetes_dataset.dart'; 6 | export 'package:ml_dataframe/src/data_frame/factories/prefilled_dataframes/get_housing_data_frame.dart'; 7 | export 'package:ml_dataframe/src/data_frame/factories/prefilled_dataframes/get_iris_data_frame.dart'; 8 | export 'package:ml_dataframe/src/data_frame/factories/prefilled_dataframes/get_pima_indians_diabetes_data_frame.dart'; 9 | export 'package:ml_dataframe/src/data_frame/factories/prefilled_dataframes/get_wine_quality_data_frame.dart'; 10 | export 'package:ml_dataframe/src/data_frame/series.dart'; 11 | -------------------------------------------------------------------------------- /lib/src/data_frame/data_frame.dart: -------------------------------------------------------------------------------- 1 | import 'package:csv/csv.dart'; 2 | import 'package:ml_dataframe/src/data_frame/data_frame_impl.dart'; 3 | import 'package:ml_dataframe/src/data_frame/factories/from_matrix.dart'; 4 | import 'package:ml_dataframe/src/data_frame/factories/from_raw_csv.dart'; 5 | import 'package:ml_dataframe/src/data_frame/factories/from_raw_data.dart'; 6 | import 'package:ml_dataframe/src/data_frame/series.dart'; 7 | import 'package:ml_dataframe/src/numerical_converter/numerical_converter_impl.dart'; 8 | import 'package:ml_dataframe/src/serializable/serializable.dart'; 9 | import 'package:ml_linalg/linalg.dart'; 10 | 11 | const defaultHeaderPrefix = 'col_'; 12 | 13 | /// An in-memory storage to keep data in 
column-like manner with human readable 14 | /// headers with possibility to convert the data to pure numeric representation. 15 | abstract class DataFrame implements Serializable { 16 | /// Creates a dataframe from the non-typed [data] that is represented as 17 | /// two-dimensional array, where each element is a row of table-like data. 18 | /// The first element of the two-dimensional array may be a header of a 19 | /// dataset: 20 | /// 21 | /// ````dart 22 | /// final data = [ 23 | /// ['column_1', 'column_2', 'column_3'], // a header 24 | /// [ 20, false, 'value_1' ], // row 1 25 | /// [ 51, true, 'value_2' ], // row 2 26 | /// [ 22, false, null ], // row 3 27 | /// ]; 28 | /// final dataframe = DataFrame(data); 29 | /// ```` 30 | /// 31 | /// [headerExists] Indicates whether the dataset header (a sequence of 32 | /// column names) exists. If the header exists, it must be present in the very first 33 | /// row of the data: 34 | /// 35 | /// ````dart 36 | /// final data = [ 37 | /// ['column_1', 'column_2', 'column_3'], // row 1 38 | /// [ 20, false, 'value_1' ], // row 2 39 | /// [ 51, true, 'value_2' ], // row 3 40 | /// [ 22, false, null ], // row 4 41 | /// ]; 42 | /// // the first row isn't considered a header in this case, it's considered 43 | /// // a data item row 44 | /// final dataframe = DataFrame(data, headerExists: false); 45 | /// 46 | /// print(dataframe.header); // should output an autogenerated header 47 | /// print(dataframe.rows); 48 | /// ```` 49 | /// 50 | /// The output: 51 | /// 52 | /// ``` 53 | /// ['col_0', 'col_1', 'col_2'] 54 | /// 55 | /// [ 56 | /// ['column_1', 'column_2', 'column_3'], 57 | /// [ 20, false, 'value_1' ], 58 | /// [ 51, true, 'value_2' ], 59 | /// [ 22, false, null ], 60 | /// ] 61 | /// ``` 62 | /// 63 | /// [header] Predefined dataset header. It'll be skipped if [headerExists] is 64 | /// true. Use it to provide a custom header to a header-less dataset. 
65 | /// 66 | /// [autoHeaderPrefix] A string that is used as a prefix for every column name 67 | /// of auto-generated header (if [headerExists] is false and [header] is 68 | /// empty). Underscore + ordinal number is used as a postfix of column names. 69 | /// 70 | /// [columns] A collection of column indices that specifies which columns 71 | /// should be extracted from the [data] and placed in the resulting [DataFrame]. 72 | /// Has a higher precedence than [columnNames]. 73 | /// 74 | /// [columnNames] A collection of column titles that specifies which columns 75 | /// should be extracted from the [data] and placed in the resulting 76 | /// [DataFrame]. It can also be used with auto-generated column names. 77 | /// The argument will be ignored if [columns] is provided 78 | factory DataFrame( 79 | Iterable> data, { 80 | bool headerExists = true, 81 | Iterable header = const [], 82 | String autoHeaderPrefix = defaultHeaderPrefix, 83 | Iterable columns = const [], 84 | Iterable columnNames = const [], 85 | }) => 86 | fromRawData( 87 | data, 88 | headerExists: headerExists, 89 | predefinedHeader: header, 90 | autoHeaderPrefix: autoHeaderPrefix, 91 | columns: columns, 92 | columnNames: columnNames, 93 | ); 94 | 95 | /// Create a [DataFrame] instance from a collection of [Series] 96 | /// 97 | /// A usage example: 98 | /// 99 | /// ```dart 100 | /// import 'package:ml_dataframe/ml_dataframe.dart'; 101 | /// 102 | /// void main() { 103 | /// final series = [ 104 | /// Series('some_series', [10, 20, 30]), 105 | /// Series('super_series', [20, 40, 60]), 106 | /// Series('awesome_series', [30, 80, 90]), 107 | /// ]; 108 | /// final dataframe = DataFrame.fromSeries(series); 109 | /// 110 | /// print(dataframe); 111 | /// 112 | /// // DataFrame (3 x 3) 113 | /// // some_series super_series awesome_series 114 | /// // 10 20 30 115 | /// // 20 40 80 116 | /// // 30 60 90 117 | /// } 118 | /// ``` 119 | factory DataFrame.fromSeries(Iterable series) => 120 | 
DataFrameImpl.fromSeries( 121 | series, 122 | const NumericalConverterImpl(), 123 | ); 124 | 125 | /// Create a [DataFrame] instance from an instance of [Matrix] 126 | /// 127 | /// [header] A header row for the [DataFrame] instance. If no [header] 128 | /// is provided, the row will be autogenerated 129 | /// 130 | /// [autoHeaderPrefix] A string that will be used as a prefix of a column 131 | /// name in case of autogenerated header row. Default value is `col_` 132 | /// 133 | /// [columns] Column indices to pick from the [matrix] 134 | /// 135 | /// [discreteColumns] Column indices with discrete values. The information 136 | /// about discrete columns will be used for [Series] - to understand what is 137 | /// discrete data, please refer to [Series] documentation 138 | /// 139 | /// A usage example: 140 | /// 141 | /// ```dart 142 | /// import 'package:ml_dataframe/ml_dataframe.dart'; 143 | /// import 'package:ml_linalg/matrix.dart'; 144 | /// 145 | /// void main() { 146 | /// final matrix = Matrix.fromList([ 147 | /// [10, 20, 30], 148 | /// [30, 30, 90], 149 | /// [60, 40, 70], 150 | /// [90, 50, 80], 151 | /// ]); 152 | /// final dataframe = DataFrame.fromMatrix(matrix, 153 | /// header: ['column', 'super_column', 'awesome_column']); 154 | /// 155 | /// print(dataframe); 156 | /// // DataFrame (4 x 3) 157 | /// // column super_column awesome_column 158 | /// // 10 20 30 159 | /// // 30 30 90 160 | /// // 60 40 70 161 | /// // 90 50 80 162 | /// } 163 | /// ``` 164 | factory DataFrame.fromMatrix( 165 | Matrix matrix, { 166 | Iterable header = const [], 167 | String autoHeaderPrefix = defaultHeaderPrefix, 168 | Iterable columns = const [], 169 | Iterable discreteColumns = const [], 170 | Iterable discreteColumnNames = const [], 171 | }) => 172 | fromMatrix( 173 | matrix, 174 | predefinedHeader: header, 175 | autoHeaderPrefix: autoHeaderPrefix, 176 | columns: columns, 177 | discreteColumns: discreteColumns, 178 | discreteColumnNames: discreteColumnNames, 179 | ); 180 | 
181 | /// Creates a dataframe instance from stringified csv [rawContent]. 182 | /// 183 | /// ````dart 184 | /// final rawContent = 185 | /// 'column_1,column_2,column_3\n' + 186 | /// '100,200,300\n' + 187 | /// '400,500,600\n' + 188 | /// '700,800,900\n'; 189 | /// 190 | /// final dataframe = DataFrame.fromRawCsv(rawContent); 191 | /// 192 | /// print(dataframe.header); // (column_1, column_2, column_3) 193 | /// print(dataframe.rows); // ((100,200,300), (400,500,600), (700,800,900)) 194 | /// print(dataframe.series.elementAt(0).data); // (100, 400, 700) 195 | /// print(dataframe.series.elementAt(1).data); // (200, 500, 800) 196 | /// print(dataframe.series.elementAt(2).data); // (300, 600, 900) 197 | /// ```` 198 | /// [fieldDelimiter] A delimiter which divides elements in a single row, 199 | /// `,` by default 200 | /// 201 | /// [textDelimiter] A delimiter which allows to use [fieldDelimiter] character 202 | /// inside a cell of the resulting table, e.g. [fieldDelimiter] is `,`, 203 | /// [textDelimiter] is `"`, and that means that every `,` symbol in [rawContent] 204 | /// which is not a field delimiter must be wrapped with `"`-symbol 205 | /// 206 | /// [eol] The end of line character, `\n` by default 207 | /// 208 | /// [headerExists] Whether the [rawContent] has a header line (list of column 209 | /// titles) or not 210 | /// 211 | /// [header] A custom header line for a resulting csv table (if 212 | /// [headerExists] is false) 213 | /// 214 | /// [autoHeaderPrefix] If there is no header line in the [rawContent] and 215 | /// no [header] provided, [autoHeaderPrefix] will be used as a prefix for 216 | /// autogenerated column titles 217 | /// 218 | /// [columns] A collection of column indices that specifies which columns 219 | /// should be extracted from the raw data and placed in the resulting [DataFrame]. 
220 | /// Has a higher precedence than [columnNames] 221 | /// 222 | /// [columnNames] A collection of column titles that specifies which columns 223 | /// should be extracted from the raw data and placed in the resulting 224 | /// [DataFrame]. It can also be used with auto-generated column names. 225 | /// The argument will be ignored if [columns] is provided 226 | factory DataFrame.fromRawCsv( 227 | String rawContent, { 228 | String fieldDelimiter = defaultFieldDelimiter, 229 | String textDelimiter = defaultTextDelimiter, 230 | String? textEndDelimiter, 231 | String eol = '\n', 232 | bool headerExists = true, 233 | Iterable header = const [], 234 | String autoHeaderPrefix = defaultHeaderPrefix, 235 | Iterable columns = const [], 236 | Iterable columnNames = const [], 237 | }) => 238 | fromRawCsv( 239 | rawContent, 240 | fieldDelimiter: fieldDelimiter, 241 | textDelimiter: textDelimiter, 242 | textEndDelimiter: textEndDelimiter, 243 | eol: eol, 244 | headerExists: headerExists, 245 | header: header, 246 | autoHeaderPrefix: autoHeaderPrefix, 247 | columns: columns, 248 | columnNames: columnNames, 249 | ); 250 | 251 | factory DataFrame.fromJson(Map json) => 252 | DataFrameImpl.fromJson(json); 253 | 254 | /// Returns a collection of names of all series (like a table header) 255 | Iterable get header; 256 | 257 | /// Returns a collection of all data item rows of the DataFrame's source data 258 | Iterable> get rows; 259 | 260 | /// Returns a lazy series (columns) collection of the [DataFrame]. 261 | /// 262 | /// [Series] is roughly a column and its header (name) 263 | Iterable get series; 264 | 265 | /// Returns a list of two integers representing the shape of the dataframe: 266 | /// the first integer is a number of rows, the second integer - a number of 267 | /// columns 268 | List get shape; 269 | 270 | /// Returns a specific [Series] by a key. 
271 | /// 272 | /// The [key] may be a series name or a series index (ordinal number of the 273 | /// series) 274 | Series operator [](Object key); 275 | 276 | /// Returns a dataframe with a new series added to the end of this dataframe's 277 | /// series collection 278 | DataFrame addSeries(Series series); 279 | 280 | /// Returns a dataframe, sampled from series that are obtained from the 281 | /// series [indices] or series [names]. 282 | /// 283 | /// If [indices] are specified, [names] parameter will be ignored. 284 | /// 285 | /// Series indices or series names may repeat. 286 | DataFrame sampleFromSeries({ 287 | Iterable indices, 288 | Iterable names, 289 | }); 290 | 291 | /// Returns a dataframe, sampled from rows that are obtained from the 292 | /// rows [indices] 293 | /// 294 | /// Row indices may repeat. 295 | DataFrame sampleFromRows(Iterable indices); 296 | 297 | /// Returns a new [DataFrame] without specified series 298 | /// 299 | /// If [indices] are specified, [names] parameter will be ignored. 300 | DataFrame dropSeries({ 301 | Iterable indices, 302 | Iterable names, 303 | }); 304 | 305 | /// Converts the [DataFrame] into [Matrix]. 306 | /// 307 | /// The method may throw an error if the [DataFrame] contains data that 308 | /// cannot be converted to numeric representation 309 | Matrix toMatrix([DType dtype]); 310 | 311 | /// Returns a new [DataFrame] with shuffled rows of this [DataFrame] 312 | DataFrame shuffle({int seed}); 313 | 314 | /// Returns a new [DataFrame] with modified data according to the 315 | /// [mapper] function 316 | /// 317 | /// [T] is a type of the source value, [R] is a type of the mapped value. 318 | /// If the value's type won't be changed, one needs to provide the same types for 319 | /// both generic types. 
320 | /// 321 | /// Usage example: 322 | /// 323 | /// ```dart 324 | /// import 'package:ml_dataframe/ml_dataframe.dart'; 325 | /// 326 | /// void main() { 327 | /// final data = DataFrame([ 328 | /// ['col_1', 'col_2', 'col_3'], 329 | /// [ 2, 20, 200], 330 | /// [ 3, 30, 300], 331 | /// [ 4, 40, 400], 332 | /// ]); 333 | /// final modifiedData = data.map((value) => value * 2); 334 | /// 335 | /// print(modifiedData); 336 | /// // DataFrame (3 x 3) 337 | /// // col_1 col_2 col_3 338 | /// // 4 40 400 339 | /// // 6 60 600 340 | /// // 8 80 800 341 | /// } 342 | /// ``` 343 | DataFrame map(R Function(T value) mapper); 344 | 345 | /// Returns a new [DataFrame] with a modified series according to the 346 | /// [mapper] function 347 | /// 348 | /// [index] is an index of the series to be modified 349 | /// 350 | /// [name] is a name of the series to be modified 351 | /// 352 | /// [index] has precedence over [name] if both parameters are specified. 353 | /// At least one parameter must be specified! Otherwise, the method will throw 354 | /// an error 355 | /// 356 | /// [T] is a type of the series source value, [R] is a type of the mapped value. 357 | /// If the value's type won't be changed, one needs to provide the same types for 358 | /// both generic types. 359 | /// 360 | /// Usage example: 361 | /// 362 | /// ```dart 363 | /// import 'package:ml_dataframe/ml_dataframe.dart'; 364 | /// 365 | /// void main() { 366 | /// final data = DataFrame([ 367 | /// ['col_1', 'col_2', 'col_3'], 368 | /// [ 2, 20, 200], 369 | /// [ 3, 30, 300], 370 | /// [ 4, 40, 400], 371 | /// ]); 372 | /// final modifiedData = data.mapSeries((value) => value * 2, name: 'col_2'); 373 | /// 374 | /// print(modifiedData); 375 | /// // DataFrame (3 x 3) 376 | /// // col_1 col_2 col_3 377 | /// // 2 40 200 378 | /// // 3 60 300 379 | /// // 4 80 400 380 | /// } 381 | /// ``` 382 | DataFrame mapSeries(R Function(T value) mapper, 383 | {int? index, String? 
name}); 384 | 385 | /// Returns a nicely formatted string to inspect the data of the [DataFrame] as the example below shows 386 | /// 387 | /// [maxRows] will display the first maxRows/2 and the last maxRows/2 rows of the [DataFrame] 388 | /// 389 | /// [maxCols] will display the first maxCols-1 columns and the last column of the [DataFrame] 390 | /// 391 | /// ````txt 392 | /// DataFrame (13 x 10) 393 | /// id age salary children gender profession ... weight 394 | /// 1 25 30000 2 M Teacher ... 78.3 395 | /// 2 46 85000 0 M Manager ... 45.2 396 | /// 3 36 45000 1 F Teacher ... 98.4 397 | /// 4 23 10000 5 M Mushroom Collector ... 57.4 398 | /// 5 22 30000 2 M Butcher ... 87.9 399 | /// ... ... ... ... ... ... ... ... 400 | /// 9 23 N/A 2 M Unemployed ... 56.7 401 | /// 10 25 32000 4 F Teacher ... 98.7 402 | /// 11 49 34700 0 M Plumber ... 120.3 403 | /// 12 36 45000 1 F Paramedic ... 67.9 404 | /// 13 23 42900 2 M Researcher ... 92.3 405 | /// ```` 406 | @override 407 | String toString({int maxRows = 10, int maxCols = 7}); 408 | } 409 | -------------------------------------------------------------------------------- /lib/src/data_frame/data_frame_impl.dart: -------------------------------------------------------------------------------- 1 | import 'package:json_annotation/json_annotation.dart'; 2 | import 'package:ml_dataframe/src/data_frame/data_frame.dart'; 3 | import 'package:ml_dataframe/src/data_frame/data_frame_json_keys.dart'; 4 | import 'package:ml_dataframe/src/data_frame/exceptions/wrong_series_shape_exception.dart'; 5 | import 'package:ml_dataframe/src/data_frame/helpers/convert_rows_to_series.dart'; 6 | import 'package:ml_dataframe/src/data_frame/helpers/convert_series_to_rows.dart'; 7 | import 'package:ml_dataframe/src/data_frame/helpers/data_frame_to_string.dart'; 8 | import 'package:ml_dataframe/src/data_frame/helpers/generate_unordered_indices.dart'; 9 | import 'package:ml_dataframe/src/data_frame/series.dart'; 10 | import 
'package:ml_dataframe/src/numerical_converter/helpers/from_numerical_converter_json.dart'; 11 | import 'package:ml_dataframe/src/numerical_converter/helpers/numerical_converter_to_json.dart'; 12 | import 'package:ml_dataframe/src/numerical_converter/numerical_converter.dart'; 13 | import 'package:ml_dataframe/src/serializable/serializable_mixin.dart'; 14 | import 'package:ml_linalg/linalg.dart'; 15 | import 'package:quiver/iterables.dart'; 16 | 17 | part 'data_frame_impl.g.dart'; 18 | 19 | @JsonSerializable() 20 | class DataFrameImpl with SerializableMixin implements DataFrame { 21 | DataFrameImpl(this.rows, this.header, this.toNumberConverter) 22 | : series = convertRowsToSeries(header, rows); 23 | 24 | DataFrameImpl.fromSeries(this.series, this.toNumberConverter) 25 | : header = series.map((series) => series.name), 26 | rows = convertSeriesToRows(series); 27 | 28 | DataFrameImpl.fromMatrix( 29 | Matrix matrix, 30 | this.header, 31 | this.toNumberConverter, 32 | Iterable? areSeriesDiscrete, 33 | ) : rows = matrix.rows, 34 | series = zip([ 35 | header, 36 | matrix.columns, 37 | areSeriesDiscrete ?? 
List.filled(matrix.columnsNum, false) 38 | ]).map((seriesData) => Series( 39 | seriesData[0] as String, 40 | seriesData[1] as Iterable, 41 | isDiscrete: seriesData[2] as bool, 42 | )) { 43 | _cachedMatrices[matrix.dtype] = matrix; 44 | } 45 | 46 | factory DataFrameImpl.fromJson(Map json) => 47 | _$DataFrameImplFromJson(json); 48 | 49 | @override 50 | Map toJson() => _$DataFrameImplToJson(this); 51 | 52 | @override 53 | @JsonKey(name: dataFrameHeaderJsonKey) 54 | final Iterable header; 55 | 56 | @override 57 | @JsonKey(name: dataFrameRowsJsonKey) 58 | final Iterable rows; 59 | 60 | @override 61 | final Iterable series; 62 | 63 | @JsonKey( 64 | name: dataFrameNumericalConverterJsonKey, 65 | toJson: numericalConverterToJson, 66 | fromJson: fromNumericalConverterJson, 67 | ) 68 | final NumericalConverter toNumberConverter; 69 | 70 | @override 71 | List get shape => [ 72 | series.first.data.length, 73 | header.length, 74 | ]; 75 | 76 | final Map _cachedMatrices = {}; 77 | 78 | @override 79 | Series operator [](Object key) { 80 | final seriesName = key is int ? header.elementAt(key) : key; 81 | final series = _getCachedOrCreateSeriesByName()[seriesName]; 82 | 83 | if (series == null) { 84 | throw Exception('Failed to find a series by key "$key". 
' 85 | 'The type of the key is "${key.runtimeType}"'); 86 | } 87 | 88 | return series; 89 | } 90 | 91 | @override 92 | DataFrame sampleFromSeries({ 93 | Iterable indices = const [], 94 | Iterable names = const [], 95 | }) { 96 | if (indices.isNotEmpty) { 97 | final maxIdx = series.length - 1; 98 | final outRangedIndices = indices.where((idx) => idx < 0 || idx > maxIdx); 99 | 100 | if (outRangedIndices.isNotEmpty) { 101 | throw RangeError('Some of provided indices are out of range: ' 102 | '$outRangedIndices, while the valid range is 0..$maxIdx (both ' 103 | 'inclusive)'); 104 | } 105 | 106 | return _sampleFromSeries(indices); 107 | } 108 | 109 | final absentNames = Set.from(names).difference(Set.from(header)); 110 | 111 | if (absentNames.isNotEmpty) { 112 | throw Exception('Columns with names $absentNames do not exist'); 113 | } 114 | ; 115 | 116 | return _sampleFromSeries(names); 117 | } 118 | 119 | @override 120 | DataFrame sampleFromRows(Iterable indices) { 121 | final rowsAsList = rows.toList(growable: false); 122 | final selectedRows = indices.map((index) => rowsAsList[index]); 123 | 124 | return DataFrame( 125 | selectedRows, 126 | headerExists: false, 127 | header: header, 128 | ); 129 | } 130 | 131 | @override 132 | DataFrame addSeries(Series newSeries) { 133 | if (newSeries.data.length != shape.first) { 134 | throw WrongSeriesShapeException(shape.first, newSeries.data.length); 135 | } 136 | 137 | return DataFrame.fromSeries([...series, newSeries]); 138 | } 139 | 140 | @override 141 | DataFrame dropSeries({ 142 | Iterable indices = const [], 143 | Iterable names = const [], 144 | }) { 145 | if (indices.isNotEmpty) { 146 | return _dropByIndices(indices, series); 147 | } 148 | 149 | return _dropByNames(names, series); 150 | } 151 | 152 | @override 153 | Matrix toMatrix([DType dtype = DType.float32]) => 154 | _cachedMatrices[dtype] ??= Matrix.fromList( 155 | toNumberConverter 156 | .convertRawDataStrict(rows) 157 | .map((row) => row.toList()) 158 | .toList(), 
159 |         dtype: dtype,
160 |       );
161 |
162 |   @override
163 |   DataFrame shuffle({int? seed}) {
164 |     final rowsAsList = rows.toList();
165 |     final indices = generateUnorderedIndices(shape.first, seed);
166 |     final shuffledRows = indices.map((index) => rowsAsList[index]);
167 |
168 |     return DataFrame(shuffledRows, header: header, headerExists: false);
169 |   }
170 |
171 |   @override
172 |   DataFrame map<T, R>(R Function(T value) mapper) {
173 |     return DataFrame(rows.map((row) => row.map((value) => mapper(value as T))),
174 |         headerExists: false, header: header);
175 |   }
176 |
177 |   @override
178 |   DataFrame mapSeries<T, R>(R Function(T value) mapper,
179 |       {int? index, String? name}) {
180 |     if (name == null && index == null) {
181 |       throw Exception('"name" or "index" must be specified');
182 |     }
183 |     // NOTE(review): the lookups below discard their result; presumably they
184 |     if (name != null) {
185 |       this[name]; // exist only so operator [] can reject a bad key - confirm
186 |     }
187 |
188 |     if (index != null) {
189 |       this[index];
190 |     }
191 |
192 |     var i = 0;
193 |     // A series is mapped when its position or its name matches.
194 |     return DataFrame.fromSeries(series.map((series) => i++ == index ||
195 |             name == series.name
196 |         ? 
Series(series.name, series.data.map((value) => mapper(value as T)),
197 |             isDiscrete: series.isDiscrete)
198 |         : series));
199 |   }
200 |
201 |   DataFrame _sampleFromSeries(Iterable ids) =>
202 |       DataFrame.fromSeries(ids.map((dynamic key) => this[key as Object]));
203 |
204 |   DataFrame _dropByIndices(Iterable indices, Iterable series) {
205 |     final droppedIndices = Set.from(indices);
206 |     final remaining = enumerate(series)
207 |         .where((entry) => !droppedIndices.contains(entry.index))
208 |         .map((entry) => entry.value);
209 |     // Series order is preserved; only the listed positions are skipped.
210 |     return DataFrame.fromSeries(remaining);
211 |   }
212 |
213 |   DataFrame _dropByNames(Iterable names, Iterable series) {
214 |     final droppedNames = Set.from(names);
215 |     final remaining =
216 |         series.where((item) => !droppedNames.contains(item.name));
217 |     // Names that match no series are simply ignored.
218 |     return DataFrame.fromSeries(remaining);
219 |   }
220 |
221 |   Map _getCachedOrCreateSeriesByName() => _seriesByName ??=
222 |       Map.fromEntries(series.map((item) => MapEntry(item.name, item)));
223 |   Map? 
_seriesByName; // Cache behind _getCachedOrCreateSeriesByName; null until first use.
224 |   /// Renders this frame via [dataFrameToString], forwarding both limits.
225 |   @override
226 |   String toString({int maxRows = 10, int maxCols = 7}) {
227 |     return dataFrameToString(
228 |       this,
229 |       maxRows: maxRows,
230 |       maxCols: maxCols,
231 |     );
232 |   }
233 | }
234 |
--------------------------------------------------------------------------------
/lib/src/data_frame/data_frame_impl.g.dart:
--------------------------------------------------------------------------------
 1 | // GENERATED CODE - DO NOT MODIFY BY HAND
 2 |
 3 | part of 'data_frame_impl.dart';
 4 |
 5 | // **************************************************************************
 6 | // JsonSerializableGenerator
 7 | // **************************************************************************
 8 |
 9 | DataFrameImpl _$DataFrameImplFromJson(Map json) {
10 |   return $checkedNew('DataFrameImpl', json, () {
11 |     $checkKeys(json, allowedKeys: const ['H', 'R', 'N']);
12 |     final val = DataFrameImpl(
13 |       $checkedConvert(json, 'R',
14 |           (v) => (v as List).map((e) => e as List)),
15 |       $checkedConvert(
16 |           json, 'H', (v) => (v as List).map((e) => e as String)),
17 |       $checkedConvert(json, 'N', (v) => fromNumericalConverterJson(v)),
18 |     );
19 |     return val;
20 |   }, fieldKeyMap: const {'rows': 'R', 'header': 'H', 'toNumberConverter': 'N'});
21 | }
22 |
23 | Map _$DataFrameImplToJson(DataFrameImpl instance) =>
24 |     {
25 |       'H': instance.header.toList(),
26 |       'R': instance.rows.map((e) => e.toList()).toList(),
27 |       'N': numericalConverterToJson(instance.toNumberConverter),
28 |     };
29 |
--------------------------------------------------------------------------------
/lib/src/data_frame/data_frame_json_keys.dart:
--------------------------------------------------------------------------------
1 | const dataFrameHeaderJsonKey = 'H'; // same value the generated serializer uses for `header`
2 | const dataFrameRowsJsonKey = 'R'; // same value the generated serializer uses for `rows`
3 | const dataFrameNumericalConverterJsonKey = 'N'; // same value used for `toNumberConverter`
4 |
--------------------------------------------------------------------------------
/lib/src/data_frame/exceptions/wrong_series_shape_exception.dart:
--------------------------------------------------------------------------------
 1 | /// Thrown when the data length of a series differs from the expected one.
 2 | class WrongSeriesShapeException implements Exception {
 3 |   WrongSeriesShapeException(this.expectedLength, this.actualLength);
 4 |
 5 |   /// The data length the series was expected to have.
 6 |   final int expectedLength;
 7 |
 8 |   /// The data length the series actually had.
 9 |   final int actualLength;
10 |
11 |   @override
12 |   String toString() {
13 |     return 'Wrong series shape, expected series data length '
14 |         '$expectedLength, got $actualLength';
15 |   }
16 | }
17 |
--------------------------------------------------------------------------------
/lib/src/data_frame/factories/from_csv.dart:
--------------------------------------------------------------------------------
 1 | import 'package:csv/csv.dart';
 2 | import 'package:ml_dataframe/src/data_frame/data_frame.dart';
 3 | import 'package:ml_dataframe/src/data_frame/factories/from_raw_data.dart';
 4 | import 'package:ml_dataframe/src/data_reader/data_reader.dart';
 5 |
 6 | /// Creates a [DataFrame] instance from a CSV file
 7 | ///
 8 | /// [fileName] a path to the CSV file
 9 | ///
10 | /// [columnDelimiter] a delimiter that divides columns in the CSV file. Default
11 | /// value is `,`
12 | ///
13 | /// [eol] End of line character. Default value is `\n`
14 | ///
15 | /// [headerExists] Whether a header row exists in the CSV file or not. Default
16 | /// value is `true`. If `false`, an autogenerated header row will be used
17 | ///
18 | /// [header] A custom header row. Meaningless if [headerExists] is `true` since
19 | /// the original header row has more priority
20 | ///
21 | /// [autoHeaderPrefix] A prefix that will be used in autogenerated header row
22 | /// column names. Default value is `col_`
23 | ///
24 | /// [columns] A collection of indices to pick from the CSV file
25 | ///
26 | /// [columnNames] A collection of column names to pick from the CSV file. [columns]
27 | /// has precedence over [columnNames] if both of them are specified
28 | ///
29 | /// Usage examples:
30 | ///
31 | /// - Case 1. 
A header row doesn't exist 32 | /// 33 | /// Suppose, one has a CSV file `path/to/file.csv` with the following content: 34 | /// 35 | /// ``` 36 | /// 10,20,30 37 | /// 11,24,33 38 | /// 12,25,36 39 | /// 13,26,37 40 | /// ``` 41 | /// 42 | /// and one creates a [DataFrame] instance from it: 43 | /// 44 | /// ```dart 45 | /// import 'package:ml_dataframe/ml_dataframe.dart'; 46 | /// 47 | /// void main() async { 48 | /// final data = await fromCsv('path/to/file.csv', headerExists: false); // prefix `col_` will be used as a default prefix for the column name 49 | /// 50 | /// print(data); 51 | /// // DataFrame (4x3) 52 | /// // col_1 col_2 col_3 53 | /// // 10 20 30 54 | /// // 11 24 33 55 | /// // 12 25 36 56 | /// // 13 26 37 57 | /// } 58 | /// ``` 59 | /// 60 | /// - Case 2. A header row exists 61 | /// 62 | /// Suppose, one has a CSV file `path/to/file.csv` with the following content: 63 | /// 64 | /// ``` 65 | /// feature_1,feature_2,feature_3 66 | /// 10,20,30 67 | /// 11,24,33 68 | /// 12,25,36 69 | /// 13,26,37 70 | /// ``` 71 | /// 72 | /// and one creates a [DataFrame] instance from it: 73 | /// 74 | /// ```dart 75 | /// import 'package:ml_dataframe/ml_dataframe.dart'; 76 | /// 77 | /// void main() async { 78 | /// final data = await fromCsv('path/to/file.csv'); // default value for `headerExists` is true 79 | /// 80 | /// print(data); 81 | /// // DataFrame (4x3) 82 | /// // feature_1 feature_2 feature_3 83 | /// // 10 20 30 84 | /// // 11 24 33 85 | /// // 12 25 36 86 | /// // 13 26 37 87 | /// } 88 | /// ``` 89 | /// 90 | /// - Case 3. 
A header row doesn't exist, a custom header is used 91 | /// 92 | /// Suppose, one has a CSV file `path/to/file.csv` with the following content: 93 | /// 94 | /// ``` 95 | /// 10,20,30 96 | /// 11,24,33 97 | /// 12,25,36 98 | /// 13,26,37 99 | /// ``` 100 | /// 101 | /// and one creates a [DataFrame] instance from it: 102 | /// 103 | /// ```dart 104 | /// import 'package:ml_dataframe/ml_dataframe.dart'; 105 | /// 106 | /// void main() async { 107 | /// final data = await fromCsv('path/to/file.csv', 108 | /// headerExists: false, 109 | /// header: ['custom_col_1', 'custom_col_2', 'custom_col_3']); 110 | /// 111 | /// print(data); 112 | /// // DataFrame (4x3) 113 | /// // custom_col_1 custom_col_2 custom_col_3 114 | /// // 10 20 30 115 | /// // 11 24 33 116 | /// // 12 25 36 117 | /// // 13 26 37 118 | /// } 119 | /// ``` 120 | /// 121 | /// - Case 4. Pick specific columns by indices 122 | /// 123 | /// Suppose, one has a CSV file `path/to/file.csv` with the following content: 124 | /// 125 | /// ``` 126 | /// 10,20,30 127 | /// 11,24,33 128 | /// 12,25,36 129 | /// 13,26,37 130 | /// ``` 131 | /// 132 | /// and one creates a [DataFrame] instance from it: 133 | /// 134 | /// ```dart 135 | /// import 'package:ml_dataframe/ml_dataframe.dart'; 136 | /// 137 | /// void main() async { 138 | /// final data = await fromCsv('path/to/file.csv', 139 | /// headerExists: false, 140 | /// columns: [0, 2]); 141 | /// 142 | /// print(data); 143 | /// // DataFrame (4x2) 144 | /// // col_1 col_3 145 | /// // 10 30 146 | /// // 11 33 147 | /// // 12 36 148 | /// // 13 37 149 | /// } 150 | /// ``` 151 | /// 152 | /// - Case 5. 
Pick specific columns by names
153 | ///
154 | /// Suppose, one has a CSV file `path/to/file.csv` with the following content:
155 | ///
156 | /// ```
157 | /// feature_1,feature_2,feature_3
158 | /// 10,20,30
159 | /// 11,24,33
160 | /// 12,25,36
161 | /// 13,26,37
162 | /// ```
163 | ///
164 | /// and one creates a [DataFrame] instance from it:
165 | ///
166 | /// ```dart
167 | /// import 'package:ml_dataframe/ml_dataframe.dart';
168 | ///
169 | /// void main() async {
170 | ///   final data = await fromCsv('path/to/file.csv',
171 | ///       columnNames: ['feature_1', 'feature_3']);
172 | ///
173 | ///   print(data);
174 | ///   // DataFrame (4x2)
175 | ///   // feature_1 feature_3
176 | ///   // 10 30
177 | ///   // 11 33
178 | ///   // 12 36
179 | ///   // 13 37
180 | /// }
181 | /// ```
182 | Future fromCsv(
183 |   String fileName, {
184 |   String columnDelimiter = defaultFieldDelimiter,
185 |   String eol = '\n',
186 |   bool headerExists = true,
187 |   Iterable header = const [],
188 |   String autoHeaderPrefix = defaultHeaderPrefix,
189 |   Iterable columns = const [],
190 |   Iterable columnNames = const [],
191 | }) async {
192 |   // The file is read and parsed in full before the frame is assembled.
193 |   final rows =
194 |       await DataReader.csv(fileName, columnDelimiter, eol).extractData();
195 |
196 |   return fromRawData(
197 |     rows,
198 |     headerExists: headerExists,
199 |     predefinedHeader: header,
200 |     autoHeaderPrefix: autoHeaderPrefix,
201 |     columns: columns,
202 |     columnNames: columnNames,
203 |   );
204 | }
205 |
--------------------------------------------------------------------------------
/lib/src/data_frame/factories/from_json.dart:
--------------------------------------------------------------------------------
 1 | import 'dart:convert';
 2 | import 'dart:io';
 3 |
 4 | import 'package:ml_dataframe/src/data_frame/data_frame.dart';
 5 | import 'package:ml_dataframe/src/data_frame/data_frame_impl.dart';
 6 |
 7 | /// Creates a [DataFrame] instance from a previously persisted json file
 8 | ///
 9 | /// A usage example:
10 | ///
11 | /// 
```dart
12 | /// import 'package:ml_dataframe/ml_dataframe.dart';
13 | ///
14 | /// void main() async {
15 | ///   final data = DataFrame([
16 | ///     ['feature_1', 'feature_2', 'feature_3'],
17 | ///     [1, 10, 100],
18 | ///     [2, 20, 200],
19 | ///     [3, 30, 300],
20 | ///   ]);
21 | ///
22 | ///   await data.saveAsJson('path/to/json_file.json');
23 | ///
24 | ///   // ...
25 | ///
26 | ///   final restoredDataFrame = await fromJson('path/to/json_file.json');
27 | ///
28 | ///   print(restoredDataFrame);
29 | ///   // DataFrame (3 x 3)
30 | ///   // feature_1 feature_2 feature_3
31 | ///   // 1 10 100
32 | ///   // 2 20 200
33 | ///   // 3 30 300
34 | /// }
35 | /// ```
36 | Future fromJson(String fileName) async {
37 |   // Read the whole file, decode it, then let the impl class rebuild the frame.
38 |   final rawJson = await File(fileName).readAsString();
39 |   final json = jsonDecode(rawJson) as Map;
40 |
41 |   return DataFrameImpl.fromJson(json);
42 | }
43 |
--------------------------------------------------------------------------------
/lib/src/data_frame/factories/from_matrix.dart:
--------------------------------------------------------------------------------
 1 | import 'package:ml_dataframe/ml_dataframe.dart';
 2 | import 'package:ml_dataframe/src/data_frame/data_frame_impl.dart';
 3 | import 'package:ml_dataframe/src/data_frame/helpers/get_header.dart';
 4 | import 'package:ml_dataframe/src/numerical_converter/numerical_converter_impl.dart';
 5 | import 'package:ml_linalg/linalg.dart';
 6 | import 'package:quiver/iterables.dart';
 7 |
 8 | DataFrame fromMatrix(
 9 |   Matrix data, {
10 |   Iterable predefinedHeader = const [],
11 |   String autoHeaderPrefix = defaultHeaderPrefix,
12 |   Iterable columns = const [],
13 |   Iterable discreteColumns = const [],
14 |   Iterable discreteColumnNames = const [],
15 | }) {
16 |   final header = getHeader(
17 |       columns.isNotEmpty ? columns.length : data.columnsNum,
18 |       autoHeaderPrefix,
19 |       [],
20 |       predefinedHeader);
21 |
22 |   final selectedData =
23 |       columns.isNotEmpty ? 
data.sample(columnIndices: columns) : data;
24 |
25 |   final areSeriesDiscrete = enumerate(header).map(
26 |     (indexedName) =>
27 |         discreteColumns.contains(indexedName.index) ||
28 |         discreteColumnNames.contains(indexedName.value),
29 |   );
30 |
31 |   return DataFrameImpl.fromMatrix(
32 |       selectedData, header, NumericalConverterImpl(), areSeriesDiscrete);
33 | }
34 |
--------------------------------------------------------------------------------
/lib/src/data_frame/factories/from_raw_csv.dart:
--------------------------------------------------------------------------------
 1 | import 'package:csv/csv.dart';
 2 | import 'package:ml_dataframe/src/data_frame/data_frame.dart';
 3 | import 'package:ml_dataframe/src/data_frame/factories/from_raw_data.dart';
 4 |
 5 | /// Creates a [DataFrame] instance from CSV content held in a string
 6 | ///
 7 | /// [rawContent] the CSV text to parse
 8 | ///
 9 | /// [fieldDelimiter], [textDelimiter], [textEndDelimiter] and [eol] are handed
10 | /// to `CsvToListConverter`. If [textEndDelimiter] is omitted, [textDelimiter]
11 | /// is used as the closing delimiter as well
12 | ///
13 | /// [headerExists], [header], [autoHeaderPrefix], [columns] and [columnNames]
14 | /// are forwarded to [fromRawData] unchanged ([header] as `predefinedHeader`)
15 | DataFrame fromRawCsv(
16 |   String rawContent, {
17 |   String fieldDelimiter = defaultFieldDelimiter,
18 |   String textDelimiter = defaultTextDelimiter,
19 |   String? textEndDelimiter,
20 |   String eol = '\n',
21 |   bool headerExists = true,
22 |   Iterable header = const [],
23 |   String autoHeaderPrefix = defaultHeaderPrefix,
24 |   Iterable columns = const [],
25 |   Iterable columnNames = const [],
26 | }) {
27 |   final converter = CsvToListConverter(
28 |     fieldDelimiter: fieldDelimiter,
29 |     textDelimiter: textDelimiter,
30 |     // Honour the caller's end delimiter; previously [textDelimiter] was passed
31 |     // here, so a custom [textEndDelimiter] was silently ignored.
32 |     textEndDelimiter: textEndDelimiter ?? textDelimiter,
33 |     eol: eol,
34 |   );
35 |   final data = converter.convert(rawContent);
36 |
37 |   return fromRawData(
38 |     data,
39 |     headerExists: headerExists,
40 |     predefinedHeader: header,
41 |     autoHeaderPrefix: autoHeaderPrefix,
42 |     columns: columns,
43 |     columnNames: columnNames,
44 |   );
45 | }
46 |
--------------------------------------------------------------------------------
/lib/src/data_frame/factories/from_raw_data.dart:
--------------------------------------------------------------------------------
1 | import 'package:ml_dataframe/ml_dataframe.dart';
2 | import 'package:ml_dataframe/src/data_frame/data_frame_impl.dart';
3 | import 
'package:ml_dataframe/src/data_frame/helpers/get_header.dart';
 4 | import 'package:ml_dataframe/src/data_selector/data_selector.dart';
 5 | import 'package:ml_dataframe/src/numerical_converter/numerical_converter_impl.dart';
 6 | import 'package:quiver/iterables.dart';
 7 |
 8 | DataFrame fromRawData(
 9 |   Iterable<Iterable<dynamic>> data, {
10 |   bool headerExists = true,
11 |   Iterable predefinedHeader = const [],
12 |   String autoHeaderPrefix = defaultHeaderPrefix,
13 |   Iterable columns = const [],
14 |   Iterable columnNames = const [],
15 | }) {
16 |   final columnsNum = columns.isNotEmpty
17 |       ? columns.length
18 |       : data.isEmpty
19 |           ? predefinedHeader.length
20 |           : data.first.length;
21 |
22 |   final header = getHeader(
23 |       columnsNum,
24 |       autoHeaderPrefix,
25 |       headerExists
26 |           ? data.isEmpty
27 |               ? []
28 |               : data.first.map((dynamic el) => el.toString())
29 |           : [],
30 |       predefinedHeader);
31 |
32 |   final defaultIndices = count(0).take(columnsNum);
33 |
34 |   final filteredIndices = enumerate(header)
35 |       .where((indexedName) => columnNames.contains(indexedName.value))
36 |       .map((indexedName) => indexedName.index);
37 |   // [columns] win; [columnNames] count only without a predefined header.
38 |   final columnIndices = columns.isNotEmpty
39 |       ? columns
40 |       : predefinedHeader.isNotEmpty || columnNames.isEmpty
41 |           ? defaultIndices
42 |           : filteredIndices;
43 |   // When [headerExists], the first row is the header, so it is not data.
44 |   final originalHeadlessData = headerExists ? data.skip(1) : data;
45 |
46 |   final selectedData = DataSelector(columnIndices).select(originalHeadlessData);
47 |
48 |   final selectedHeader = (predefinedHeader.isNotEmpty
49 |           ? enumerate(header)
50 |           : enumerate(header).where((indexedName) => columnIndices.isNotEmpty
51 |               ? 
columnIndices.contains(indexedName.index)
52 |               : true))
53 |       .map((indexedName) => indexedName.value);
54 |
55 |   return DataFrameImpl(
56 |       selectedData, selectedHeader, const NumericalConverterImpl());
57 | }
58 |
--------------------------------------------------------------------------------
/lib/src/data_frame/factories/prefilled_dataframes/_deprecated/load_iris_dataset.dart:
--------------------------------------------------------------------------------
 1 | import 'package:ml_dataframe/src/data_frame/data_frame.dart';
 2 | import 'package:ml_dataframe/src/data_frame/factories/prefilled_dataframes/get_iris_data_frame.dart';
 3 |
 4 | /// Returns a future that completes with a [DataFrame] instance filled with
 5 | /// [Iris](https://www.kaggle.com/datasets/uciml/iris) dataset
 6 | ///
 7 | /// The dataset consists of 6 columns and 150 rows. The data is divided into 3
 8 | /// classes - `Iris-setosa`, `Iris-versicolor` and `Iris-virginica`
 9 | ///
10 | /// The target column is `Species`
11 | ///
12 | /// The dataset is good for training classification models
13 | ///
14 | /// Deprecated, use synchronous [getIrisDataFrame] instead
15 | @deprecated
16 | Future loadIrisDataset() => Future.value(getIrisDataFrame());
17 |
--------------------------------------------------------------------------------
/lib/src/data_frame/factories/prefilled_dataframes/_deprecated/load_pima_indians_diabetes_dataset.dart:
--------------------------------------------------------------------------------
 1 | import 'package:ml_dataframe/src/data_frame/data_frame.dart';
 2 | import 'package:ml_dataframe/src/data_frame/factories/prefilled_dataframes/get_pima_indians_diabetes_data_frame.dart';
 3 |
 4 | /// Returns a [DataFrame] instance filled with [Pima Indians diabetes](https://www.kaggle.com/datasets/uciml/pima-indians-diabetes-database) dataset
 5 | ///
 6 | /// The dataset consists of 9 columns and 768 rows
 7 | ///
 8 | /// The target column is `Outcome` which contains either `1` or `0`
 9 | ///
10 | /// The dataset is 
good for training classification models
11 | ///
12 | /// Deprecated, use synchronous [getPimaIndiansDiabetesDataFrame] instead
13 | @deprecated
14 | Future loadPimaIndiansDiabetesDataset() =>
15 |     Future.value(getPimaIndiansDiabetesDataFrame());
16 |
--------------------------------------------------------------------------------
/lib/src/data_frame/factories/prefilled_dataframes/get_iris_data_frame.dart:
--------------------------------------------------------------------------------
 1 | import 'package:ml_dataframe/src/data_frame/data_frame.dart';
 2 |
 3 | /// Returns a [DataFrame] instance filled with [Iris](https://www.kaggle.com/datasets/uciml/iris) dataset
 4 | ///
 5 | /// The dataset consists of 6 columns and 150 rows. The data is divided into 3
 6 | /// classes - `Iris-setosa`, `Iris-versicolor` and `Iris-virginica`
 7 | ///
 8 | /// The target column is `Species`
 9 | ///
10 | /// The dataset is good for training classification models
11 | DataFrame getIrisDataFrame() {
12 |   return DataFrame.fromRawCsv('''
13 | Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
14 | 1,5.1,3.5,1.4,0.2,Iris-setosa
15 | 2,4.9,3.0,1.4,0.2,Iris-setosa
16 | 3,4.7,3.2,1.3,0.2,Iris-setosa
17 | 4,4.6,3.1,1.5,0.2,Iris-setosa
18 | 5,5.0,3.6,1.4,0.2,Iris-setosa
19 | 6,5.4,3.9,1.7,0.4,Iris-setosa
20 | 7,4.6,3.4,1.4,0.3,Iris-setosa
21 | 8,5.0,3.4,1.5,0.2,Iris-setosa
22 | 9,4.4,2.9,1.4,0.2,Iris-setosa
23 | 10,4.9,3.1,1.5,0.1,Iris-setosa
24 | 11,5.4,3.7,1.5,0.2,Iris-setosa
25 | 12,4.8,3.4,1.6,0.2,Iris-setosa
26 | 13,4.8,3.0,1.4,0.1,Iris-setosa
27 | 14,4.3,3.0,1.1,0.1,Iris-setosa
28 | 15,5.8,4.0,1.2,0.2,Iris-setosa
29 | 16,5.7,4.4,1.5,0.4,Iris-setosa
30 | 17,5.4,3.9,1.3,0.4,Iris-setosa
31 | 18,5.1,3.5,1.4,0.3,Iris-setosa
32 | 19,5.7,3.8,1.7,0.3,Iris-setosa
33 | 20,5.1,3.8,1.5,0.3,Iris-setosa
34 | 21,5.4,3.4,1.7,0.2,Iris-setosa
35 | 22,5.1,3.7,1.5,0.4,Iris-setosa
36 | 23,4.6,3.6,1.0,0.2,Iris-setosa
37 | 24,5.1,3.3,1.7,0.5,Iris-setosa
38 | 25,4.8,3.4,1.9,0.2,Iris-setosa
39 | 26,5.0,3.0,1.6,0.2,Iris-setosa 40 | 27,5.0,3.4,1.6,0.4,Iris-setosa 41 | 28,5.2,3.5,1.5,0.2,Iris-setosa 42 | 29,5.2,3.4,1.4,0.2,Iris-setosa 43 | 30,4.7,3.2,1.6,0.2,Iris-setosa 44 | 31,4.8,3.1,1.6,0.2,Iris-setosa 45 | 32,5.4,3.4,1.5,0.4,Iris-setosa 46 | 33,5.2,4.1,1.5,0.1,Iris-setosa 47 | 34,5.5,4.2,1.4,0.2,Iris-setosa 48 | 35,4.9,3.1,1.5,0.1,Iris-setosa 49 | 36,5.0,3.2,1.2,0.2,Iris-setosa 50 | 37,5.5,3.5,1.3,0.2,Iris-setosa 51 | 38,4.9,3.1,1.5,0.1,Iris-setosa 52 | 39,4.4,3.0,1.3,0.2,Iris-setosa 53 | 40,5.1,3.4,1.5,0.2,Iris-setosa 54 | 41,5.0,3.5,1.3,0.3,Iris-setosa 55 | 42,4.5,2.3,1.3,0.3,Iris-setosa 56 | 43,4.4,3.2,1.3,0.2,Iris-setosa 57 | 44,5.0,3.5,1.6,0.6,Iris-setosa 58 | 45,5.1,3.8,1.9,0.4,Iris-setosa 59 | 46,4.8,3.0,1.4,0.3,Iris-setosa 60 | 47,5.1,3.8,1.6,0.2,Iris-setosa 61 | 48,4.6,3.2,1.4,0.2,Iris-setosa 62 | 49,5.3,3.7,1.5,0.2,Iris-setosa 63 | 50,5.0,3.3,1.4,0.2,Iris-setosa 64 | 51,7.0,3.2,4.7,1.4,Iris-versicolor 65 | 52,6.4,3.2,4.5,1.5,Iris-versicolor 66 | 53,6.9,3.1,4.9,1.5,Iris-versicolor 67 | 54,5.5,2.3,4.0,1.3,Iris-versicolor 68 | 55,6.5,2.8,4.6,1.5,Iris-versicolor 69 | 56,5.7,2.8,4.5,1.3,Iris-versicolor 70 | 57,6.3,3.3,4.7,1.6,Iris-versicolor 71 | 58,4.9,2.4,3.3,1.0,Iris-versicolor 72 | 59,6.6,2.9,4.6,1.3,Iris-versicolor 73 | 60,5.2,2.7,3.9,1.4,Iris-versicolor 74 | 61,5.0,2.0,3.5,1.0,Iris-versicolor 75 | 62,5.9,3.0,4.2,1.5,Iris-versicolor 76 | 63,6.0,2.2,4.0,1.0,Iris-versicolor 77 | 64,6.1,2.9,4.7,1.4,Iris-versicolor 78 | 65,5.6,2.9,3.6,1.3,Iris-versicolor 79 | 66,6.7,3.1,4.4,1.4,Iris-versicolor 80 | 67,5.6,3.0,4.5,1.5,Iris-versicolor 81 | 68,5.8,2.7,4.1,1.0,Iris-versicolor 82 | 69,6.2,2.2,4.5,1.5,Iris-versicolor 83 | 70,5.6,2.5,3.9,1.1,Iris-versicolor 84 | 71,5.9,3.2,4.8,1.8,Iris-versicolor 85 | 72,6.1,2.8,4.0,1.3,Iris-versicolor 86 | 73,6.3,2.5,4.9,1.5,Iris-versicolor 87 | 74,6.1,2.8,4.7,1.2,Iris-versicolor 88 | 75,6.4,2.9,4.3,1.3,Iris-versicolor 89 | 76,6.6,3.0,4.4,1.4,Iris-versicolor 90 | 77,6.8,2.8,4.8,1.4,Iris-versicolor 91 | 
78,6.7,3.0,5.0,1.7,Iris-versicolor 92 | 79,6.0,2.9,4.5,1.5,Iris-versicolor 93 | 80,5.7,2.6,3.5,1.0,Iris-versicolor 94 | 81,5.5,2.4,3.8,1.1,Iris-versicolor 95 | 82,5.5,2.4,3.7,1.0,Iris-versicolor 96 | 83,5.8,2.7,3.9,1.2,Iris-versicolor 97 | 84,6.0,2.7,5.1,1.6,Iris-versicolor 98 | 85,5.4,3.0,4.5,1.5,Iris-versicolor 99 | 86,6.0,3.4,4.5,1.6,Iris-versicolor 100 | 87,6.7,3.1,4.7,1.5,Iris-versicolor 101 | 88,6.3,2.3,4.4,1.3,Iris-versicolor 102 | 89,5.6,3.0,4.1,1.3,Iris-versicolor 103 | 90,5.5,2.5,4.0,1.3,Iris-versicolor 104 | 91,5.5,2.6,4.4,1.2,Iris-versicolor 105 | 92,6.1,3.0,4.6,1.4,Iris-versicolor 106 | 93,5.8,2.6,4.0,1.2,Iris-versicolor 107 | 94,5.0,2.3,3.3,1.0,Iris-versicolor 108 | 95,5.6,2.7,4.2,1.3,Iris-versicolor 109 | 96,5.7,3.0,4.2,1.2,Iris-versicolor 110 | 97,5.7,2.9,4.2,1.3,Iris-versicolor 111 | 98,6.2,2.9,4.3,1.3,Iris-versicolor 112 | 99,5.1,2.5,3.0,1.1,Iris-versicolor 113 | 100,5.7,2.8,4.1,1.3,Iris-versicolor 114 | 101,6.3,3.3,6.0,2.5,Iris-virginica 115 | 102,5.8,2.7,5.1,1.9,Iris-virginica 116 | 103,7.1,3.0,5.9,2.1,Iris-virginica 117 | 104,6.3,2.9,5.6,1.8,Iris-virginica 118 | 105,6.5,3.0,5.8,2.2,Iris-virginica 119 | 106,7.6,3.0,6.6,2.1,Iris-virginica 120 | 107,4.9,2.5,4.5,1.7,Iris-virginica 121 | 108,7.3,2.9,6.3,1.8,Iris-virginica 122 | 109,6.7,2.5,5.8,1.8,Iris-virginica 123 | 110,7.2,3.6,6.1,2.5,Iris-virginica 124 | 111,6.5,3.2,5.1,2.0,Iris-virginica 125 | 112,6.4,2.7,5.3,1.9,Iris-virginica 126 | 113,6.8,3.0,5.5,2.1,Iris-virginica 127 | 114,5.7,2.5,5.0,2.0,Iris-virginica 128 | 115,5.8,2.8,5.1,2.4,Iris-virginica 129 | 116,6.4,3.2,5.3,2.3,Iris-virginica 130 | 117,6.5,3.0,5.5,1.8,Iris-virginica 131 | 118,7.7,3.8,6.7,2.2,Iris-virginica 132 | 119,7.7,2.6,6.9,2.3,Iris-virginica 133 | 120,6.0,2.2,5.0,1.5,Iris-virginica 134 | 121,6.9,3.2,5.7,2.3,Iris-virginica 135 | 122,5.6,2.8,4.9,2.0,Iris-virginica 136 | 123,7.7,2.8,6.7,2.0,Iris-virginica 137 | 124,6.3,2.7,4.9,1.8,Iris-virginica 138 | 125,6.7,3.3,5.7,2.1,Iris-virginica 139 | 126,7.2,3.2,6.0,1.8,Iris-virginica 140 
| 127,6.2,2.8,4.8,1.8,Iris-virginica 141 | 128,6.1,3.0,4.9,1.8,Iris-virginica 142 | 129,6.4,2.8,5.6,2.1,Iris-virginica 143 | 130,7.2,3.0,5.8,1.6,Iris-virginica 144 | 131,7.4,2.8,6.1,1.9,Iris-virginica 145 | 132,7.9,3.8,6.4,2.0,Iris-virginica 146 | 133,6.4,2.8,5.6,2.2,Iris-virginica 147 | 134,6.3,2.8,5.1,1.5,Iris-virginica 148 | 135,6.1,2.6,5.6,1.4,Iris-virginica 149 | 136,7.7,3.0,6.1,2.3,Iris-virginica 150 | 137,6.3,3.4,5.6,2.4,Iris-virginica 151 | 138,6.4,3.1,5.5,1.8,Iris-virginica 152 | 139,6.0,3.0,4.8,1.8,Iris-virginica 153 | 140,6.9,3.1,5.4,2.1,Iris-virginica 154 | 141,6.7,3.1,5.6,2.4,Iris-virginica 155 | 142,6.9,3.1,5.1,2.3,Iris-virginica 156 | 143,5.8,2.7,5.1,1.9,Iris-virginica 157 | 144,6.8,3.2,5.9,2.3,Iris-virginica 158 | 145,6.7,3.3,5.7,2.5,Iris-virginica 159 | 146,6.7,3.0,5.2,2.3,Iris-virginica 160 | 147,6.3,2.5,5.0,1.9,Iris-virginica 161 | 148,6.5,3.0,5.2,2.0,Iris-virginica 162 | 149,6.2,3.4,5.4,2.3,Iris-virginica 163 | 150,5.9,3.0,5.1,1.8,Iris-virginica 164 | '''); 165 | } 166 | -------------------------------------------------------------------------------- /lib/src/data_frame/factories/prefilled_dataframes/get_pima_indians_diabetes_data_frame.dart: -------------------------------------------------------------------------------- 1 | import 'package:ml_dataframe/src/data_frame/data_frame.dart'; 2 | 3 | /// Returns a [DataFrame] instance filled with [Pima Indians diabetes](https://www.kaggle.com/datasets/uciml/pima-indians-diabetes-database) dataset 4 | /// 5 | /// The dataset consists of 9 columns and 768 rows 6 | /// 7 | /// The target column is `Outcome` which contains either `1` or `0` 8 | /// 9 | /// The dataset is good for training classification models 10 | DataFrame getPimaIndiansDiabetesDataFrame() { 11 | return DataFrame.fromRawCsv(''' 12 | Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome 13 | 6,148,72,35,0,33.6,0.627,50,1 14 | 1,85,66,29,0,26.6,0.351,31,0 15 | 8,183,64,0,0,23.3,0.672,32,1 16 | 
1,89,66,23,94,28.1,0.167,21,0 17 | 0,137,40,35,168,43.1,2.288,33,1 18 | 5,116,74,0,0,25.6,0.201,30,0 19 | 3,78,50,32,88,31,0.248,26,1 20 | 10,115,0,0,0,35.3,0.134,29,0 21 | 2,197,70,45,543,30.5,0.158,53,1 22 | 8,125,96,0,0,0,0.232,54,1 23 | 4,110,92,0,0,37.6,0.191,30,0 24 | 10,168,74,0,0,38,0.537,34,1 25 | 10,139,80,0,0,27.1,1.441,57,0 26 | 1,189,60,23,846,30.1,0.398,59,1 27 | 5,166,72,19,175,25.8,0.587,51,1 28 | 7,100,0,0,0,30,0.484,32,1 29 | 0,118,84,47,230,45.8,0.551,31,1 30 | 7,107,74,0,0,29.6,0.254,31,1 31 | 1,103,30,38,83,43.3,0.183,33,0 32 | 1,115,70,30,96,34.6,0.529,32,1 33 | 3,126,88,41,235,39.3,0.704,27,0 34 | 8,99,84,0,0,35.4,0.388,50,0 35 | 7,196,90,0,0,39.8,0.451,41,1 36 | 9,119,80,35,0,29,0.263,29,1 37 | 11,143,94,33,146,36.6,0.254,51,1 38 | 10,125,70,26,115,31.1,0.205,41,1 39 | 7,147,76,0,0,39.4,0.257,43,1 40 | 1,97,66,15,140,23.2,0.487,22,0 41 | 13,145,82,19,110,22.2,0.245,57,0 42 | 5,117,92,0,0,34.1,0.337,38,0 43 | 5,109,75,26,0,36,0.546,60,0 44 | 3,158,76,36,245,31.6,0.851,28,1 45 | 3,88,58,11,54,24.8,0.267,22,0 46 | 6,92,92,0,0,19.9,0.188,28,0 47 | 10,122,78,31,0,27.6,0.512,45,0 48 | 4,103,60,33,192,24,0.966,33,0 49 | 11,138,76,0,0,33.2,0.42,35,0 50 | 9,102,76,37,0,32.9,0.665,46,1 51 | 2,90,68,42,0,38.2,0.503,27,1 52 | 4,111,72,47,207,37.1,1.39,56,1 53 | 3,180,64,25,70,34,0.271,26,0 54 | 7,133,84,0,0,40.2,0.696,37,0 55 | 7,106,92,18,0,22.7,0.235,48,0 56 | 9,171,110,24,240,45.4,0.721,54,1 57 | 7,159,64,0,0,27.4,0.294,40,0 58 | 0,180,66,39,0,42,1.893,25,1 59 | 1,146,56,0,0,29.7,0.564,29,0 60 | 2,71,70,27,0,28,0.586,22,0 61 | 7,103,66,32,0,39.1,0.344,31,1 62 | 7,105,0,0,0,0,0.305,24,0 63 | 1,103,80,11,82,19.4,0.491,22,0 64 | 1,101,50,15,36,24.2,0.526,26,0 65 | 5,88,66,21,23,24.4,0.342,30,0 66 | 8,176,90,34,300,33.7,0.467,58,1 67 | 7,150,66,42,342,34.7,0.718,42,0 68 | 1,73,50,10,0,23,0.248,21,0 69 | 7,187,68,39,304,37.7,0.254,41,1 70 | 0,100,88,60,110,46.8,0.962,31,0 71 | 0,146,82,0,0,40.5,1.781,44,0 72 | 0,105,64,41,142,41.5,0.173,22,0 73 | 
2,84,0,0,0,0,0.304,21,0 74 | 8,133,72,0,0,32.9,0.27,39,1 75 | 5,44,62,0,0,25,0.587,36,0 76 | 2,141,58,34,128,25.4,0.699,24,0 77 | 7,114,66,0,0,32.8,0.258,42,1 78 | 5,99,74,27,0,29,0.203,32,0 79 | 0,109,88,30,0,32.5,0.855,38,1 80 | 2,109,92,0,0,42.7,0.845,54,0 81 | 1,95,66,13,38,19.6,0.334,25,0 82 | 4,146,85,27,100,28.9,0.189,27,0 83 | 2,100,66,20,90,32.9,0.867,28,1 84 | 5,139,64,35,140,28.6,0.411,26,0 85 | 13,126,90,0,0,43.4,0.583,42,1 86 | 4,129,86,20,270,35.1,0.231,23,0 87 | 1,79,75,30,0,32,0.396,22,0 88 | 1,0,48,20,0,24.7,0.14,22,0 89 | 7,62,78,0,0,32.6,0.391,41,0 90 | 5,95,72,33,0,37.7,0.37,27,0 91 | 0,131,0,0,0,43.2,0.27,26,1 92 | 2,112,66,22,0,25,0.307,24,0 93 | 3,113,44,13,0,22.4,0.14,22,0 94 | 2,74,0,0,0,0,0.102,22,0 95 | 7,83,78,26,71,29.3,0.767,36,0 96 | 0,101,65,28,0,24.6,0.237,22,0 97 | 5,137,108,0,0,48.8,0.227,37,1 98 | 2,110,74,29,125,32.4,0.698,27,0 99 | 13,106,72,54,0,36.6,0.178,45,0 100 | 2,100,68,25,71,38.5,0.324,26,0 101 | 15,136,70,32,110,37.1,0.153,43,1 102 | 1,107,68,19,0,26.5,0.165,24,0 103 | 1,80,55,0,0,19.1,0.258,21,0 104 | 4,123,80,15,176,32,0.443,34,0 105 | 7,81,78,40,48,46.7,0.261,42,0 106 | 4,134,72,0,0,23.8,0.277,60,1 107 | 2,142,82,18,64,24.7,0.761,21,0 108 | 6,144,72,27,228,33.9,0.255,40,0 109 | 2,92,62,28,0,31.6,0.13,24,0 110 | 1,71,48,18,76,20.4,0.323,22,0 111 | 6,93,50,30,64,28.7,0.356,23,0 112 | 1,122,90,51,220,49.7,0.325,31,1 113 | 1,163,72,0,0,39,1.222,33,1 114 | 1,151,60,0,0,26.1,0.179,22,0 115 | 0,125,96,0,0,22.5,0.262,21,0 116 | 1,81,72,18,40,26.6,0.283,24,0 117 | 2,85,65,0,0,39.6,0.93,27,0 118 | 1,126,56,29,152,28.7,0.801,21,0 119 | 1,96,122,0,0,22.4,0.207,27,0 120 | 4,144,58,28,140,29.5,0.287,37,0 121 | 3,83,58,31,18,34.3,0.336,25,0 122 | 0,95,85,25,36,37.4,0.247,24,1 123 | 3,171,72,33,135,33.3,0.199,24,1 124 | 8,155,62,26,495,34,0.543,46,1 125 | 1,89,76,34,37,31.2,0.192,23,0 126 | 4,76,62,0,0,34,0.391,25,0 127 | 7,160,54,32,175,30.5,0.588,39,1 128 | 4,146,92,0,0,31.2,0.539,61,1 129 | 5,124,74,0,0,34,0.22,38,1 130 | 
5,78,48,0,0,33.7,0.654,25,0 131 | 4,97,60,23,0,28.2,0.443,22,0 132 | 4,99,76,15,51,23.2,0.223,21,0 133 | 0,162,76,56,100,53.2,0.759,25,1 134 | 6,111,64,39,0,34.2,0.26,24,0 135 | 2,107,74,30,100,33.6,0.404,23,0 136 | 5,132,80,0,0,26.8,0.186,69,0 137 | 0,113,76,0,0,33.3,0.278,23,1 138 | 1,88,30,42,99,55,0.496,26,1 139 | 3,120,70,30,135,42.9,0.452,30,0 140 | 1,118,58,36,94,33.3,0.261,23,0 141 | 1,117,88,24,145,34.5,0.403,40,1 142 | 0,105,84,0,0,27.9,0.741,62,1 143 | 4,173,70,14,168,29.7,0.361,33,1 144 | 9,122,56,0,0,33.3,1.114,33,1 145 | 3,170,64,37,225,34.5,0.356,30,1 146 | 8,84,74,31,0,38.3,0.457,39,0 147 | 2,96,68,13,49,21.1,0.647,26,0 148 | 2,125,60,20,140,33.8,0.088,31,0 149 | 0,100,70,26,50,30.8,0.597,21,0 150 | 0,93,60,25,92,28.7,0.532,22,0 151 | 0,129,80,0,0,31.2,0.703,29,0 152 | 5,105,72,29,325,36.9,0.159,28,0 153 | 3,128,78,0,0,21.1,0.268,55,0 154 | 5,106,82,30,0,39.5,0.286,38,0 155 | 2,108,52,26,63,32.5,0.318,22,0 156 | 10,108,66,0,0,32.4,0.272,42,1 157 | 4,154,62,31,284,32.8,0.237,23,0 158 | 0,102,75,23,0,0,0.572,21,0 159 | 9,57,80,37,0,32.8,0.096,41,0 160 | 2,106,64,35,119,30.5,1.4,34,0 161 | 5,147,78,0,0,33.7,0.218,65,0 162 | 2,90,70,17,0,27.3,0.085,22,0 163 | 1,136,74,50,204,37.4,0.399,24,0 164 | 4,114,65,0,0,21.9,0.432,37,0 165 | 9,156,86,28,155,34.3,1.189,42,1 166 | 1,153,82,42,485,40.6,0.687,23,0 167 | 8,188,78,0,0,47.9,0.137,43,1 168 | 7,152,88,44,0,50,0.337,36,1 169 | 2,99,52,15,94,24.6,0.637,21,0 170 | 1,109,56,21,135,25.2,0.833,23,0 171 | 2,88,74,19,53,29,0.229,22,0 172 | 17,163,72,41,114,40.9,0.817,47,1 173 | 4,151,90,38,0,29.7,0.294,36,0 174 | 7,102,74,40,105,37.2,0.204,45,0 175 | 0,114,80,34,285,44.2,0.167,27,0 176 | 2,100,64,23,0,29.7,0.368,21,0 177 | 0,131,88,0,0,31.6,0.743,32,1 178 | 6,104,74,18,156,29.9,0.722,41,1 179 | 3,148,66,25,0,32.5,0.256,22,0 180 | 4,120,68,0,0,29.6,0.709,34,0 181 | 4,110,66,0,0,31.9,0.471,29,0 182 | 3,111,90,12,78,28.4,0.495,29,0 183 | 6,102,82,0,0,30.8,0.18,36,1 184 | 6,134,70,23,130,35.4,0.542,29,1 185 | 
2,87,0,23,0,28.9,0.773,25,0 186 | 1,79,60,42,48,43.5,0.678,23,0 187 | 2,75,64,24,55,29.7,0.37,33,0 188 | 8,179,72,42,130,32.7,0.719,36,1 189 | 6,85,78,0,0,31.2,0.382,42,0 190 | 0,129,110,46,130,67.1,0.319,26,1 191 | 5,143,78,0,0,45,0.19,47,0 192 | 5,130,82,0,0,39.1,0.956,37,1 193 | 6,87,80,0,0,23.2,0.084,32,0 194 | 0,119,64,18,92,34.9,0.725,23,0 195 | 1,0,74,20,23,27.7,0.299,21,0 196 | 5,73,60,0,0,26.8,0.268,27,0 197 | 4,141,74,0,0,27.6,0.244,40,0 198 | 7,194,68,28,0,35.9,0.745,41,1 199 | 8,181,68,36,495,30.1,0.615,60,1 200 | 1,128,98,41,58,32,1.321,33,1 201 | 8,109,76,39,114,27.9,0.64,31,1 202 | 5,139,80,35,160,31.6,0.361,25,1 203 | 3,111,62,0,0,22.6,0.142,21,0 204 | 9,123,70,44,94,33.1,0.374,40,0 205 | 7,159,66,0,0,30.4,0.383,36,1 206 | 11,135,0,0,0,52.3,0.578,40,1 207 | 8,85,55,20,0,24.4,0.136,42,0 208 | 5,158,84,41,210,39.4,0.395,29,1 209 | 1,105,58,0,0,24.3,0.187,21,0 210 | 3,107,62,13,48,22.9,0.678,23,1 211 | 4,109,64,44,99,34.8,0.905,26,1 212 | 4,148,60,27,318,30.9,0.15,29,1 213 | 0,113,80,16,0,31,0.874,21,0 214 | 1,138,82,0,0,40.1,0.236,28,0 215 | 0,108,68,20,0,27.3,0.787,32,0 216 | 2,99,70,16,44,20.4,0.235,27,0 217 | 6,103,72,32,190,37.7,0.324,55,0 218 | 5,111,72,28,0,23.9,0.407,27,0 219 | 8,196,76,29,280,37.5,0.605,57,1 220 | 5,162,104,0,0,37.7,0.151,52,1 221 | 1,96,64,27,87,33.2,0.289,21,0 222 | 7,184,84,33,0,35.5,0.355,41,1 223 | 2,81,60,22,0,27.7,0.29,25,0 224 | 0,147,85,54,0,42.8,0.375,24,0 225 | 7,179,95,31,0,34.2,0.164,60,0 226 | 0,140,65,26,130,42.6,0.431,24,1 227 | 9,112,82,32,175,34.2,0.26,36,1 228 | 12,151,70,40,271,41.8,0.742,38,1 229 | 5,109,62,41,129,35.8,0.514,25,1 230 | 6,125,68,30,120,30,0.464,32,0 231 | 5,85,74,22,0,29,1.224,32,1 232 | 5,112,66,0,0,37.8,0.261,41,1 233 | 0,177,60,29,478,34.6,1.072,21,1 234 | 2,158,90,0,0,31.6,0.805,66,1 235 | 7,119,0,0,0,25.2,0.209,37,0 236 | 7,142,60,33,190,28.8,0.687,61,0 237 | 1,100,66,15,56,23.6,0.666,26,0 238 | 1,87,78,27,32,34.6,0.101,22,0 239 | 0,101,76,0,0,35.7,0.198,26,0 240 | 
3,162,52,38,0,37.2,0.652,24,1 241 | 4,197,70,39,744,36.7,2.329,31,0 242 | 0,117,80,31,53,45.2,0.089,24,0 243 | 4,142,86,0,0,44,0.645,22,1 244 | 6,134,80,37,370,46.2,0.238,46,1 245 | 1,79,80,25,37,25.4,0.583,22,0 246 | 4,122,68,0,0,35,0.394,29,0 247 | 3,74,68,28,45,29.7,0.293,23,0 248 | 4,171,72,0,0,43.6,0.479,26,1 249 | 7,181,84,21,192,35.9,0.586,51,1 250 | 0,179,90,27,0,44.1,0.686,23,1 251 | 9,164,84,21,0,30.8,0.831,32,1 252 | 0,104,76,0,0,18.4,0.582,27,0 253 | 1,91,64,24,0,29.2,0.192,21,0 254 | 4,91,70,32,88,33.1,0.446,22,0 255 | 3,139,54,0,0,25.6,0.402,22,1 256 | 6,119,50,22,176,27.1,1.318,33,1 257 | 2,146,76,35,194,38.2,0.329,29,0 258 | 9,184,85,15,0,30,1.213,49,1 259 | 10,122,68,0,0,31.2,0.258,41,0 260 | 0,165,90,33,680,52.3,0.427,23,0 261 | 9,124,70,33,402,35.4,0.282,34,0 262 | 1,111,86,19,0,30.1,0.143,23,0 263 | 9,106,52,0,0,31.2,0.38,42,0 264 | 2,129,84,0,0,28,0.284,27,0 265 | 2,90,80,14,55,24.4,0.249,24,0 266 | 0,86,68,32,0,35.8,0.238,25,0 267 | 12,92,62,7,258,27.6,0.926,44,1 268 | 1,113,64,35,0,33.6,0.543,21,1 269 | 3,111,56,39,0,30.1,0.557,30,0 270 | 2,114,68,22,0,28.7,0.092,25,0 271 | 1,193,50,16,375,25.9,0.655,24,0 272 | 11,155,76,28,150,33.3,1.353,51,1 273 | 3,191,68,15,130,30.9,0.299,34,0 274 | 3,141,0,0,0,30,0.761,27,1 275 | 4,95,70,32,0,32.1,0.612,24,0 276 | 3,142,80,15,0,32.4,0.2,63,0 277 | 4,123,62,0,0,32,0.226,35,1 278 | 5,96,74,18,67,33.6,0.997,43,0 279 | 0,138,0,0,0,36.3,0.933,25,1 280 | 2,128,64,42,0,40,1.101,24,0 281 | 0,102,52,0,0,25.1,0.078,21,0 282 | 2,146,0,0,0,27.5,0.24,28,1 283 | 10,101,86,37,0,45.6,1.136,38,1 284 | 2,108,62,32,56,25.2,0.128,21,0 285 | 3,122,78,0,0,23,0.254,40,0 286 | 1,71,78,50,45,33.2,0.422,21,0 287 | 13,106,70,0,0,34.2,0.251,52,0 288 | 2,100,70,52,57,40.5,0.677,25,0 289 | 7,106,60,24,0,26.5,0.296,29,1 290 | 0,104,64,23,116,27.8,0.454,23,0 291 | 5,114,74,0,0,24.9,0.744,57,0 292 | 2,108,62,10,278,25.3,0.881,22,0 293 | 0,146,70,0,0,37.9,0.334,28,1 294 | 10,129,76,28,122,35.9,0.28,39,0 295 | 
7,133,88,15,155,32.4,0.262,37,0 296 | 7,161,86,0,0,30.4,0.165,47,1 297 | 2,108,80,0,0,27,0.259,52,1 298 | 7,136,74,26,135,26,0.647,51,0 299 | 5,155,84,44,545,38.7,0.619,34,0 300 | 1,119,86,39,220,45.6,0.808,29,1 301 | 4,96,56,17,49,20.8,0.34,26,0 302 | 5,108,72,43,75,36.1,0.263,33,0 303 | 0,78,88,29,40,36.9,0.434,21,0 304 | 0,107,62,30,74,36.6,0.757,25,1 305 | 2,128,78,37,182,43.3,1.224,31,1 306 | 1,128,48,45,194,40.5,0.613,24,1 307 | 0,161,50,0,0,21.9,0.254,65,0 308 | 6,151,62,31,120,35.5,0.692,28,0 309 | 2,146,70,38,360,28,0.337,29,1 310 | 0,126,84,29,215,30.7,0.52,24,0 311 | 14,100,78,25,184,36.6,0.412,46,1 312 | 8,112,72,0,0,23.6,0.84,58,0 313 | 0,167,0,0,0,32.3,0.839,30,1 314 | 2,144,58,33,135,31.6,0.422,25,1 315 | 5,77,82,41,42,35.8,0.156,35,0 316 | 5,115,98,0,0,52.9,0.209,28,1 317 | 3,150,76,0,0,21,0.207,37,0 318 | 2,120,76,37,105,39.7,0.215,29,0 319 | 10,161,68,23,132,25.5,0.326,47,1 320 | 0,137,68,14,148,24.8,0.143,21,0 321 | 0,128,68,19,180,30.5,1.391,25,1 322 | 2,124,68,28,205,32.9,0.875,30,1 323 | 6,80,66,30,0,26.2,0.313,41,0 324 | 0,106,70,37,148,39.4,0.605,22,0 325 | 2,155,74,17,96,26.6,0.433,27,1 326 | 3,113,50,10,85,29.5,0.626,25,0 327 | 7,109,80,31,0,35.9,1.127,43,1 328 | 2,112,68,22,94,34.1,0.315,26,0 329 | 3,99,80,11,64,19.3,0.284,30,0 330 | 3,182,74,0,0,30.5,0.345,29,1 331 | 3,115,66,39,140,38.1,0.15,28,0 332 | 6,194,78,0,0,23.5,0.129,59,1 333 | 4,129,60,12,231,27.5,0.527,31,0 334 | 3,112,74,30,0,31.6,0.197,25,1 335 | 0,124,70,20,0,27.4,0.254,36,1 336 | 13,152,90,33,29,26.8,0.731,43,1 337 | 2,112,75,32,0,35.7,0.148,21,0 338 | 1,157,72,21,168,25.6,0.123,24,0 339 | 1,122,64,32,156,35.1,0.692,30,1 340 | 10,179,70,0,0,35.1,0.2,37,0 341 | 2,102,86,36,120,45.5,0.127,23,1 342 | 6,105,70,32,68,30.8,0.122,37,0 343 | 8,118,72,19,0,23.1,1.476,46,0 344 | 2,87,58,16,52,32.7,0.166,25,0 345 | 1,180,0,0,0,43.3,0.282,41,1 346 | 12,106,80,0,0,23.6,0.137,44,0 347 | 1,95,60,18,58,23.9,0.26,22,0 348 | 0,165,76,43,255,47.9,0.259,26,0 349 | 0,117,0,0,0,33.8,0.932,44,0 
350 | 5,115,76,0,0,31.2,0.343,44,1 351 | 9,152,78,34,171,34.2,0.893,33,1 352 | 7,178,84,0,0,39.9,0.331,41,1 353 | 1,130,70,13,105,25.9,0.472,22,0 354 | 1,95,74,21,73,25.9,0.673,36,0 355 | 1,0,68,35,0,32,0.389,22,0 356 | 5,122,86,0,0,34.7,0.29,33,0 357 | 8,95,72,0,0,36.8,0.485,57,0 358 | 8,126,88,36,108,38.5,0.349,49,0 359 | 1,139,46,19,83,28.7,0.654,22,0 360 | 3,116,0,0,0,23.5,0.187,23,0 361 | 3,99,62,19,74,21.8,0.279,26,0 362 | 5,0,80,32,0,41,0.346,37,1 363 | 4,92,80,0,0,42.2,0.237,29,0 364 | 4,137,84,0,0,31.2,0.252,30,0 365 | 3,61,82,28,0,34.4,0.243,46,0 366 | 1,90,62,12,43,27.2,0.58,24,0 367 | 3,90,78,0,0,42.7,0.559,21,0 368 | 9,165,88,0,0,30.4,0.302,49,1 369 | 1,125,50,40,167,33.3,0.962,28,1 370 | 13,129,0,30,0,39.9,0.569,44,1 371 | 12,88,74,40,54,35.3,0.378,48,0 372 | 1,196,76,36,249,36.5,0.875,29,1 373 | 5,189,64,33,325,31.2,0.583,29,1 374 | 5,158,70,0,0,29.8,0.207,63,0 375 | 5,103,108,37,0,39.2,0.305,65,0 376 | 4,146,78,0,0,38.5,0.52,67,1 377 | 4,147,74,25,293,34.9,0.385,30,0 378 | 5,99,54,28,83,34,0.499,30,0 379 | 6,124,72,0,0,27.6,0.368,29,1 380 | 0,101,64,17,0,21,0.252,21,0 381 | 3,81,86,16,66,27.5,0.306,22,0 382 | 1,133,102,28,140,32.8,0.234,45,1 383 | 3,173,82,48,465,38.4,2.137,25,1 384 | 0,118,64,23,89,0,1.731,21,0 385 | 0,84,64,22,66,35.8,0.545,21,0 386 | 2,105,58,40,94,34.9,0.225,25,0 387 | 2,122,52,43,158,36.2,0.816,28,0 388 | 12,140,82,43,325,39.2,0.528,58,1 389 | 0,98,82,15,84,25.2,0.299,22,0 390 | 1,87,60,37,75,37.2,0.509,22,0 391 | 4,156,75,0,0,48.3,0.238,32,1 392 | 0,93,100,39,72,43.4,1.021,35,0 393 | 1,107,72,30,82,30.8,0.821,24,0 394 | 0,105,68,22,0,20,0.236,22,0 395 | 1,109,60,8,182,25.4,0.947,21,0 396 | 1,90,62,18,59,25.1,1.268,25,0 397 | 1,125,70,24,110,24.3,0.221,25,0 398 | 1,119,54,13,50,22.3,0.205,24,0 399 | 5,116,74,29,0,32.3,0.66,35,1 400 | 8,105,100,36,0,43.3,0.239,45,1 401 | 5,144,82,26,285,32,0.452,58,1 402 | 3,100,68,23,81,31.6,0.949,28,0 403 | 1,100,66,29,196,32,0.444,42,0 404 | 5,166,76,0,0,45.7,0.34,27,1 405 | 
1,131,64,14,415,23.7,0.389,21,0 406 | 4,116,72,12,87,22.1,0.463,37,0 407 | 4,158,78,0,0,32.9,0.803,31,1 408 | 2,127,58,24,275,27.7,1.6,25,0 409 | 3,96,56,34,115,24.7,0.944,39,0 410 | 0,131,66,40,0,34.3,0.196,22,1 411 | 3,82,70,0,0,21.1,0.389,25,0 412 | 3,193,70,31,0,34.9,0.241,25,1 413 | 4,95,64,0,0,32,0.161,31,1 414 | 6,137,61,0,0,24.2,0.151,55,0 415 | 5,136,84,41,88,35,0.286,35,1 416 | 9,72,78,25,0,31.6,0.28,38,0 417 | 5,168,64,0,0,32.9,0.135,41,1 418 | 2,123,48,32,165,42.1,0.52,26,0 419 | 4,115,72,0,0,28.9,0.376,46,1 420 | 0,101,62,0,0,21.9,0.336,25,0 421 | 8,197,74,0,0,25.9,1.191,39,1 422 | 1,172,68,49,579,42.4,0.702,28,1 423 | 6,102,90,39,0,35.7,0.674,28,0 424 | 1,112,72,30,176,34.4,0.528,25,0 425 | 1,143,84,23,310,42.4,1.076,22,0 426 | 1,143,74,22,61,26.2,0.256,21,0 427 | 0,138,60,35,167,34.6,0.534,21,1 428 | 3,173,84,33,474,35.7,0.258,22,1 429 | 1,97,68,21,0,27.2,1.095,22,0 430 | 4,144,82,32,0,38.5,0.554,37,1 431 | 1,83,68,0,0,18.2,0.624,27,0 432 | 3,129,64,29,115,26.4,0.219,28,1 433 | 1,119,88,41,170,45.3,0.507,26,0 434 | 2,94,68,18,76,26,0.561,21,0 435 | 0,102,64,46,78,40.6,0.496,21,0 436 | 2,115,64,22,0,30.8,0.421,21,0 437 | 8,151,78,32,210,42.9,0.516,36,1 438 | 4,184,78,39,277,37,0.264,31,1 439 | 0,94,0,0,0,0,0.256,25,0 440 | 1,181,64,30,180,34.1,0.328,38,1 441 | 0,135,94,46,145,40.6,0.284,26,0 442 | 1,95,82,25,180,35,0.233,43,1 443 | 2,99,0,0,0,22.2,0.108,23,0 444 | 3,89,74,16,85,30.4,0.551,38,0 445 | 1,80,74,11,60,30,0.527,22,0 446 | 2,139,75,0,0,25.6,0.167,29,0 447 | 1,90,68,8,0,24.5,1.138,36,0 448 | 0,141,0,0,0,42.4,0.205,29,1 449 | 12,140,85,33,0,37.4,0.244,41,0 450 | 5,147,75,0,0,29.9,0.434,28,0 451 | 1,97,70,15,0,18.2,0.147,21,0 452 | 6,107,88,0,0,36.8,0.727,31,0 453 | 0,189,104,25,0,34.3,0.435,41,1 454 | 2,83,66,23,50,32.2,0.497,22,0 455 | 4,117,64,27,120,33.2,0.23,24,0 456 | 8,108,70,0,0,30.5,0.955,33,1 457 | 4,117,62,12,0,29.7,0.38,30,1 458 | 0,180,78,63,14,59.4,2.42,25,1 459 | 1,100,72,12,70,25.3,0.658,28,0 460 | 0,95,80,45,92,36.5,0.33,26,0 
461 | 0,104,64,37,64,33.6,0.51,22,1 462 | 0,120,74,18,63,30.5,0.285,26,0 463 | 1,82,64,13,95,21.2,0.415,23,0 464 | 2,134,70,0,0,28.9,0.542,23,1 465 | 0,91,68,32,210,39.9,0.381,25,0 466 | 2,119,0,0,0,19.6,0.832,72,0 467 | 2,100,54,28,105,37.8,0.498,24,0 468 | 14,175,62,30,0,33.6,0.212,38,1 469 | 1,135,54,0,0,26.7,0.687,62,0 470 | 5,86,68,28,71,30.2,0.364,24,0 471 | 10,148,84,48,237,37.6,1.001,51,1 472 | 9,134,74,33,60,25.9,0.46,81,0 473 | 9,120,72,22,56,20.8,0.733,48,0 474 | 1,71,62,0,0,21.8,0.416,26,0 475 | 8,74,70,40,49,35.3,0.705,39,0 476 | 5,88,78,30,0,27.6,0.258,37,0 477 | 10,115,98,0,0,24,1.022,34,0 478 | 0,124,56,13,105,21.8,0.452,21,0 479 | 0,74,52,10,36,27.8,0.269,22,0 480 | 0,97,64,36,100,36.8,0.6,25,0 481 | 8,120,0,0,0,30,0.183,38,1 482 | 6,154,78,41,140,46.1,0.571,27,0 483 | 1,144,82,40,0,41.3,0.607,28,0 484 | 0,137,70,38,0,33.2,0.17,22,0 485 | 0,119,66,27,0,38.8,0.259,22,0 486 | 7,136,90,0,0,29.9,0.21,50,0 487 | 4,114,64,0,0,28.9,0.126,24,0 488 | 0,137,84,27,0,27.3,0.231,59,0 489 | 2,105,80,45,191,33.7,0.711,29,1 490 | 7,114,76,17,110,23.8,0.466,31,0 491 | 8,126,74,38,75,25.9,0.162,39,0 492 | 4,132,86,31,0,28,0.419,63,0 493 | 3,158,70,30,328,35.5,0.344,35,1 494 | 0,123,88,37,0,35.2,0.197,29,0 495 | 4,85,58,22,49,27.8,0.306,28,0 496 | 0,84,82,31,125,38.2,0.233,23,0 497 | 0,145,0,0,0,44.2,0.63,31,1 498 | 0,135,68,42,250,42.3,0.365,24,1 499 | 1,139,62,41,480,40.7,0.536,21,0 500 | 0,173,78,32,265,46.5,1.159,58,0 501 | 4,99,72,17,0,25.6,0.294,28,0 502 | 8,194,80,0,0,26.1,0.551,67,0 503 | 2,83,65,28,66,36.8,0.629,24,0 504 | 2,89,90,30,0,33.5,0.292,42,0 505 | 4,99,68,38,0,32.8,0.145,33,0 506 | 4,125,70,18,122,28.9,1.144,45,1 507 | 3,80,0,0,0,0,0.174,22,0 508 | 6,166,74,0,0,26.6,0.304,66,0 509 | 5,110,68,0,0,26,0.292,30,0 510 | 2,81,72,15,76,30.1,0.547,25,0 511 | 7,195,70,33,145,25.1,0.163,55,1 512 | 6,154,74,32,193,29.3,0.839,39,0 513 | 2,117,90,19,71,25.2,0.313,21,0 514 | 3,84,72,32,0,37.2,0.267,28,0 515 | 6,0,68,41,0,39,0.727,41,1 516 | 
7,94,64,25,79,33.3,0.738,41,0 517 | 3,96,78,39,0,37.3,0.238,40,0 518 | 10,75,82,0,0,33.3,0.263,38,0 519 | 0,180,90,26,90,36.5,0.314,35,1 520 | 1,130,60,23,170,28.6,0.692,21,0 521 | 2,84,50,23,76,30.4,0.968,21,0 522 | 8,120,78,0,0,25,0.409,64,0 523 | 12,84,72,31,0,29.7,0.297,46,1 524 | 0,139,62,17,210,22.1,0.207,21,0 525 | 9,91,68,0,0,24.2,0.2,58,0 526 | 2,91,62,0,0,27.3,0.525,22,0 527 | 3,99,54,19,86,25.6,0.154,24,0 528 | 3,163,70,18,105,31.6,0.268,28,1 529 | 9,145,88,34,165,30.3,0.771,53,1 530 | 7,125,86,0,0,37.6,0.304,51,0 531 | 13,76,60,0,0,32.8,0.18,41,0 532 | 6,129,90,7,326,19.6,0.582,60,0 533 | 2,68,70,32,66,25,0.187,25,0 534 | 3,124,80,33,130,33.2,0.305,26,0 535 | 6,114,0,0,0,0,0.189,26,0 536 | 9,130,70,0,0,34.2,0.652,45,1 537 | 3,125,58,0,0,31.6,0.151,24,0 538 | 3,87,60,18,0,21.8,0.444,21,0 539 | 1,97,64,19,82,18.2,0.299,21,0 540 | 3,116,74,15,105,26.3,0.107,24,0 541 | 0,117,66,31,188,30.8,0.493,22,0 542 | 0,111,65,0,0,24.6,0.66,31,0 543 | 2,122,60,18,106,29.8,0.717,22,0 544 | 0,107,76,0,0,45.3,0.686,24,0 545 | 1,86,66,52,65,41.3,0.917,29,0 546 | 6,91,0,0,0,29.8,0.501,31,0 547 | 1,77,56,30,56,33.3,1.251,24,0 548 | 4,132,0,0,0,32.9,0.302,23,1 549 | 0,105,90,0,0,29.6,0.197,46,0 550 | 0,57,60,0,0,21.7,0.735,67,0 551 | 0,127,80,37,210,36.3,0.804,23,0 552 | 3,129,92,49,155,36.4,0.968,32,1 553 | 8,100,74,40,215,39.4,0.661,43,1 554 | 3,128,72,25,190,32.4,0.549,27,1 555 | 10,90,85,32,0,34.9,0.825,56,1 556 | 4,84,90,23,56,39.5,0.159,25,0 557 | 1,88,78,29,76,32,0.365,29,0 558 | 8,186,90,35,225,34.5,0.423,37,1 559 | 5,187,76,27,207,43.6,1.034,53,1 560 | 4,131,68,21,166,33.1,0.16,28,0 561 | 1,164,82,43,67,32.8,0.341,50,0 562 | 4,189,110,31,0,28.5,0.68,37,0 563 | 1,116,70,28,0,27.4,0.204,21,0 564 | 3,84,68,30,106,31.9,0.591,25,0 565 | 6,114,88,0,0,27.8,0.247,66,0 566 | 1,88,62,24,44,29.9,0.422,23,0 567 | 1,84,64,23,115,36.9,0.471,28,0 568 | 7,124,70,33,215,25.5,0.161,37,0 569 | 1,97,70,40,0,38.1,0.218,30,0 570 | 8,110,76,0,0,27.8,0.237,58,0 571 | 
11,103,68,40,0,46.2,0.126,42,0 572 | 11,85,74,0,0,30.1,0.3,35,0 573 | 6,125,76,0,0,33.8,0.121,54,1 574 | 0,198,66,32,274,41.3,0.502,28,1 575 | 1,87,68,34,77,37.6,0.401,24,0 576 | 6,99,60,19,54,26.9,0.497,32,0 577 | 0,91,80,0,0,32.4,0.601,27,0 578 | 2,95,54,14,88,26.1,0.748,22,0 579 | 1,99,72,30,18,38.6,0.412,21,0 580 | 6,92,62,32,126,32,0.085,46,0 581 | 4,154,72,29,126,31.3,0.338,37,0 582 | 0,121,66,30,165,34.3,0.203,33,1 583 | 3,78,70,0,0,32.5,0.27,39,0 584 | 2,130,96,0,0,22.6,0.268,21,0 585 | 3,111,58,31,44,29.5,0.43,22,0 586 | 2,98,60,17,120,34.7,0.198,22,0 587 | 1,143,86,30,330,30.1,0.892,23,0 588 | 1,119,44,47,63,35.5,0.28,25,0 589 | 6,108,44,20,130,24,0.813,35,0 590 | 2,118,80,0,0,42.9,0.693,21,1 591 | 10,133,68,0,0,27,0.245,36,0 592 | 2,197,70,99,0,34.7,0.575,62,1 593 | 0,151,90,46,0,42.1,0.371,21,1 594 | 6,109,60,27,0,25,0.206,27,0 595 | 12,121,78,17,0,26.5,0.259,62,0 596 | 8,100,76,0,0,38.7,0.19,42,0 597 | 8,124,76,24,600,28.7,0.687,52,1 598 | 1,93,56,11,0,22.5,0.417,22,0 599 | 8,143,66,0,0,34.9,0.129,41,1 600 | 6,103,66,0,0,24.3,0.249,29,0 601 | 3,176,86,27,156,33.3,1.154,52,1 602 | 0,73,0,0,0,21.1,0.342,25,0 603 | 11,111,84,40,0,46.8,0.925,45,1 604 | 2,112,78,50,140,39.4,0.175,24,0 605 | 3,132,80,0,0,34.4,0.402,44,1 606 | 2,82,52,22,115,28.5,1.699,25,0 607 | 6,123,72,45,230,33.6,0.733,34,0 608 | 0,188,82,14,185,32,0.682,22,1 609 | 0,67,76,0,0,45.3,0.194,46,0 610 | 1,89,24,19,25,27.8,0.559,21,0 611 | 1,173,74,0,0,36.8,0.088,38,1 612 | 1,109,38,18,120,23.1,0.407,26,0 613 | 1,108,88,19,0,27.1,0.4,24,0 614 | 6,96,0,0,0,23.7,0.19,28,0 615 | 1,124,74,36,0,27.8,0.1,30,0 616 | 7,150,78,29,126,35.2,0.692,54,1 617 | 4,183,0,0,0,28.4,0.212,36,1 618 | 1,124,60,32,0,35.8,0.514,21,0 619 | 1,181,78,42,293,40,1.258,22,1 620 | 1,92,62,25,41,19.5,0.482,25,0 621 | 0,152,82,39,272,41.5,0.27,27,0 622 | 1,111,62,13,182,24,0.138,23,0 623 | 3,106,54,21,158,30.9,0.292,24,0 624 | 3,174,58,22,194,32.9,0.593,36,1 625 | 7,168,88,42,321,38.2,0.787,40,1 626 | 
6,105,80,28,0,32.5,0.878,26,0 627 | 11,138,74,26,144,36.1,0.557,50,1 628 | 3,106,72,0,0,25.8,0.207,27,0 629 | 6,117,96,0,0,28.7,0.157,30,0 630 | 2,68,62,13,15,20.1,0.257,23,0 631 | 9,112,82,24,0,28.2,1.282,50,1 632 | 0,119,0,0,0,32.4,0.141,24,1 633 | 2,112,86,42,160,38.4,0.246,28,0 634 | 2,92,76,20,0,24.2,1.698,28,0 635 | 6,183,94,0,0,40.8,1.461,45,0 636 | 0,94,70,27,115,43.5,0.347,21,0 637 | 2,108,64,0,0,30.8,0.158,21,0 638 | 4,90,88,47,54,37.7,0.362,29,0 639 | 0,125,68,0,0,24.7,0.206,21,0 640 | 0,132,78,0,0,32.4,0.393,21,0 641 | 5,128,80,0,0,34.6,0.144,45,0 642 | 4,94,65,22,0,24.7,0.148,21,0 643 | 7,114,64,0,0,27.4,0.732,34,1 644 | 0,102,78,40,90,34.5,0.238,24,0 645 | 2,111,60,0,0,26.2,0.343,23,0 646 | 1,128,82,17,183,27.5,0.115,22,0 647 | 10,92,62,0,0,25.9,0.167,31,0 648 | 13,104,72,0,0,31.2,0.465,38,1 649 | 5,104,74,0,0,28.8,0.153,48,0 650 | 2,94,76,18,66,31.6,0.649,23,0 651 | 7,97,76,32,91,40.9,0.871,32,1 652 | 1,100,74,12,46,19.5,0.149,28,0 653 | 0,102,86,17,105,29.3,0.695,27,0 654 | 4,128,70,0,0,34.3,0.303,24,0 655 | 6,147,80,0,0,29.5,0.178,50,1 656 | 4,90,0,0,0,28,0.61,31,0 657 | 3,103,72,30,152,27.6,0.73,27,0 658 | 2,157,74,35,440,39.4,0.134,30,0 659 | 1,167,74,17,144,23.4,0.447,33,1 660 | 0,179,50,36,159,37.8,0.455,22,1 661 | 11,136,84,35,130,28.3,0.26,42,1 662 | 0,107,60,25,0,26.4,0.133,23,0 663 | 1,91,54,25,100,25.2,0.234,23,0 664 | 1,117,60,23,106,33.8,0.466,27,0 665 | 5,123,74,40,77,34.1,0.269,28,0 666 | 2,120,54,0,0,26.8,0.455,27,0 667 | 1,106,70,28,135,34.2,0.142,22,0 668 | 2,155,52,27,540,38.7,0.24,25,1 669 | 2,101,58,35,90,21.8,0.155,22,0 670 | 1,120,80,48,200,38.9,1.162,41,0 671 | 11,127,106,0,0,39,0.19,51,0 672 | 3,80,82,31,70,34.2,1.292,27,1 673 | 10,162,84,0,0,27.7,0.182,54,0 674 | 1,199,76,43,0,42.9,1.394,22,1 675 | 8,167,106,46,231,37.6,0.165,43,1 676 | 9,145,80,46,130,37.9,0.637,40,1 677 | 6,115,60,39,0,33.7,0.245,40,1 678 | 1,112,80,45,132,34.8,0.217,24,0 679 | 4,145,82,18,0,32.5,0.235,70,1 680 | 10,111,70,27,0,27.5,0.141,40,1 681 | 
6,98,58,33,190,34,0.43,43,0 682 | 9,154,78,30,100,30.9,0.164,45,0 683 | 6,165,68,26,168,33.6,0.631,49,0 684 | 1,99,58,10,0,25.4,0.551,21,0 685 | 10,68,106,23,49,35.5,0.285,47,0 686 | 3,123,100,35,240,57.3,0.88,22,0 687 | 8,91,82,0,0,35.6,0.587,68,0 688 | 6,195,70,0,0,30.9,0.328,31,1 689 | 9,156,86,0,0,24.8,0.23,53,1 690 | 0,93,60,0,0,35.3,0.263,25,0 691 | 3,121,52,0,0,36,0.127,25,1 692 | 2,101,58,17,265,24.2,0.614,23,0 693 | 2,56,56,28,45,24.2,0.332,22,0 694 | 0,162,76,36,0,49.6,0.364,26,1 695 | 0,95,64,39,105,44.6,0.366,22,0 696 | 4,125,80,0,0,32.3,0.536,27,1 697 | 5,136,82,0,0,0,0.64,69,0 698 | 2,129,74,26,205,33.2,0.591,25,0 699 | 3,130,64,0,0,23.1,0.314,22,0 700 | 1,107,50,19,0,28.3,0.181,29,0 701 | 1,140,74,26,180,24.1,0.828,23,0 702 | 1,144,82,46,180,46.1,0.335,46,1 703 | 8,107,80,0,0,24.6,0.856,34,0 704 | 13,158,114,0,0,42.3,0.257,44,1 705 | 2,121,70,32,95,39.1,0.886,23,0 706 | 7,129,68,49,125,38.5,0.439,43,1 707 | 2,90,60,0,0,23.5,0.191,25,0 708 | 7,142,90,24,480,30.4,0.128,43,1 709 | 3,169,74,19,125,29.9,0.268,31,1 710 | 0,99,0,0,0,25,0.253,22,0 711 | 4,127,88,11,155,34.5,0.598,28,0 712 | 4,118,70,0,0,44.5,0.904,26,0 713 | 2,122,76,27,200,35.9,0.483,26,0 714 | 6,125,78,31,0,27.6,0.565,49,1 715 | 1,168,88,29,0,35,0.905,52,1 716 | 2,129,0,0,0,38.5,0.304,41,0 717 | 4,110,76,20,100,28.4,0.118,27,0 718 | 6,80,80,36,0,39.8,0.177,28,0 719 | 10,115,0,0,0,0,0.261,30,1 720 | 2,127,46,21,335,34.4,0.176,22,0 721 | 9,164,78,0,0,32.8,0.148,45,1 722 | 2,93,64,32,160,38,0.674,23,1 723 | 3,158,64,13,387,31.2,0.295,24,0 724 | 5,126,78,27,22,29.6,0.439,40,0 725 | 10,129,62,36,0,41.2,0.441,38,1 726 | 0,134,58,20,291,26.4,0.352,21,0 727 | 3,102,74,0,0,29.5,0.121,32,0 728 | 7,187,50,33,392,33.9,0.826,34,1 729 | 3,173,78,39,185,33.8,0.97,31,1 730 | 10,94,72,18,0,23.1,0.595,56,0 731 | 1,108,60,46,178,35.5,0.415,24,0 732 | 5,97,76,27,0,35.6,0.378,52,1 733 | 4,83,86,19,0,29.3,0.317,34,0 734 | 1,114,66,36,200,38.1,0.289,21,0 735 | 1,149,68,29,127,29.3,0.349,42,1 736 | 
5,117,86,30,105,39.1,0.251,42,0 737 | 1,111,94,0,0,32.8,0.265,45,0 738 | 4,112,78,40,0,39.4,0.236,38,0 739 | 1,116,78,29,180,36.1,0.496,25,0 740 | 0,141,84,26,0,32.4,0.433,22,0 741 | 2,175,88,0,0,22.9,0.326,22,0 742 | 2,92,52,0,0,30.1,0.141,22,0 743 | 3,130,78,23,79,28.4,0.323,34,1 744 | 8,120,86,0,0,28.4,0.259,22,1 745 | 2,174,88,37,120,44.5,0.646,24,1 746 | 2,106,56,27,165,29,0.426,22,0 747 | 2,105,75,0,0,23.3,0.56,53,0 748 | 4,95,60,32,0,35.4,0.284,28,0 749 | 0,126,86,27,120,27.4,0.515,21,0 750 | 8,65,72,23,0,32,0.6,42,0 751 | 2,99,60,17,160,36.6,0.453,21,0 752 | 1,102,74,0,0,39.5,0.293,42,1 753 | 11,120,80,37,150,42.3,0.785,48,1 754 | 3,102,44,20,94,30.8,0.4,26,0 755 | 1,109,58,18,116,28.5,0.219,22,0 756 | 9,140,94,0,0,32.7,0.734,45,1 757 | 13,153,88,37,140,40.6,1.174,39,0 758 | 12,100,84,33,105,30,0.488,46,0 759 | 1,147,94,41,0,49.3,0.358,27,1 760 | 1,81,74,41,57,46.3,1.096,32,0 761 | 3,187,70,22,200,36.4,0.408,36,1 762 | 6,162,62,0,0,24.3,0.178,50,1 763 | 4,136,70,0,0,31.2,1.182,22,1 764 | 1,121,78,39,74,39,0.261,28,0 765 | 3,108,62,24,0,26,0.223,25,0 766 | 0,181,88,44,510,43.3,0.222,26,1 767 | 8,154,78,32,0,32.4,0.443,45,1 768 | 1,128,88,39,110,36.5,1.057,37,1 769 | 7,137,90,41,0,32,0.391,39,0 770 | 0,123,72,0,0,36.3,0.258,52,1 771 | 1,106,76,0,0,37.5,0.197,26,0 772 | 6,190,92,0,0,35.5,0.278,66,1 773 | 2,88,58,26,16,28.4,0.766,22,0 774 | 9,170,74,31,0,44,0.403,43,1 775 | 9,89,62,0,0,22.5,0.142,33,0 776 | 10,101,76,48,180,32.9,0.171,63,0 777 | 2,122,70,27,0,36.8,0.34,27,0 778 | 5,121,72,23,112,26.2,0.245,30,0 779 | 1,126,60,0,0,30.1,0.349,47,1 780 | 1,93,70,31,0,30.4,0.315,23,0 781 | '''); 782 | } 783 | -------------------------------------------------------------------------------- /lib/src/data_frame/helpers/convert_rows_to_series.dart: -------------------------------------------------------------------------------- 1 | import 'package:ml_dataframe/src/data_frame/series.dart'; 2 | 3 | Iterable convertRowsToSeries( 4 | Iterable columnHeaders, 5 | Iterable> 
/// Transposes column-oriented [series] into a lazily generated sequence of
/// rows.
///
/// The i-th yielded row is a fixed-length list holding the i-th element of
/// every series, in the order the series are given.
///
/// Generation stops as soon as any of the series is exhausted, so the number
/// of rows equals the length of the shortest series. An empty [series]
/// collection produces no rows.
Iterable<Iterable<dynamic>> convertSeriesToRows(Iterable<Series> series) sync* {
  final iterators =
      series.map((series) => series.data.iterator).toList(growable: false);

  // Folding over zero iterators returns the initial `true`, which would make
  // the loop below yield empty rows forever.
  if (iterators.isEmpty) {
    return;
  }

  // `moveNext()` is deliberately the left operand: every iterator must be
  // advanced on each step, even after an earlier one has reported exhaustion,
  // while the accumulator remembers whether all of them still have a value.
  while (iterators.fold<bool>(
      true, (isActive, iterator) => iterator.moveNext() && isActive)) {
    yield iterators.map((iterator) => iterator.current).toList(growable: false);
  }
}
maxCols == 0) { 28 | return basicString; 29 | } 30 | 31 | final seriesDisplayData = <_SeriesDisplayData>[]; 32 | final rowCountStartEnd = 33 | maxRows > 1 ? [maxRows ~/ 2, maxRows - maxRows ~/ 2] : [1, 0]; 34 | final colCountStartEnd = maxCols > 1 ? [maxCols - 1, 1] : [1, 0]; 35 | 36 | var j = 0; 37 | for (var colTitle in dataFrame.header) { 38 | if (j < colCountStartEnd[0] || j >= nCols - colCountStartEnd[1]) { 39 | seriesDisplayData.add(_SeriesDisplayData(j, colTitle)); 40 | } else if (j == colCountStartEnd[0] && colCountStartEnd[1] != 0) { 41 | seriesDisplayData.add(_SeriesDisplayData(j, kSkippingSymbol)); 42 | } 43 | j++; 44 | } 45 | 46 | var i = 0; 47 | for (var row in dataFrame.rows) { 48 | if (i < rowCountStartEnd[0] || i >= nRows - rowCountStartEnd[1]) { 49 | var j = 0; 50 | var seriesCounter = 0; 51 | for (var value in row) { 52 | if (j < colCountStartEnd[0] || j >= nCols - colCountStartEnd[1]) { 53 | var displayData = seriesDisplayData[seriesCounter]; 54 | var valueString = value.toString(); 55 | displayData.data.add(valueString); 56 | displayData.maxChars = 57 | math.max(displayData.maxChars, valueString.length); 58 | seriesCounter++; 59 | } else if (j == colCountStartEnd[0] && colCountStartEnd[1] != 0) { 60 | seriesDisplayData[seriesCounter].data.add(kSkippingSymbol); 61 | seriesDisplayData[seriesCounter].maxChars = kSkippingSymbol.length; 62 | seriesCounter++; 63 | } 64 | j++; 65 | } 66 | } else if (i == rowCountStartEnd[0] && rowCountStartEnd[1] != 0) { 67 | for (var d in seriesDisplayData) { 68 | d.maxChars = math.max(kSkippingSymbol.length, d.maxChars); 69 | d.data.add(kSkippingSymbol); 70 | } 71 | } 72 | i++; 73 | } 74 | 75 | var finalLines = []; 76 | // construct header line: 77 | finalLines.add(basicString); 78 | finalLines.add(seriesDisplayData 79 | .map((d) => d.colTitle.padLeft(d.maxChars) + ' ' * kPadding) 80 | .join('') 81 | .trimRight()); 82 | // construct other lines: 83 | for (var i = 0; i < seriesDisplayData[0].data.length; i++) { 84 | 
/// Returns the indices from `0` to `length - 1` in a pseudo-random order that
/// differs from the ascending one whenever such an order exists.
///
/// [length] The number of indices to generate. A non-positive value results
/// in an empty list. For `1` the only possible permutation, `[0]`, is
/// returned as is.
///
/// [seed] An optional seed passed to [math.Random]; the same seed produces
/// the same order
List<int> generateUnorderedIndices(int length, [int? seed]) {
  if (length <= 0) {
    return [];
  }

  final orderedIndices = List<int>.generate(length, (i) => i);

  // A single index has exactly one permutation, so it can never differ from
  // the ordered sequence; reshuffling until it does would never terminate.
  if (length == 1) {
    return orderedIndices;
  }

  final generator = math.Random(seed);
  final indices = [...orderedIndices]..shuffle(generator);

  bool isUnordered() =>
      zip([indices, orderedIndices]).any((pair) => pair.first != pair.last);

  // A shuffle may happen to reproduce the ascending order; retry until at
  // least one index has left its original position.
  while (!isUnordered()) {
    indices.shuffle(generator);
  }

  return indices;
}
/// A named sequence of values that can be used as a column for the [DataFrame].
///
/// [name] A series name
///
/// [data] A collection of dynamically typed data
///
/// [isDiscrete] Whether the data is discrete or not. If [isDiscrete] is `true`,
/// unique values from the [data] will be extracted and saved to the
/// [discreteValues] field
///
/// Discrete values are elements from a finite set of values. Examples of
/// discrete values:
///
/// - Blood group. Possible values are A, B, AB and O
///
/// - Size. Possible values are XS, S, M, L, XL
///
/// Usage examples:
///
/// - A series with discrete values
///
/// ```dart
/// import 'package:ml_dataframe/ml_dataframe.dart';
///
/// void main() {
///   final series = Series('super_series', [1, 4, 3, 1, 4, 3], isDiscrete: true);
///
///   print(series);
///   // super_series: [1, 4, 3, 1, 4, 3]
///
///   print(series.discreteValues);
///   // [1, 4, 3]
/// }
/// ```
///
/// - A series with continuous values
///
/// ```dart
/// import 'package:ml_dataframe/ml_dataframe.dart';
///
/// void main() {
///   final series = Series('super_series', [1, 14, 3.4, 10, 'some_string', 111, false]);
///
///   print(series);
///   // super_series: [1, 14, 3.4, 10, 'some_string', 111, false]
///
///   print(series.discreteValues);
///   // []
/// }
/// ```
abstract class Series {
  /// Creates a [Series] with the given [name] and [data].
  ///
  /// The constructor redirects to [SeriesImpl], which is where the default
  /// value of [isDiscrete] is defined.
  factory Series(
    String name,
    Iterable data, {
    bool isDiscrete,
  }) = SeriesImpl;

  /// Creates a [Series] instance from a json-serializable [Series]
  /// representation
  ///
  /// A usage example:
  ///
  /// ```dart
  /// import 'package:ml_dataframe/ml_dataframe.dart';
  ///
  /// void main() {
  ///   final json = {
  ///     'N': 'awesome_series', // a series' name
  ///     'D': [1, 5, 1, 2, 3, 1, 4], // series' data
  ///     'ISD': true, // whether a series is discrete or not
  ///   };
  ///   final series = Series.fromJson(json);
  ///
  ///   print(series);
  ///   // awesome_series: [1, 5, 1, 2, 3, 1, 4]
  /// }
  /// ```
  ///
  /// One can get the JSON serializable representation by calling the [toJson]
  /// method
  factory Series.fromJson(Map json) = SeriesImpl.fromJson;

  /// Returns a json-serializable representation of a [Series] instance
  ///
  /// A usage example:
  ///
  /// ```dart
  /// import 'package:ml_dataframe/ml_dataframe.dart';
  ///
  /// void main() {
  ///   final series = Series('super_series', [10, 22, 33, 44]);
  ///   final json = series.toJson();
  ///
  ///   print(json);
  ///   // {'N': 'super_series', 'D': [10, 22, 33, 44], 'ISD': false}
  /// }
  /// ```
  Map toJson();

  /// A name of a [Series] instance
  String get name;

  /// Returns an iterable of the [Series] instance data
  Iterable get data;

  /// Returns true if a [Series] instance contains just discrete values instead of
  /// continuous ones
  bool get isDiscrete;

  /// Returns an iterable of the [data]'s unique values if a [Series]
  /// instance is marked as [isDiscrete]. If [isDiscrete] is `false`, an empty
  /// list will be returned
  Iterable get discreteValues;
}
If [isDiscrete] is `false`, an empty list will be 49 | /// returned 50 | @override 51 | final Iterable discreteValues; 52 | 53 | @override 54 | String toString() { 55 | return '$name: ${data.toString()}'; 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /lib/src/data_frame/series_impl.g.dart: -------------------------------------------------------------------------------- 1 | // GENERATED CODE - DO NOT MODIFY BY HAND 2 | 3 | part of 'series_impl.dart'; 4 | 5 | // ************************************************************************** 6 | // JsonSerializableGenerator 7 | // ************************************************************************** 8 | 9 | SeriesImpl _$SeriesImplFromJson(Map json) { 10 | return $checkedNew('SeriesImpl', json, () { 11 | $checkKeys(json, allowedKeys: const ['N', 'D', 'ISD']); 12 | final val = SeriesImpl( 13 | $checkedConvert(json, 'N', (v) => v as String), 14 | $checkedConvert(json, 'D', (v) => v as List), 15 | isDiscrete: $checkedConvert(json, 'ISD', (v) => v as bool), 16 | ); 17 | return val; 18 | }, fieldKeyMap: const {'name': 'N', 'data': 'D', 'isDiscrete': 'ISD'}); 19 | } 20 | 21 | Map _$SeriesImplToJson(SeriesImpl instance) => 22 | { 23 | 'N': instance.name, 24 | 'D': instance.data.toList(), 25 | 'ISD': instance.isDiscrete, 26 | }; 27 | -------------------------------------------------------------------------------- /lib/src/data_frame/series_json_keys.dart: -------------------------------------------------------------------------------- 1 | const seriesNameJsonKey = 'N'; 2 | const seriesDataJsonKey = 'D'; 3 | const isSeriesDiscreteJsonKey = 'ISD'; 4 | -------------------------------------------------------------------------------- /lib/src/data_reader/csv_data_reader.dart: -------------------------------------------------------------------------------- 1 | import 'dart:convert'; 2 | 3 | import 'dart:io'; 4 | 5 | import 'package:csv/csv.dart'; 6 | import 
'package:ml_dataframe/src/data_reader/data_reader.dart'; 7 | import 'package:ml_dataframe/src/data_reader/file_factory/file_factory.dart'; 8 | 9 | class CsvDataReader implements DataReader { 10 | CsvDataReader(String fileName, this._csvCodec, [FileFactory? createFile]) 11 | : _file = 12 | createFile == null ? fileFactory(fileName) : createFile(fileName); 13 | 14 | final CsvCodec _csvCodec; 15 | final File _file; 16 | 17 | @override 18 | Future>> extractData() => _file 19 | .openRead() 20 | .cast>() 21 | .transform(utf8.decoder) 22 | .transform(_csvCodec.decoder) 23 | .toList(); 24 | } 25 | -------------------------------------------------------------------------------- /lib/src/data_reader/data_reader.dart: -------------------------------------------------------------------------------- 1 | import 'package:csv/csv.dart'; 2 | import 'package:ml_dataframe/src/data_reader/csv_data_reader.dart'; 3 | 4 | abstract class DataReader { 5 | factory DataReader.csv( 6 | String fileName, 7 | String columnDelimiter, 8 | String eol, 9 | ) => 10 | CsvDataReader( 11 | fileName, 12 | CsvCodec(fieldDelimiter: columnDelimiter, eol: eol), 13 | ); 14 | 15 | Future>> extractData(); 16 | } 17 | -------------------------------------------------------------------------------- /lib/src/data_reader/file_factory/file_factory.dart: -------------------------------------------------------------------------------- 1 | import 'dart:io'; 2 | 3 | typedef FileFactory = File Function(String fileName); 4 | 5 | FileFactory fileFactory = (String fileName) => File(fileName); 6 | -------------------------------------------------------------------------------- /lib/src/data_selector/data_selector.dart: -------------------------------------------------------------------------------- 1 | import 'package:quiver/iterables.dart'; 2 | 3 | class DataSelector { 4 | DataSelector(this._columnIndices); 5 | 6 | final Iterable _columnIndices; 7 | 8 | Iterable> select( 9 | Iterable> headlessData) => 10 | 
headlessData.map(_filterRow); 11 | 12 | Iterable _filterRow(Iterable row) => 13 | _columnIndices.isNotEmpty 14 | ? enumerate(row) 15 | .where((indexed) => _columnIndices.contains(indexed.index)) 16 | .map((indexed) => indexed.value) 17 | : row; 18 | } 19 | -------------------------------------------------------------------------------- /lib/src/numerical_converter/helpers/from_numerical_converter_json.dart: -------------------------------------------------------------------------------- 1 | import 'package:ml_dataframe/src/numerical_converter/numerical_converter.dart'; 2 | import 'package:ml_dataframe/src/numerical_converter/numerical_converter_impl.dart'; 3 | 4 | NumericalConverter fromNumericalConverterJson(dynamic _) => 5 | const NumericalConverterImpl(); 6 | -------------------------------------------------------------------------------- /lib/src/numerical_converter/helpers/numerical_converter_to_json.dart: -------------------------------------------------------------------------------- 1 | import 'package:ml_dataframe/src/numerical_converter/numerical_converter.dart'; 2 | 3 | String numericalConverterToJson(NumericalConverter converter) => ''; 4 | -------------------------------------------------------------------------------- /lib/src/numerical_converter/numerical_converter.dart: -------------------------------------------------------------------------------- 1 | import 'package:ml_dataframe/src/data_frame/data_frame.dart'; 2 | 3 | abstract class NumericalConverter { 4 | DataFrame convertDataFrame(DataFrame data); 5 | Iterable> convertRawData(Iterable> data); 6 | Iterable> convertRawDataStrict( 7 | Iterable> data); 8 | } 9 | -------------------------------------------------------------------------------- /lib/src/numerical_converter/numerical_converter_impl.dart: -------------------------------------------------------------------------------- 1 | import 'package:ml_dataframe/src/data_frame/data_frame.dart'; 2 | import 
'package:ml_dataframe/src/numerical_converter/numerical_converter.dart'; 3 | 4 | class NumericalConverterImpl implements NumericalConverter { 5 | const NumericalConverterImpl(); 6 | 7 | static final Exception _exception = 8 | Exception('Unsuccessful attempt to convert a value to number'); 9 | 10 | @override 11 | DataFrame convertDataFrame(DataFrame data) => 12 | DataFrame(convertRawData(data.rows), header: data.header); 13 | 14 | @override 15 | Iterable> convertRawData(Iterable data) => 16 | data.map((row) => row.map((value) => _convertSingle(value, false))); 17 | 18 | @override 19 | Iterable> convertRawDataStrict(Iterable data) => 20 | data.map((row) => row.map((value) => _convertSingle(value, true)!)); 21 | 22 | double? _convertSingle(dynamic value, bool strictCheck) { 23 | if (value is String) { 24 | try { 25 | return double.parse(value); 26 | } catch (e) { 27 | if (strictCheck) { 28 | throw _exception; 29 | } 30 | 31 | return null; 32 | } 33 | } 34 | 35 | if (value is bool) { 36 | if (strictCheck) { 37 | throw _exception; 38 | } 39 | 40 | return value ? 1 : 0; 41 | } 42 | 43 | if (value is! 
num) { 44 | if (strictCheck) { 45 | throw _exception; 46 | } 47 | 48 | return null; 49 | } 50 | 51 | return value * 1.0; 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /lib/src/numerical_converter/numerical_converter_json_keys.dart: -------------------------------------------------------------------------------- 1 | const strictTypeCheckJsonKey = 'ST'; 2 | -------------------------------------------------------------------------------- /lib/src/serializable/serializable.dart: -------------------------------------------------------------------------------- 1 | import 'dart:io'; 2 | 3 | abstract class Serializable { 4 | Map toJson(); 5 | 6 | Future saveAsJson(String fileName, {bool rewrite = false}); 7 | } 8 | -------------------------------------------------------------------------------- /lib/src/serializable/serializable_mixin.dart: -------------------------------------------------------------------------------- 1 | import 'dart:convert'; 2 | import 'dart:io'; 3 | 4 | import 'package:ml_dataframe/src/serializable/serializable.dart'; 5 | 6 | mixin SerializableMixin implements Serializable { 7 | @override 8 | Future saveAsJson(String fileName, {bool rewrite = false}) async { 9 | final file = File(fileName); 10 | 11 | if (!rewrite && await file.exists()) { 12 | throw Exception('The file already exists, path $fileName'); 13 | } 14 | 15 | final serializable = toJson(); 16 | final json = jsonEncode(serializable); 17 | 18 | return file.writeAsString(json); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /pubspec.yaml: -------------------------------------------------------------------------------- 1 | name: ml_dataframe 2 | description: An in-memory untyped data storage with the possibility to query and modify it 3 | version: 1.6.0 4 | homepage: https://github.com/gyrdym/ml_dataframe 5 | 6 | environment: 7 | sdk: ">=2.12.0 <3.0.0" 8 | 9 | dependencies: 10 | csv: 
^5.0.0 11 | json_annotation: ^4.0.0 12 | ml_linalg: ^13.0.0 13 | quiver: ^3.0.0 14 | 15 | dev_dependencies: 16 | benchmark_harness: ^2.0.0 17 | build_runner: ^1.10.11 18 | json_serializable: ^4.0.0 19 | mockito: ^5.0.0 20 | pedantic: ^1.11.0 21 | test: ^1.16.0 22 | -------------------------------------------------------------------------------- /test/data_frame/data_frame_from_matrix_test.dart: -------------------------------------------------------------------------------- 1 | import 'package:ml_dataframe/src/data_frame/data_frame.dart'; 2 | import 'package:ml_linalg/dtype.dart'; 3 | import 'package:ml_linalg/matrix.dart'; 4 | import 'package:test/test.dart'; 5 | 6 | void main() { 7 | group('DataFrame.fromMatrix', () { 8 | final sourceData = >[ 9 | [1, 2, 3, 4], 10 | [10, 20, 30, 40], 11 | [100, 200, 300, 400], 12 | [1000, 2000, 3000, 4000], 13 | ]; 14 | final matrix = Matrix.fromList(sourceData, dtype: DType.float32); 15 | 16 | test('should initialize from matrix without predefined header', () { 17 | final dataFrame = DataFrame.fromMatrix(matrix); 18 | 19 | expect(dataFrame.header, equals(['col_0', 'col_1', 'col_2', 'col_3'])); 20 | 21 | expect(dataFrame.rows, equals(sourceData)); 22 | 23 | expect(dataFrame.series, hasLength(4)); 24 | 25 | expect(dataFrame[0].name, 'col_0'); 26 | expect(dataFrame[0].data, equals([1, 10, 100, 1000])); 27 | expect(dataFrame['col_0'].data, equals([1, 10, 100, 1000])); 28 | 29 | expect(dataFrame[1].name, 'col_1'); 30 | expect(dataFrame[1].data, equals([2, 20, 200, 2000])); 31 | expect(dataFrame['col_1'].data, equals([2, 20, 200, 2000])); 32 | 33 | expect(dataFrame[2].name, 'col_2'); 34 | expect(dataFrame[2].data, equals([3, 30, 300, 3000])); 35 | expect(dataFrame['col_2'].data, equals([3, 30, 300, 3000])); 36 | 37 | expect(dataFrame[3].name, 'col_3'); 38 | expect(dataFrame[3].data, equals([4, 40, 400, 4000])); 39 | expect(dataFrame['col_3'].data, equals([4, 40, 400, 4000])); 40 | 41 | expect(dataFrame.toMatrix(), same(matrix)); 42 | 
}); 43 | 44 | test('should initialize from matrix with predefined header', () { 45 | final dataFrame = DataFrame.fromMatrix(matrix, 46 | header: ['how', 'doth', 'the', 'little']); 47 | 48 | expect(dataFrame.header, equals(['how', 'doth', 'the', 'little'])); 49 | 50 | expect(dataFrame.rows, equals(sourceData)); 51 | 52 | expect(dataFrame.series, hasLength(4)); 53 | 54 | expect(dataFrame[0].name, 'how'); 55 | expect(dataFrame[0].data, equals([1, 10, 100, 1000])); 56 | expect(dataFrame['how'].data, equals([1, 10, 100, 1000])); 57 | 58 | expect(dataFrame[1].name, 'doth'); 59 | expect(dataFrame[1].data, equals([2, 20, 200, 2000])); 60 | expect(dataFrame['doth'].data, equals([2, 20, 200, 2000])); 61 | 62 | expect(dataFrame[2].name, 'the'); 63 | expect(dataFrame[2].data, equals([3, 30, 300, 3000])); 64 | expect(dataFrame['the'].data, equals([3, 30, 300, 3000])); 65 | 66 | expect(dataFrame[3].name, 'little'); 67 | expect(dataFrame[3].data, equals([4, 40, 400, 4000])); 68 | expect(dataFrame['little'].data, equals([4, 40, 400, 4000])); 69 | 70 | expect(dataFrame.toMatrix(), same(matrix)); 71 | }); 72 | 73 | test( 74 | 'should ignore predefined header list elements that are out of ' 75 | 'range', () { 76 | final dataFrame = DataFrame.fromMatrix(matrix, 77 | header: ['how', 'doth', 'the', 'little', 'ololo', 'trololo']); 78 | 79 | expect(dataFrame.header, equals(['how', 'doth', 'the', 'little'])); 80 | 81 | expect(dataFrame.rows, equals(sourceData)); 82 | 83 | expect(dataFrame.series, hasLength(4)); 84 | 85 | expect(dataFrame[0].name, 'how'); 86 | expect(dataFrame['how'].data, equals([1, 10, 100, 1000])); 87 | 88 | expect(dataFrame[1].name, 'doth'); 89 | expect(dataFrame['doth'].data, equals([2, 20, 200, 2000])); 90 | 91 | expect(dataFrame[2].name, 'the'); 92 | expect(dataFrame['the'].data, equals([3, 30, 300, 3000])); 93 | 94 | expect(dataFrame[3].name, 'little'); 95 | expect(dataFrame['little'].data, equals([4, 40, 400, 4000])); 96 | }); 97 | 98 | test( 99 | 'should ignore 
predefined header list elements that are out of ' 100 | 'range', () { 101 | final dataFrame = DataFrame.fromMatrix(matrix, header: ['how', 'doth']); 102 | 103 | expect(dataFrame.header, equals(['how', 'doth'])); 104 | 105 | expect( 106 | dataFrame.rows, 107 | equals([ 108 | [ 109 | 1, 110 | 2, 111 | ], 112 | [ 113 | 10, 114 | 20, 115 | ], 116 | [ 117 | 100, 118 | 200, 119 | ], 120 | [ 121 | 1000, 122 | 2000, 123 | ], 124 | ])); 125 | 126 | expect(dataFrame.series, hasLength(2)); 127 | 128 | expect(dataFrame[0].name, 'how'); 129 | expect(dataFrame['how'].data, equals([1, 10, 100, 1000])); 130 | 131 | expect(dataFrame[1].name, 'doth'); 132 | expect(dataFrame['doth'].data, equals([2, 20, 200, 2000])); 133 | }, skip: true); 134 | 135 | test( 136 | 'should extract certain columns while initializing from matrix with ' 137 | 'predefined header', () { 138 | final dataFrame = DataFrame.fromMatrix(matrix, 139 | columns: [0, 3], header: ['how', 'doth', 'the', 'little']); 140 | 141 | expect(dataFrame.header, equals(['how', 'doth'])); 142 | 143 | expect( 144 | dataFrame.rows, 145 | equals([ 146 | [1, 4], 147 | [10, 40], 148 | [100, 400], 149 | [1000, 4000], 150 | ])); 151 | 152 | expect(dataFrame.series, hasLength(2)); 153 | 154 | expect(dataFrame[0].name, 'how'); 155 | expect(dataFrame[0].data, equals([1, 10, 100, 1000])); 156 | expect(dataFrame['how'].data, equals([1, 10, 100, 1000])); 157 | 158 | expect(dataFrame[1].name, 'doth'); 159 | expect(dataFrame[1].data, equals([4, 40, 400, 4000])); 160 | expect(dataFrame['doth'].data, equals([4, 40, 400, 4000])); 161 | 162 | expect( 163 | dataFrame.toMatrix(), 164 | equals([ 165 | [1, 4], 166 | [10, 40], 167 | [100, 400], 168 | [1000, 4000], 169 | ])); 170 | }); 171 | 172 | test( 173 | 'should initialize from matrix without predefined header but with ' 174 | 'auto header prefix', () { 175 | final dataFrame = 176 | DataFrame.fromMatrix(matrix, autoHeaderPrefix: 'super_'); 177 | 178 | expect(dataFrame.header, 179 | equals(['super_0', 
'super_1', 'super_2', 'super_3'])); 180 | 181 | expect(dataFrame.rows, equals(sourceData)); 182 | 183 | expect(dataFrame.series, hasLength(4)); 184 | 185 | expect(dataFrame[0].name, 'super_0'); 186 | expect(dataFrame[0].data, equals([1, 10, 100, 1000])); 187 | expect(dataFrame['super_0'].data, equals([1, 10, 100, 1000])); 188 | 189 | expect(dataFrame[1].name, 'super_1'); 190 | expect(dataFrame[1].data, equals([2, 20, 200, 2000])); 191 | expect(dataFrame['super_1'].data, equals([2, 20, 200, 2000])); 192 | 193 | expect(dataFrame[2].name, 'super_2'); 194 | expect(dataFrame[2].data, equals([3, 30, 300, 3000])); 195 | expect(dataFrame['super_2'].data, equals([3, 30, 300, 3000])); 196 | 197 | expect(dataFrame[3].name, 'super_3'); 198 | expect(dataFrame[3].data, equals([4, 40, 400, 4000])); 199 | expect(dataFrame['super_3'].data, equals([4, 40, 400, 4000])); 200 | 201 | expect(dataFrame.toMatrix(), same(matrix)); 202 | }); 203 | 204 | test('should initialize from matrix using certain columns', () { 205 | final dataFrame = DataFrame.fromMatrix(matrix, columns: [0, 3]); 206 | 207 | expect(dataFrame.header, equals(['col_0', 'col_1'])); 208 | 209 | expect( 210 | dataFrame.rows, 211 | equals([ 212 | [1, 4], 213 | [10, 40], 214 | [100, 400], 215 | [1000, 4000], 216 | ])); 217 | 218 | expect(dataFrame.series, hasLength(2)); 219 | 220 | expect(dataFrame[0].name, 'col_0'); 221 | expect(dataFrame[0].data, equals([1, 10, 100, 1000])); 222 | expect(dataFrame['col_0'].data, equals([1, 10, 100, 1000])); 223 | 224 | expect(dataFrame[1].name, 'col_1'); 225 | expect(dataFrame[1].data, equals([4, 40, 400, 4000])); 226 | expect(dataFrame['col_1'].data, equals([4, 40, 400, 4000])); 227 | 228 | expect( 229 | dataFrame.toMatrix(), 230 | equals([ 231 | [1, 4], 232 | [10, 40], 233 | [100, 400], 234 | [1000, 4000], 235 | ])); 236 | }); 237 | 238 | test( 239 | 'should initialize from matrix using predefined discrete column ' 240 | 'indices', () { 241 | final dataFrame = 
DataFrame.fromMatrix(matrix, discreteColumns: [0, 1]); 242 | 243 | expect(dataFrame.rows, equals(sourceData)); 244 | 245 | expect(dataFrame.series, hasLength(4)); 246 | 247 | expect(dataFrame[0].name, 'col_0'); 248 | expect(dataFrame[0].data, equals([1, 10, 100, 1000])); 249 | expect(dataFrame['col_0'].data, equals([1, 10, 100, 1000])); 250 | expect(dataFrame['col_0'].isDiscrete, isTrue); 251 | 252 | expect(dataFrame[1].name, 'col_1'); 253 | expect(dataFrame[1].data, equals([2, 20, 200, 2000])); 254 | expect(dataFrame['col_1'].data, equals([2, 20, 200, 2000])); 255 | expect(dataFrame['col_1'].isDiscrete, isTrue); 256 | 257 | expect(dataFrame[2].name, 'col_2'); 258 | expect(dataFrame[2].data, equals([3, 30, 300, 3000])); 259 | expect(dataFrame['col_2'].data, equals([3, 30, 300, 3000])); 260 | expect(dataFrame['col_2'].isDiscrete, isFalse); 261 | 262 | expect(dataFrame[3].name, 'col_3'); 263 | expect(dataFrame[3].data, equals([4, 40, 400, 4000])); 264 | expect(dataFrame['col_3'].data, equals([4, 40, 400, 4000])); 265 | expect(dataFrame['col_3'].isDiscrete, isFalse); 266 | 267 | expect(dataFrame.toMatrix(), same(matrix)); 268 | }); 269 | 270 | test( 271 | 'should initialize from matrix using predefined discrete column ' 272 | 'names (in conjunction with predefined header)', () { 273 | final dataFrame = DataFrame.fromMatrix(matrix, 274 | header: ['first', 'second', 'third', 'fourth'], 275 | discreteColumnNames: ['first', 'fourth']); 276 | 277 | expect(dataFrame.rows, equals(sourceData)); 278 | 279 | expect(dataFrame.series, hasLength(4)); 280 | 281 | expect(dataFrame[0].name, 'first'); 282 | expect(dataFrame[0].data, equals([1, 10, 100, 1000])); 283 | expect(dataFrame['first'].data, equals([1, 10, 100, 1000])); 284 | expect(dataFrame['first'].isDiscrete, isTrue); 285 | 286 | expect(dataFrame[1].name, 'second'); 287 | expect(dataFrame[1].data, equals([2, 20, 200, 2000])); 288 | expect(dataFrame['second'].data, equals([2, 20, 200, 2000])); 289 | 
expect(dataFrame['second'].isDiscrete, isFalse); 290 | 291 | expect(dataFrame[2].name, 'third'); 292 | expect(dataFrame[2].data, equals([3, 30, 300, 3000])); 293 | expect(dataFrame['third'].data, equals([3, 30, 300, 3000])); 294 | expect(dataFrame['third'].isDiscrete, isFalse); 295 | 296 | expect(dataFrame[3].name, 'fourth'); 297 | expect(dataFrame[3].data, equals([4, 40, 400, 4000])); 298 | expect(dataFrame['fourth'].data, equals([4, 40, 400, 4000])); 299 | expect(dataFrame['fourth'].isDiscrete, isTrue); 300 | 301 | expect(dataFrame.toMatrix(), same(matrix)); 302 | }); 303 | 304 | test( 305 | 'should initialize from matrix using predefined discrete column ' 306 | 'names even if predefined header is not defined', () { 307 | final dataFrame = 308 | DataFrame.fromMatrix(matrix, discreteColumnNames: ['col_0', 'col_3']); 309 | 310 | expect(dataFrame.rows, equals(sourceData)); 311 | 312 | expect(dataFrame.series, hasLength(4)); 313 | 314 | expect(dataFrame[0].name, 'col_0'); 315 | expect(dataFrame[0].data, equals([1, 10, 100, 1000])); 316 | expect(dataFrame['col_0'].data, equals([1, 10, 100, 1000])); 317 | expect(dataFrame['col_0'].isDiscrete, isTrue); 318 | 319 | expect(dataFrame[1].name, 'col_1'); 320 | expect(dataFrame[1].data, equals([2, 20, 200, 2000])); 321 | expect(dataFrame['col_1'].data, equals([2, 20, 200, 2000])); 322 | expect(dataFrame['col_1'].isDiscrete, isFalse); 323 | 324 | expect(dataFrame[2].name, 'col_2'); 325 | expect(dataFrame[2].data, equals([3, 30, 300, 3000])); 326 | expect(dataFrame['col_2'].data, equals([3, 30, 300, 3000])); 327 | expect(dataFrame['col_2'].isDiscrete, isFalse); 328 | 329 | expect(dataFrame[3].name, 'col_3'); 330 | expect(dataFrame[3].data, equals([4, 40, 400, 4000])); 331 | expect(dataFrame['col_3'].data, equals([4, 40, 400, 4000])); 332 | expect(dataFrame['col_3'].isDiscrete, isTrue); 333 | 334 | expect(dataFrame.toMatrix(), same(matrix)); 335 | }); 336 | 337 | test( 338 | 'should ignore entire predefined discrete column name 
list if none of ' 339 | 'the names from the list matches the actual dataframe header', () { 340 | final dataFrame = DataFrame.fromMatrix(matrix, 341 | discreteColumnNames: ['ololo', 'trololo']); 342 | 343 | expect(dataFrame.rows, equals(sourceData)); 344 | 345 | expect(dataFrame.series, hasLength(4)); 346 | 347 | expect(dataFrame[0].name, 'col_0'); 348 | expect(dataFrame[0].data, equals([1, 10, 100, 1000])); 349 | expect(dataFrame['col_0'].data, equals([1, 10, 100, 1000])); 350 | expect(dataFrame['col_0'].isDiscrete, isFalse); 351 | 352 | expect(dataFrame[1].name, 'col_1'); 353 | expect(dataFrame[1].data, equals([2, 20, 200, 2000])); 354 | expect(dataFrame['col_1'].data, equals([2, 20, 200, 2000])); 355 | expect(dataFrame['col_1'].isDiscrete, isFalse); 356 | 357 | expect(dataFrame[2].name, 'col_2'); 358 | expect(dataFrame[2].data, equals([3, 30, 300, 3000])); 359 | expect(dataFrame['col_2'].data, equals([3, 30, 300, 3000])); 360 | expect(dataFrame['col_2'].isDiscrete, isFalse); 361 | 362 | expect(dataFrame[3].name, 'col_3'); 363 | expect(dataFrame[3].data, equals([4, 40, 400, 4000])); 364 | expect(dataFrame['col_3'].data, equals([4, 40, 400, 4000])); 365 | expect(dataFrame['col_3'].isDiscrete, isFalse); 366 | 367 | expect(dataFrame.toMatrix(), same(matrix)); 368 | }); 369 | 370 | test( 371 | 'should ignore just those names of predefined discrete column name ' 372 | 'list, that do not match the actual dataframe header names', () { 373 | final dataFrame = DataFrame.fromMatrix(matrix, 374 | discreteColumnNames: ['col_1', 'ololo', 'trololo']); 375 | 376 | expect(dataFrame.rows, equals(sourceData)); 377 | 378 | expect(dataFrame.series, hasLength(4)); 379 | 380 | expect(dataFrame[0].name, 'col_0'); 381 | expect(dataFrame[0].data, equals([1, 10, 100, 1000])); 382 | expect(dataFrame['col_0'].data, equals([1, 10, 100, 1000])); 383 | expect(dataFrame['col_0'].isDiscrete, isFalse); 384 | 385 | expect(dataFrame[1].name, 'col_1'); 386 | expect(dataFrame[1].data, equals([2, 20, 
200, 2000])); 387 | expect(dataFrame['col_1'].data, equals([2, 20, 200, 2000])); 388 | expect(dataFrame['col_1'].isDiscrete, isTrue); 389 | 390 | expect(dataFrame[2].name, 'col_2'); 391 | expect(dataFrame[2].data, equals([3, 30, 300, 3000])); 392 | expect(dataFrame['col_2'].data, equals([3, 30, 300, 3000])); 393 | expect(dataFrame['col_2'].isDiscrete, isFalse); 394 | 395 | expect(dataFrame[3].name, 'col_3'); 396 | expect(dataFrame[3].data, equals([4, 40, 400, 4000])); 397 | expect(dataFrame['col_3'].data, equals([4, 40, 400, 4000])); 398 | expect(dataFrame['col_3'].isDiscrete, isFalse); 399 | 400 | expect(dataFrame.toMatrix(), same(matrix)); 401 | }); 402 | 403 | test('should consider both discrete indices and names', () { 404 | final dataFrame = DataFrame.fromMatrix(matrix, 405 | header: ['first', 'second', 'third', 'fourth'], 406 | discreteColumns: [2, 3], 407 | discreteColumnNames: ['first', 'second']); 408 | 409 | expect(dataFrame.rows, equals(sourceData)); 410 | 411 | expect(dataFrame.series, hasLength(4)); 412 | 413 | expect(dataFrame[0].name, 'first'); 414 | expect(dataFrame[0].data, equals([1, 10, 100, 1000])); 415 | expect(dataFrame['first'].data, equals([1, 10, 100, 1000])); 416 | expect(dataFrame['first'].isDiscrete, isTrue); 417 | 418 | expect(dataFrame[1].name, 'second'); 419 | expect(dataFrame[1].data, equals([2, 20, 200, 2000])); 420 | expect(dataFrame['second'].data, equals([2, 20, 200, 2000])); 421 | expect(dataFrame['second'].isDiscrete, isTrue); 422 | 423 | expect(dataFrame[2].name, 'third'); 424 | expect(dataFrame[2].data, equals([3, 30, 300, 3000])); 425 | expect(dataFrame['third'].data, equals([3, 30, 300, 3000])); 426 | expect(dataFrame['third'].isDiscrete, isTrue); 427 | 428 | expect(dataFrame[3].name, 'fourth'); 429 | expect(dataFrame[3].data, equals([4, 40, 400, 4000])); 430 | expect(dataFrame['fourth'].data, equals([4, 40, 400, 4000])); 431 | expect(dataFrame['fourth'].isDiscrete, isTrue); 432 | 433 | expect(dataFrame.toMatrix(), 
same(matrix)); 434 | }); 435 | }); 436 | } 437 | -------------------------------------------------------------------------------- /test/data_frame/data_frame_from_raw_csv_test.dart: -------------------------------------------------------------------------------- 1 | import 'dart:io'; 2 | 3 | import 'package:ml_dataframe/ml_dataframe.dart'; 4 | import 'package:test/test.dart'; 5 | 6 | void main() { 7 | group('DataFrame.fromRawCsv', () { 8 | final csvAsString = 9 | File('test/data_frame/data/raw_csv_with_header.txt').readAsStringSync(); 10 | 11 | test('should create a dataframe with a proper header', () { 12 | final dataframe = DataFrame.fromRawCsv(csvAsString); 13 | 14 | expect(dataframe.header, [ 15 | 'number of times pregnant', 16 | 'plasma glucose concentration a 2 hours in an oral glucose tolerance test', 17 | 'diastolic blood pressure (mm Hg)', 18 | 'triceps skin fold thickness (mm)', 19 | '2-Hour serum insulin (mu U/ml)', 20 | 'body mass index (weight in kg/(height in m)^2)', 21 | 'diabetes pedigree function', 22 | 'age (years)', 23 | 'class variable (0 or 1)' 24 | ]); 25 | }); 26 | 27 | test('should create a dataframe with a proper shape', () { 28 | final dataframe = DataFrame.fromRawCsv(csvAsString); 29 | 30 | expect(dataframe.shape, [768, 9]); 31 | }); 32 | 33 | test('should create a dataframe with proper series length', () { 34 | final dataframe = DataFrame.fromRawCsv(csvAsString); 35 | 36 | expect(dataframe.series, hasLength(9)); 37 | }); 38 | 39 | test('should create a dataframe with proper series content', () { 40 | final dataframe = DataFrame.fromRawCsv(csvAsString); 41 | 42 | expect(dataframe.series.elementAt(0).data.take(10), 43 | [6, 1, 8, 1, 0, 5, 3, 10, 2, 8]); 44 | 45 | expect(dataframe.series.elementAt(3).data.toList().sublist(760), 46 | [26, 31, 0, 48, 27, 23, 0, 31]); 47 | }); 48 | }); 49 | } 50 | -------------------------------------------------------------------------------- /test/data_frame/data_frame_from_raw_data_test.dart: 
-------------------------------------------------------------------------------- 1 | import 'package:ml_dataframe/src/data_frame/data_frame.dart'; 2 | import 'package:test/test.dart'; 3 | 4 | void main() { 5 | group('DataFrame', () { 6 | test('should initialize from dynamic-typed data without header row', () { 7 | final data = [ 8 | [1, 2, 3, true, '32'], 9 | [10, 12, 323, false, '1132'], 10 | [-10, 202, null, true, 'abs'], 11 | ]; 12 | 13 | final frame = DataFrame( 14 | data, 15 | headerExists: false, 16 | ); 17 | 18 | expect( 19 | frame.header, equals(['col_0', 'col_1', 'col_2', 'col_3', 'col_4'])); 20 | expect( 21 | frame.rows, 22 | equals([ 23 | [1, 2, 3, true, '32'], 24 | [10, 12, 323, false, '1132'], 25 | [-10, 202, null, true, 'abs'], 26 | ])); 27 | expect(frame.series.map((series) => series.name), 28 | equals(['col_0', 'col_1', 'col_2', 'col_3', 'col_4'])); 29 | expect( 30 | frame.series.map((series) => series.data), 31 | equals([ 32 | [1, 10, -10], 33 | [2, 12, 202], 34 | [3, 323, null], 35 | [true, false, true], 36 | ['32', '1132', 'abs'], 37 | ])); 38 | }); 39 | 40 | test('should initialize from dynamic-typed data with header row', () { 41 | final data = [ 42 | ['header_1', 'header_2', 'header_3', 'header_4', 'header_5'], 43 | [1, 2, 3, true, '32'], 44 | [10, 12, 323, false, '1132'], 45 | [-10, 202, null, true, 'abs'], 46 | ]; 47 | final frame = DataFrame(data, headerExists: true); 48 | 49 | expect(frame.header, 50 | equals(['header_1', 'header_2', 'header_3', 'header_4', 'header_5'])); 51 | expect( 52 | frame.rows, 53 | equals([ 54 | [1, 2, 3, true, '32'], 55 | [10, 12, 323, false, '1132'], 56 | [-10, 202, null, true, 'abs'], 57 | ])); 58 | expect(frame.series.map((series) => series.name), 59 | equals(['header_1', 'header_2', 'header_3', 'header_4', 'header_5'])); 60 | expect( 61 | frame.series.map((series) => series.data), 62 | equals([ 63 | [1, 10, -10], 64 | [2, 12, 202], 65 | [3, 323, null], 66 | [true, false, true], 67 | ['32', '1132', 'abs'], 68 | 
])); 69 | }); 70 | 71 | test('should initilize from an empty list', () { 72 | final frame = DataFrame([]); 73 | 74 | expect(frame.rows, []); 75 | expect(frame.header, []); 76 | }); 77 | 78 | test('should initilize from an empty and a predefined header', () { 79 | final header = ['1', '2', '2']; 80 | final frame = DataFrame([], header: header); 81 | 82 | expect(frame.rows, []); 83 | expect(frame.header, header); 84 | }); 85 | 86 | test( 87 | 'should initialize from dynamic-typed data without header row using ' 88 | 'predefined header', () { 89 | final data = [ 90 | [1, 2, 3, true, '32'], 91 | [10, 12, 323, false, '1132'], 92 | [-10, 202, null, true, 'abs'], 93 | ]; 94 | final frame = DataFrame( 95 | data, 96 | header: ['first', 'second', 'third', 'fourth', 'fifth'], 97 | headerExists: false, 98 | ); 99 | 100 | expect(frame.header, 101 | equals(['first', 'second', 'third', 'fourth', 'fifth'])); 102 | expect( 103 | frame.rows, 104 | equals([ 105 | [1, 2, 3, true, '32'], 106 | [10, 12, 323, false, '1132'], 107 | [-10, 202, null, true, 'abs'], 108 | ])); 109 | expect(frame.series.map((series) => series.name), 110 | equals(['first', 'second', 'third', 'fourth', 'fifth'])); 111 | expect( 112 | frame.series.map((series) => series.data), 113 | equals([ 114 | [1, 10, -10], 115 | [2, 12, 202], 116 | [3, 323, null], 117 | [true, false, true], 118 | ['32', '1132', 'abs'], 119 | ])); 120 | }); 121 | 122 | test( 123 | 'should use predefined header while initializng even if source data ' 124 | 'has header row', () { 125 | final data = [ 126 | ['header_1', 'header_2', 'header_3', 'header_4', 'header_5'], 127 | [1, 2, 3, true, '32'], 128 | [10, 12, 323, false, '1132'], 129 | [-10, 202, null, true, 'abs'], 130 | ]; 131 | final frame = DataFrame(data, 132 | header: ['first', 'second', 'third', 'fourth', 'fifth'], 133 | headerExists: true); 134 | 135 | expect(frame.header, 136 | equals(['first', 'second', 'third', 'fourth', 'fifth'])); 137 | expect( 138 | frame.rows, 139 | equals([ 140 
| [1, 2, 3, true, '32'], 141 | [10, 12, 323, false, '1132'], 142 | [-10, 202, null, true, 'abs'], 143 | ])); 144 | expect(frame.series.map((series) => series.name), 145 | equals(['first', 'second', 'third', 'fourth', 'fifth'])); 146 | expect( 147 | frame.series.map((series) => series.data), 148 | equals([ 149 | [1, 10, -10], 150 | [2, 12, 202], 151 | [3, 323, null], 152 | [true, false, true], 153 | ['32', '1132', 'abs'], 154 | ])); 155 | }); 156 | 157 | test('should select columns from source data by their indices', () { 158 | final data = [ 159 | ['col_1', 'col_2', 'col_3', 'col_4', 'col_5'], 160 | [1, 2, 3, true, '32'], 161 | [10, 12, 323, false, '1132'], 162 | [-10, 202, null, true, 'abs'], 163 | ]; 164 | final frame = DataFrame(data, headerExists: true, columns: [0, 2, 4]); 165 | 166 | expect(frame.header, equals(['col_1', 'col_3', 'col_5'])); 167 | expect( 168 | frame.rows, 169 | equals([ 170 | [1, 3, '32'], 171 | [10, 323, '1132'], 172 | [-10, null, 'abs'], 173 | ])); 174 | expect(frame.series.map((series) => series.name), 175 | equals(['col_1', 'col_3', 'col_5'])); 176 | expect( 177 | frame.series.map((series) => series.data), 178 | equals([ 179 | [1, 10, -10], 180 | [3, 323, null], 181 | ['32', '1132', 'abs'], 182 | ])); 183 | }); 184 | 185 | test('should select columns from source data by their names', () { 186 | final data = [ 187 | ['col_1', 'col_2', 'col_3', 'col_4', 'col_5'], 188 | [1, 2, 3, true, '32'], 189 | [10, 12, 323, false, '1132'], 190 | [-10, 202, null, true, 'abs'], 191 | ]; 192 | final frame = DataFrame(data, 193 | headerExists: true, columnNames: ['col_1', 'col_3', 'col_5']); 194 | 195 | expect(frame.header, equals(['col_1', 'col_3', 'col_5'])); 196 | expect( 197 | frame.rows, 198 | equals([ 199 | [1, 3, '32'], 200 | [10, 323, '1132'], 201 | [-10, null, 'abs'], 202 | ])); 203 | expect(frame.series.map((series) => series.name), 204 | equals(['col_1', 'col_3', 'col_5'])); 205 | expect( 206 | frame.series.map((series) => series.data), 207 | 
equals([ 208 | [1, 10, -10], 209 | [3, 323, null], 210 | ['32', '1132', 'abs'], 211 | ])); 212 | }); 213 | 214 | test( 215 | 'should consider predefined header while initializing if there are ' 216 | 'columns to select from source data', () { 217 | final data = [ 218 | [1, 2, 3, true, '32'], 219 | [10, 12, 323, false, '1132'], 220 | [-10, 202, null, true, 'abs'], 221 | ]; 222 | final frame = DataFrame( 223 | data, 224 | header: ['first', 'second', 'third', 'fourth', 'fifth'], 225 | columns: [0, 2, 4], 226 | headerExists: false, 227 | ); 228 | 229 | expect(frame.header, equals(['first', 'second', 'third'])); 230 | expect( 231 | frame.rows, 232 | equals([ 233 | [1, 3, '32'], 234 | [10, 323, '1132'], 235 | [-10, null, 'abs'], 236 | ])); 237 | expect(frame.series.map((series) => series.name), 238 | equals(['first', 'second', 'third'])); 239 | expect( 240 | frame.series.map((series) => series.data), 241 | equals([ 242 | [1, 10, -10], 243 | [3, 323, null], 244 | ['32', '1132', 'abs'], 245 | ])); 246 | }); 247 | }); 248 | } 249 | -------------------------------------------------------------------------------- /test/data_frame/data_frame_from_series_test.dart: -------------------------------------------------------------------------------- 1 | import 'package:ml_dataframe/src/data_frame/data_frame.dart'; 2 | import 'package:ml_dataframe/src/data_frame/series.dart'; 3 | import 'package:test/test.dart'; 4 | 5 | void main() { 6 | group('DataFrame.fromSeries', () { 7 | test('should initialize from a series collection', () { 8 | final series = [ 9 | Series('first', [1, 2, 3, true, '32']), 10 | Series('second', [10, 12, 323, false, '1132']), 11 | Series('third', [-10, 202, null, true, 'abs']), 12 | ]; 13 | final frame = DataFrame.fromSeries(series); 14 | 15 | expect(frame.header, equals(['first', 'second', 'third'])); 16 | expect( 17 | frame.rows, 18 | equals([ 19 | [1, 10, -10], 20 | [2, 12, 202], 21 | [3, 323, null], 22 | [true, false, true], 23 | ['32', '1132', 'abs'], 24 | 
])); 25 | expect(frame.series.map((series) => series.name), 26 | equals(['first', 'second', 'third'])); 27 | expect( 28 | frame.series.map((series) => series.data), 29 | equals([ 30 | [1, 2, 3, true, '32'], 31 | [10, 12, 323, false, '1132'], 32 | [-10, 202, null, true, 'abs'], 33 | ])); 34 | }); 35 | }); 36 | } 37 | -------------------------------------------------------------------------------- /test/data_frame/data_frame_helpers_test.dart: -------------------------------------------------------------------------------- 1 | import 'package:ml_dataframe/src/data_frame/helpers/convert_rows_to_series.dart'; 2 | import 'package:ml_dataframe/src/data_frame/helpers/convert_series_to_rows.dart'; 3 | import 'package:ml_dataframe/src/data_frame/series.dart'; 4 | import 'package:test/test.dart'; 5 | 6 | void main() { 7 | group('DataFrame helpers', () { 8 | group('convertRowsToSeries', () { 9 | test( 10 | 'should convert rows of dynamic typed data into columns and combine ' 11 | 'them with the given column headers', () { 12 | final headers = ['col_1', 'col_2', 'col_3']; 13 | final rows = [ 14 | [1, null, true], 15 | [100, '23', false], 16 | [230, '11', false], 17 | [null, 'text', false], 18 | [0, '00', true], 19 | ]; 20 | final series = convertRowsToSeries(headers, rows).toList(); 21 | 22 | expect(series, hasLength(3)); 23 | 24 | expect(series[0].name, 'col_1'); 25 | expect(series[0].data, [1, 100, 230, null, 0]); 26 | 27 | expect(series[1].name, 'col_2'); 28 | expect(series[1].data, [null, '23', '11', 'text', '00']); 29 | 30 | expect(series[2].name, 'col_3'); 31 | expect(series[2].data, [true, false, false, false, true]); 32 | }); 33 | 34 | test( 35 | 'should return number of columns that is equal to the number of ' 36 | 'headers', () { 37 | final headers = ['col_1', 'col_2']; 38 | final rows = [ 39 | [1, null, true], 40 | [100, '23', false], 41 | [230, '11', false], 42 | [null, 'text', false], 43 | [0, '00', true], 44 | ]; 45 | final series = convertRowsToSeries(headers, 
rows).toList(); 46 | 47 | expect(series, hasLength(2)); 48 | 49 | expect(series[0].name, 'col_1'); 50 | expect(series[0].data, [1, 100, 230, null, 0]); 51 | 52 | expect(series[1].name, 'col_2'); 53 | expect(series[1].data, [null, '23', '11', 'text', '00']); 54 | 55 | expect(() => series[2], throwsRangeError); 56 | }); 57 | 58 | test( 59 | 'should return number of columns that is equal to the number of ' 60 | 'headers', () { 61 | final headers = ['col_1', 'col_2', 'col_3', 'col_4']; 62 | final rows = [ 63 | [1, null, true], 64 | [100, '23', false], 65 | [230, '11', false], 66 | [null, 'text', false], 67 | [0, '00', true], 68 | ]; 69 | final series = convertRowsToSeries(headers, rows).toList(); 70 | 71 | expect(series, hasLength(4)); 72 | 73 | expect(series[0].name, 'col_1'); 74 | expect(series[0].data, [1, 100, 230, null, 0]); 75 | 76 | expect(series[1].name, 'col_2'); 77 | expect(series[1].data, [null, '23', '11', 'text', '00']); 78 | 79 | expect(series[2].name, 'col_3'); 80 | expect(series[2].data, [true, false, false, false, true]); 81 | 82 | expect(series[3].name, 'col_4'); 83 | expect(series[3].data, isEmpty); 84 | }); 85 | }); 86 | 87 | group('convertSeriesToRows', () { 88 | test( 89 | 'should extract rows (transpose series columns) from the given ' 90 | 'series collection', () { 91 | final series = [ 92 | Series('col_1', [1, 2, 3, null, 33]), 93 | Series('col_2', [true, false, true, true, null]), 94 | Series('col_3', ['1', '2', '3', '22', '33']), 95 | ]; 96 | 97 | final rows = convertSeriesToRows(series); 98 | 99 | expect( 100 | rows, 101 | equals([ 102 | [1, true, '1'], 103 | [2, false, '2'], 104 | [3, true, '3'], 105 | [null, true, '22'], 106 | [33, null, '33'], 107 | ])); 108 | }); 109 | }); 110 | }); 111 | } 112 | -------------------------------------------------------------------------------- /test/data_frame/data_frame_test.dart: -------------------------------------------------------------------------------- 1 | import 'dart:convert'; 2 | import 
'dart:io'; 3 | 4 | import 'package:ml_dataframe/ml_dataframe.dart'; 5 | import 'package:ml_dataframe/src/data_frame/data_frame_json_keys.dart'; 6 | import 'package:ml_dataframe/src/data_frame/exceptions/wrong_series_shape_exception.dart'; 7 | import 'package:ml_linalg/matrix.dart'; 8 | import 'package:test/test.dart'; 9 | 10 | void main() { 11 | group('DataFrame', () { 12 | final data = [ 13 | ['first', 'second', 'third'], 14 | ['1', 2, 3], 15 | [10, 12, 323], 16 | [-10, 202, 1000], 17 | ]; 18 | 19 | test('should convert stored data into matrix', () { 20 | final data = [ 21 | ['col_1', 'col_2', 'col_3', 'col_4', 'col_5'], 22 | ['1', 2, 3, 0, 32], 23 | [10, 12, 323, 1.5, 1132], 24 | [-10, 202, 1000, '1.5', 0.005], 25 | ]; 26 | final frame = DataFrame(data, 27 | headerExists: true, columnNames: ['col_1', 'col_3', 'col_4']); 28 | 29 | expect(frame.toMatrix(), isA()); 30 | expect( 31 | frame.toMatrix(), 32 | equals([ 33 | [1, 3, 0], 34 | [10, 323, 1.5], 35 | [-10, 1000, 1.5], 36 | ])); 37 | }); 38 | 39 | group('[] operator', () { 40 | test('should provide access to its series by series name', () { 41 | final frame = DataFrame(data); 42 | 43 | expect(frame['first'].name, 'first'); 44 | expect(frame['first'].data, equals(['1', 10, -10])); 45 | 46 | expect(frame['second'].name, 'second'); 47 | expect(frame['second'].data, equals([2, 12, 202])); 48 | 49 | expect(frame['third'].name, 'third'); 50 | expect(frame['third'].data, equals([3, 323, 1000])); 51 | }); 52 | 53 | test('should provide access to its series by series index', () { 54 | final frame = DataFrame(data); 55 | 56 | expect(frame[0].name, 'first'); 57 | expect(frame[0].data, equals(['1', 10, -10])); 58 | 59 | expect(frame[1].name, 'second'); 60 | expect(frame[1].data, equals([2, 12, 202])); 61 | 62 | expect(frame[2].name, 'third'); 63 | expect(frame[2].data, equals([3, 323, 1000])); 64 | }); 65 | 66 | test( 67 | 'should throw an error if one tries to access a series using a key of ' 68 | 'improper type (neither 
String nor int)', () { 69 | final frame = DataFrame(data); 70 | 71 | expect(() => frame[{1}], throwsException); 72 | expect(() => frame[1.2], throwsException); 73 | expect(() => frame[[1, 2]], throwsException); 74 | }); 75 | 76 | test( 77 | 'should throw a range error if one tries to access a series using an ' 78 | 'integer key which is out of range', () { 79 | final frame = DataFrame(data); 80 | 81 | expect(() => frame[3], throwsRangeError); 82 | expect(() => frame[4], throwsRangeError); 83 | expect(() => frame[-1], throwsRangeError); 84 | }); 85 | }); 86 | 87 | group('dropSeries', () { 88 | test( 89 | 'should drop series by indices and return a new DataFrame without ' 90 | 'these series', () { 91 | final data = [ 92 | ['1', 2, 3, 0, 32], 93 | [10, 12, 323, 1.5, 1132], 94 | [-10, 202, 1000, '1.5', 0.005], 95 | ]; 96 | final frame = DataFrame(data, headerExists: false); 97 | final reduced = frame.dropSeries(indices: [0, 2, 4]); 98 | 99 | expect( 100 | reduced.rows, 101 | equals([ 102 | [2, 0], 103 | [12, 1.5], 104 | [202, '1.5'], 105 | ])); 106 | }); 107 | 108 | test( 109 | 'should drop series by indices and return a new DataFrame without ' 110 | 'these series even if input indices are not unique', () { 111 | final data = [ 112 | ['1', 2, 3, 0, 32], 113 | [10, 12, 323, 1.5, 1132], 114 | [-10, 202, 1000, '1.5', 0.005], 115 | ]; 116 | final frame = DataFrame(data, headerExists: false); 117 | final reduced = frame.dropSeries(indices: [0, 2, 2, 0, 4, 2, 4, 0, 4]); 118 | 119 | expect( 120 | reduced.rows, 121 | equals([ 122 | [2, 0], 123 | [12, 1.5], 124 | [202, '1.5'], 125 | ])); 126 | }); 127 | 128 | test( 129 | 'should drop series by series names and return a new DataFrame ' 130 | 'without these series', () { 131 | final data = [ 132 | ['1', 2, 3, 0, 32], 133 | [10, 12, 323, 1.5, 1132], 134 | [-10, 202, 1000, '1.5', 0.005], 135 | ]; 136 | final frame = DataFrame(data, headerExists: false); 137 | final reduced = frame.dropSeries(names: ['col_0', 'col_2', 'col_4']); 138 
| 139 | expect( 140 | reduced.rows, 141 | equals([ 142 | [2, 0], 143 | [12, 1.5], 144 | [202, '1.5'], 145 | ])); 146 | }); 147 | 148 | test( 149 | 'should drop series by series names and return a new DataFrame ' 150 | 'without these series even if input names are not unique', () { 151 | final data = [ 152 | ['1', 2, 3, 0, 32], 153 | [10, 12, 323, 1.5, 1132], 154 | [-10, 202, 1000, '1.5', 0.005], 155 | ]; 156 | final frame = DataFrame(data, headerExists: false); 157 | final reduced = frame.dropSeries( 158 | names: ['col_4', 'col_0', 'col_4', 'col_2', 'col_4', 'col_2']); 159 | 160 | expect( 161 | reduced.rows, 162 | equals([ 163 | [2, 0], 164 | [12, 1.5], 165 | [202, '1.5'], 166 | ])); 167 | }); 168 | 169 | test( 170 | 'should return a copy of the DataFrame if none of the parameters ' 171 | 'are specified', () { 172 | final data = [ 173 | ['1', 2, 3, 0, 32], 174 | [10, 12, 323, 1.5, 1132], 175 | [-10, 202, 1000, '1.5', 0.005], 176 | ]; 177 | final frame = DataFrame(data, headerExists: false); 178 | final reduced = frame.dropSeries(); 179 | 180 | expect( 181 | reduced.rows, 182 | equals([ 183 | ['1', 2, 3, 0, 32], 184 | [10, 12, 323, 1.5, 1132], 185 | [-10, 202, 1000, '1.5', 0.005], 186 | ])); 187 | }); 188 | }); 189 | 190 | group('sampleFromSeries', () { 191 | test('should sample dataframe by series indices', () { 192 | final data = [ 193 | ['1', 2, 3, 0, 32], 194 | [10, 12, 323, 1.5, 1132], 195 | [-10, 202, 1000, '1.5', 0.005], 196 | ]; 197 | 198 | final dataFrame = DataFrame(data, headerExists: false); 199 | final sampled = dataFrame.sampleFromSeries(indices: [0, 1, 2, 3, 4]); 200 | 201 | expect( 202 | sampled.rows, 203 | equals([ 204 | ['1', 2, 3, 0, 32], 205 | [10, 12, 323, 1.5, 1132], 206 | [-10, 202, 1000, '1.5', 0.005], 207 | ])); 208 | }); 209 | 210 | test('should support repeating indices', () { 211 | final data = [ 212 | ['1', 2, 3, 0, 32], 213 | [10, 12, 323, 1.5, 1132], 214 | [-10, 202, 1000, '1.5', 0.005], 215 | ]; 216 | 217 | final dataFrame = 
DataFrame(data, headerExists: false); 218 | final sampled = dataFrame.sampleFromSeries(indices: [0, 1, 0, 1]); 219 | 220 | expect( 221 | sampled.rows, 222 | equals([ 223 | ['1', 2, '1', 2], 224 | [10, 12, 10, 12], 225 | [-10, 202, -10, 202], 226 | ])); 227 | }); 228 | 229 | test('should sample dataframe by series names', () { 230 | final data = [ 231 | ['1', 2, 3, 0, 32], 232 | [10, 12, 323, 1.5, 1132], 233 | [-10, 202, 1000, '1.5', 0.005], 234 | ]; 235 | 236 | final dataFrame = DataFrame(data, headerExists: false); 237 | final sampled = dataFrame.sampleFromSeries( 238 | names: ['col_0', 'col_1', 'col_2', 'col_3', 'col_4']); 239 | 240 | expect( 241 | sampled.rows, 242 | equals([ 243 | ['1', 2, 3, 0, 32], 244 | [10, 12, 323, 1.5, 1132], 245 | [-10, 202, 1000, '1.5', 0.005], 246 | ])); 247 | }); 248 | 249 | test( 250 | 'should ignore names parameter if indices parameter is ' 251 | 'provided', () { 252 | final data = [ 253 | ['1', 2, 3, 0, 32], 254 | [10, 12, 323, 1.5, 1132], 255 | [-10, 202, 1000, '1.5', 0.005], 256 | ]; 257 | 258 | final dataFrame = DataFrame(data, headerExists: false); 259 | final sampled = dataFrame.sampleFromSeries( 260 | indices: [1], names: ['col_0', 'col_1', 'col_2', 'col_3', 'col_4']); 261 | 262 | expect( 263 | sampled.rows, 264 | equals([ 265 | [ 266 | 2, 267 | ], 268 | [ 269 | 12, 270 | ], 271 | [ 272 | 202, 273 | ], 274 | ])); 275 | }); 276 | 277 | test('should throw an error if outranged indices are provided', () { 278 | final data = [ 279 | ['1', 2, 3, 0, 32], 280 | [10, 12, 323, 1.5, 1132], 281 | [-10, 202, 1000, '1.5', 0.005], 282 | ]; 283 | 284 | final dataFrame = DataFrame(data, headerExists: false); 285 | final actual = () => dataFrame.sampleFromSeries(indices: [1, 40]); 286 | 287 | expect(actual, throwsRangeError); 288 | }); 289 | 290 | test('should throw an error if outranged negative indices are provided', 291 | () { 292 | final data = [ 293 | ['1', 2, 3, 0, 32], 294 | [10, 12, 323, 1.5, 1132], 295 | [-10, 202, 1000, '1.5', 
0.005], 296 | ]; 297 | 298 | final dataFrame = DataFrame(data, headerExists: false); 299 | final actual = () => dataFrame.sampleFromSeries(indices: [2, -1]); 300 | 301 | expect(actual, throwsRangeError); 302 | }); 303 | 304 | test( 305 | 'should throw an error if names of non existent columns are ' 306 | 'provided', () { 307 | final data = [ 308 | ['1', 2, 3, 0, 32], 309 | [10, 12, 323, 1.5, 1132], 310 | [-10, 202, 1000, '1.5', 0.005], 311 | ]; 312 | 313 | final dataFrame = DataFrame(data, headerExists: false); 314 | final actual = 315 | () => dataFrame.sampleFromSeries(names: ['col_0', 'col_100']); 316 | 317 | expect(actual, throwsException); 318 | }); 319 | }); 320 | 321 | group('sampleFromRows', () { 322 | final data = [ 323 | ['1', 2, 3, 0, 32], 324 | [10, 12, 323, 1.5, 1132], 325 | [-10, 202, 1000, '1.5', 0.005], 326 | ]; 327 | final header = [ 328 | 'super_col_1', 329 | 'super_col_2', 330 | 'super_col_3', 331 | 'super_col_4', 332 | 'super_col_5', 333 | ]; 334 | 335 | final dataFrame = DataFrame(data, headerExists: false); 336 | final dataFrameWithHeader = 337 | DataFrame(data, headerExists: false, header: header); 338 | 339 | test( 340 | 'should return empty Dataframe if indices array is empty ' 341 | '(headless dataframe)', () { 342 | final sampled = dataFrame.sampleFromRows([]); 343 | 344 | expect(sampled.rows, []); 345 | }); 346 | 347 | test( 348 | 'should return a correct header for empty Dataframe if indices ' 349 | 'array is empty (headless dataframe)', () { 350 | final sampled = dataFrame.sampleFromRows([]); 351 | 352 | expect(sampled.header, dataFrame.header); 353 | }); 354 | 355 | test('should return empty Dataframe if indices array is empty', () { 356 | final sampled = dataFrameWithHeader.sampleFromRows([]); 357 | 358 | expect(sampled.rows, []); 359 | expect(sampled.header, header); 360 | }); 361 | 362 | test( 363 | 'should return a correct header for empty Dataframe if indices array ' 364 | 'is empty', () { 365 | final sampled = 
dataFrameWithHeader.sampleFromRows([]); 366 | 367 | expect(sampled.header, header); 368 | }); 369 | 370 | test('should return a Dataframe sampled from non-repeating indices', () { 371 | final sampled = dataFrame.sampleFromRows([0, 2]); 372 | 373 | expect(sampled.rows, [data[0], data[2]]); 374 | }); 375 | 376 | test('should return a Dataframe sampled from repeating indices', () { 377 | final sampled = dataFrame.sampleFromRows([1, 1, 1]); 378 | 379 | expect(sampled.rows, [data[1], data[1], data[1]]); 380 | }); 381 | 382 | test('should return a Dataframe sampled from unordered indices', () { 383 | final sampled = dataFrame.sampleFromRows([2, 0, 1]); 384 | 385 | expect(sampled.rows, [data[2], data[0], data[1]]); 386 | }); 387 | 388 | test( 389 | 'should return a new Dataframe instance for the same set of ' 390 | 'indices', () { 391 | final sampled1 = dataFrame.sampleFromRows([2, 0, 1]); 392 | final sampled2 = dataFrame.sampleFromRows([2, 0, 1]); 393 | 394 | expect(sampled1, isNot(sampled2)); 395 | }); 396 | }); 397 | 398 | group('addSeries', () { 399 | final series = Series('some new series', [4000, 6000, 9000]); 400 | final invalidSeries1 = 401 | Series('invalid series', [4000, 6000, 9000, 1000]); 402 | final invalidSeries2 = Series('invalid series', [4000, 6000]); 403 | 404 | test('should add a new series', () { 405 | final dataFrame = DataFrame(data); 406 | final newDataFrame = dataFrame.addSeries(series); 407 | 408 | expect(newDataFrame.series.last, series); 409 | }); 410 | 411 | test('should create a new dataframe', () { 412 | final dataFrame = DataFrame(data); 413 | final newDataFrame = dataFrame.addSeries(series); 414 | 415 | expect(newDataFrame, isNot(same(dataFrame))); 416 | }); 417 | 418 | test('should change dimension of a new dataframe', () { 419 | final dataFrame = DataFrame(data); 420 | final newDataFrame = dataFrame.addSeries(series); 421 | 422 | expect(newDataFrame.shape, [3, 4]); 423 | }); 424 | 425 | test( 426 | 'should throw an exception if a series 
of invalid shape is ' 427 | 'provided, case 1', () { 428 | final dataFrame = DataFrame(data); 429 | final newDataFrame = () => dataFrame.addSeries(invalidSeries1); 430 | 431 | expect(newDataFrame, throwsA(isA())); 432 | }); 433 | 434 | test( 435 | 'should throw an exception if a series of invalid shape is ' 436 | 'provided, case 2', () { 437 | final dataFrame = DataFrame(data); 438 | final newDataFrame = () => dataFrame.addSeries(invalidSeries2); 439 | 440 | expect(newDataFrame, throwsA(isA())); 441 | }); 442 | }); 443 | 444 | group('shuffle', () { 445 | test('should return a new dataframe with the same header', () { 446 | final dataframe = DataFrame(data); 447 | final shuffled = dataframe.shuffle(); 448 | 449 | expect(shuffled.header, dataframe.header); 450 | }); 451 | 452 | test('should return a new dataframe with the same number of rows', () { 453 | final dataframe = DataFrame(data); 454 | final shuffled = dataframe.shuffle(); 455 | 456 | expect(shuffled.rows, hasLength(dataframe.rows.length)); 457 | }); 458 | 459 | test('should return a new dataframe with the different order of rows', 460 | () { 461 | final dataframe = DataFrame(data); 462 | final shuffled = dataframe.shuffle(); 463 | 464 | expect(shuffled.rows, isNot(equals(dataframe.rows))); 465 | }); 466 | 467 | test('should return a new dataframe containing the same rows', () { 468 | final dataframe = DataFrame(data); 469 | final shuffled = dataframe.shuffle(); 470 | 471 | dataframe.rows.forEach((row) { 472 | expect(shuffled.rows, contains(equals(row))); 473 | }); 474 | }); 475 | }); 476 | 477 | group('serialization', () { 478 | final json = { 479 | dataFrameHeaderJsonKey: ['first', 'second', 'third'], 480 | dataFrameRowsJsonKey: [ 481 | ['1', 2, 3], 482 | [10, 12, 323], 483 | [-10, 202, 1000], 484 | ], 485 | dataFrameNumericalConverterJsonKey: '', 486 | }; 487 | 488 | final fileName = 'test/data_frame/data_frame.json'; 489 | 490 | tearDown(() async { 491 | final file = File(fileName); 492 | if (await 
file.exists()) { 493 | await file.delete(); 494 | } 495 | }); 496 | 497 | test('should convert to serializable map', () { 498 | final dataFrame = DataFrame(data); 499 | final actualJson = dataFrame.toJson(); 500 | 501 | expect(actualJson, equals(json)); 502 | }); 503 | 504 | test('should restore from json', () { 505 | final dataFrame = DataFrame.fromJson(json); 506 | 507 | expect(dataFrame.header, data[0]); 508 | expect(dataFrame.rows, data.skip(1)); 509 | }); 510 | 511 | test('should return a file pointer while saving as json', () async { 512 | final dataFrame = DataFrame(data); 513 | final file = await dataFrame.saveAsJson(fileName, rewrite: true); 514 | 515 | expect(file.existsSync(), isTrue); 516 | }); 517 | 518 | test('should save serializable map to json file', () async { 519 | final dataFrame = DataFrame(data); 520 | 521 | await dataFrame.saveAsJson(fileName, rewrite: true); 522 | 523 | final file = File(fileName); 524 | 525 | expect(file.existsSync(), isTrue); 526 | }); 527 | 528 | test('should save a correct json', () async { 529 | final dataFrame = DataFrame(data); 530 | 531 | await dataFrame.saveAsJson(fileName, rewrite: true); 532 | 533 | final file = File(fileName); 534 | final dataAsString = await file.readAsString(); 535 | final actualJson = jsonDecode(dataAsString) as Map; 536 | 537 | expect(actualJson, json); 538 | }); 539 | }); 540 | 541 | group('map', () { 542 | test('should map numeric values according to the given function', () { 543 | final data = DataFrame([ 544 | ['col_1', 'col_2', 'col_3'], 545 | [2, 20, 200], 546 | [3, 30, 300], 547 | [4, 40, 400], 548 | ]); 549 | final actual = data.map((value) => value * 2); 550 | 551 | expect(actual.rows, [ 552 | [4, 40, 400], 553 | [6, 60, 600], 554 | [8, 80, 800], 555 | ]); 556 | expect(actual.header, data.header); 557 | }); 558 | 559 | test('should convert values according to the given function', () { 560 | final data = DataFrame([ 561 | ['col_1', 'col_2', 'col_3'], 562 | [2, 20, 200], 563 | [3, 30, 
300], 564 | [4, 40, 400], 565 | ]); 566 | final actual = data.map((value) => value.toString()); 567 | 568 | expect(actual.rows, [ 569 | ['2', '20', '200'], 570 | ['3', '30', '300'], 571 | ['4', '40', '400'], 572 | ]); 573 | expect(actual.header, data.header); 574 | }); 575 | }); 576 | 577 | group('mapSeries', () { 578 | test( 579 | 'should map numeric values according to the given function, name="col_1"', 580 | () { 581 | final data = DataFrame([ 582 | ['col_1', 'col_2', 'col_3'], 583 | [2, 20, 200], 584 | [3, 30, 300], 585 | [4, 40, 400], 586 | ]); 587 | final actual = 588 | data.mapSeries((value) => value * 2, name: 'col_1'); 589 | 590 | expect(actual.rows, [ 591 | [4, 20, 200], 592 | [6, 30, 300], 593 | [8, 40, 400], 594 | ]); 595 | expect(actual.header, data.header); 596 | }); 597 | 598 | test( 599 | 'should map numeric values according to the given function, name="col_2"', 600 | () { 601 | final data = DataFrame([ 602 | ['col_1', 'col_2', 'col_3'], 603 | [2, 20, 200], 604 | [3, 30, 300], 605 | [4, 40, 400], 606 | ]); 607 | final actual = 608 | data.mapSeries((value) => value * 2, name: 'col_2'); 609 | 610 | expect(actual.rows, [ 611 | [2, 40, 200], 612 | [3, 60, 300], 613 | [4, 80, 400], 614 | ]); 615 | expect(actual.header, data.header); 616 | }); 617 | 618 | test( 619 | 'should map numeric values according to the given function, name="col_3"', 620 | () { 621 | final data = DataFrame([ 622 | ['col_1', 'col_2', 'col_3'], 623 | [2, 20, 200], 624 | [3, 30, 300], 625 | [4, 40, 400], 626 | ]); 627 | final actual = 628 | data.mapSeries((value) => value * 2, name: 'col_3'); 629 | 630 | expect(actual.rows, [ 631 | [2, 20, 400], 632 | [3, 30, 600], 633 | [4, 40, 800], 634 | ]); 635 | expect(actual.header, data.header); 636 | }); 637 | 638 | test('should throw an error if the series does not exist', () { 639 | final data = DataFrame([ 640 | ['col_1', 'col_2', 'col_3'], 641 | [2, 20, 200], 642 | [3, 30, 300], 643 | [4, 40, 400], 644 | ]); 645 | final actual = () => 646 
| data.mapSeries((value) => value * 2, name: 'col_33'); 647 | 648 | expect(actual, throwsException); 649 | }); 650 | 651 | test('should convert values according to the given function', () { 652 | final data = DataFrame([ 653 | ['col_1', 'col_2', 'col_3'], 654 | [2, 20, 200], 655 | [3, 30, 300], 656 | [4, 40, 400], 657 | ]); 658 | final actual = data.mapSeries((value) => value.toString(), 659 | name: 'col_2'); 660 | 661 | expect(actual.rows, [ 662 | [2, '20', 200], 663 | [3, '30', 300], 664 | [4, '40', 400], 665 | ]); 666 | expect(actual.header, data.header); 667 | }); 668 | 669 | test('should map numeric values according to the given function, index=0', 670 | () { 671 | final data = DataFrame([ 672 | ['col_1', 'col_2', 'col_3'], 673 | [2, 20, 200], 674 | [3, 30, 300], 675 | [4, 40, 400], 676 | ]); 677 | final actual = data.mapSeries((value) => value * 2, index: 0); 678 | 679 | expect(actual.rows, [ 680 | [4, 20, 200], 681 | [6, 30, 300], 682 | [8, 40, 400], 683 | ]); 684 | expect(actual.header, data.header); 685 | }); 686 | 687 | test('should map numeric values according to the given function, index=1', 688 | () { 689 | final data = DataFrame([ 690 | ['col_1', 'col_2', 'col_3'], 691 | [2, 20, 200], 692 | [3, 30, 300], 693 | [4, 40, 400], 694 | ]); 695 | final actual = data.mapSeries((value) => value * 2, index: 1); 696 | 697 | expect(actual.rows, [ 698 | [2, 40, 200], 699 | [3, 60, 300], 700 | [4, 80, 400], 701 | ]); 702 | expect(actual.header, data.header); 703 | }); 704 | 705 | test('should map numeric values according to the given function, index=2', 706 | () { 707 | final data = DataFrame([ 708 | ['col_1', 'col_2', 'col_3'], 709 | [2, 20, 200], 710 | [3, 30, 300], 711 | [4, 40, 400], 712 | ]); 713 | final actual = data.mapSeries((value) => value * 2, index: 2); 714 | 715 | expect(actual.rows, [ 716 | [2, 20, 400], 717 | [3, 30, 600], 718 | [4, 40, 800], 719 | ]); 720 | expect(actual.header, data.header); 721 | }); 722 | 723 | test('should throw an error if the 
index is out of range', () { 724 | final data = DataFrame([ 725 | ['col_1', 'col_2', 'col_3'], 726 | [2, 20, 200], 727 | [3, 30, 300], 728 | [4, 40, 400], 729 | ]); 730 | final actual = 731 | () => data.mapSeries((value) => value * 2, index: -1); 732 | 733 | expect(actual, throwsRangeError); 734 | }); 735 | 736 | test( 737 | 'should map numeric values according to the given function, discrete series', 738 | () { 739 | final data = DataFrame.fromSeries([ 740 | Series('series_1', [1, 2, 3, 2, 1], isDiscrete: true), 741 | Series('series_2', [10, 22, 33, 44.3, 10]), 742 | Series('series_3', ['1', 'a', 'b', 'c', 'd']), 743 | ]); 744 | final actual = 745 | data.mapSeries((value) => value * 2, name: 'series_1'); 746 | 747 | expect(actual.rows, [ 748 | [2, 10, '1'], 749 | [4, 22, 'a'], 750 | [6, 33, 'b'], 751 | [4, 44.3, 'c'], 752 | [2, 10, 'd'], 753 | ]); 754 | expect(actual.header, data.header); 755 | }); 756 | 757 | test('should throw an error if neither "name" nor "index" are passed', 758 | () { 759 | final data = DataFrame([ 760 | ['col_1', 'col_2', 'col_3'], 761 | [2, 20, 200], 762 | [3, 30, 300], 763 | [4, 40, 400], 764 | ]); 765 | final actual = () => data.mapSeries((value) => value * 2); 766 | 767 | expect(actual, throwsException); 768 | }); 769 | }); 770 | }); 771 | } 772 | -------------------------------------------------------------------------------- /test/data_frame/data_frame_to_string_test.dart: -------------------------------------------------------------------------------- 1 | import 'package:ml_dataframe/src/data_frame/data_frame.dart'; 2 | import 'package:test/test.dart'; 3 | 4 | void main() { 5 | group('DataFrame.fromMatrix', () { 6 | final rawCSV = ''' 7 | id,age,salary,children,gender,profession,years_of_education,married,height,weight 8 | 1,25,30000,2,M,Teacher,5,true,189,78.3 9 | 2,46,85000,0,M,Manager,7,false,176,45.2 10 | 3,36,45000,1,F,Teacher,4,true,165,98.4 11 | 4,23,10000,5,M,Mushroom Collector,0,true,179,57.4 12 | 
5,22,30000,2,M,Butcher,5,true,189,87.9 13 | 6,28,82000,0,F,Scientist,10,true,179,98.3 14 | 7,46,85000,0,M,Scientist,7,false,176,67.8 15 | 8,36,45000,1,F,Teacher,4,true,165,76.8 16 | 9,23,N/A,2,M,Unemployed,0,true,179,56.7 17 | 10,25,32000,4,F,Teacher,5,true,189,98.7 18 | 11,49,34700,0,M,Plumber,7,true,176,120.3 19 | 12,36,45000,1,F,Paramedic,4,true,165,67.9 20 | 13,23,42900,2,M,Researcher,0,true,179,92.3 21 | '''; 22 | final dataFrame1 = DataFrame.fromRawCsv(rawCSV); 23 | final rawCSV2 = ''' 24 | id,age,salary 25 | 1,25,30000 26 | 2,46,85000 27 | 3,46,85000 28 | '''; 29 | final dataFrame2 = DataFrame.fromRawCsv(rawCSV2); 30 | 31 | final dataFrames = [dataFrame1, dataFrame2]; 32 | 33 | final expected = {}; 34 | 35 | expected['1:10x7'] = ''' 36 | DataFrame (13 x 10) 37 | id age salary children gender profession ... weight 38 | 1 25 30000 2 M Teacher ... 78.3 39 | 2 46 85000 0 M Manager ... 45.2 40 | 3 36 45000 1 F Teacher ... 98.4 41 | 4 23 10000 5 M Mushroom Collector ... 57.4 42 | 5 22 30000 2 M Butcher ... 87.9 43 | ... ... ... ... ... ... ... ... 44 | 9 23 N/A 2 M Unemployed ... 56.7 45 | 10 25 32000 4 F Teacher ... 98.7 46 | 11 49 34700 0 M Plumber ... 120.3 47 | 12 36 45000 1 F Paramedic ... 67.9 48 | 13 23 42900 2 M Researcher ... 
92.3'''; 49 | 50 | expected['1:1000x1000'] = ''' 51 | DataFrame (13 x 10) 52 | id age salary children gender profession years_of_education married height weight 53 | 1 25 30000 2 M Teacher 5 true 189 78.3 54 | 2 46 85000 0 M Manager 7 false 176 45.2 55 | 3 36 45000 1 F Teacher 4 true 165 98.4 56 | 4 23 10000 5 M Mushroom Collector 0 true 179 57.4 57 | 5 22 30000 2 M Butcher 5 true 189 87.9 58 | 6 28 82000 0 F Scientist 10 true 179 98.3 59 | 7 46 85000 0 M Scientist 7 false 176 67.8 60 | 8 36 45000 1 F Teacher 4 true 165 76.8 61 | 9 23 N/A 2 M Unemployed 0 true 179 56.7 62 | 10 25 32000 4 F Teacher 5 true 189 98.7 63 | 11 49 34700 0 M Plumber 7 true 176 120.3 64 | 12 36 45000 1 F Paramedic 4 true 165 67.9 65 | 13 23 42900 2 M Researcher 0 true 179 92.3'''; 66 | 67 | expected['1:0x10'] = ''' 68 | DataFrame (13 x 10)'''; 69 | 70 | expected['1:3x0'] = ''' 71 | DataFrame (13 x 10)'''; 72 | 73 | expected['1:1x1000'] = ''' 74 | DataFrame (13 x 10) 75 | id age salary children gender profession years_of_education married height weight 76 | 1 25 30000 2 M Teacher 5 true 189 78.3'''; 77 | 78 | expected['1:1000x1'] = ''' 79 | DataFrame (13 x 10) 80 | id 81 | 1 82 | 2 83 | 3 84 | 4 85 | 5 86 | 6 87 | 7 88 | 8 89 | 9 90 | 10 91 | 11 92 | 12 93 | 13'''; 94 | 95 | expected['1:3x2'] = ''' 96 | DataFrame (13 x 10) 97 | id ... weight 98 | 1 ... 78.3 99 | ... ... ... 100 | 12 ... 67.9 101 | 13 ... 92.3'''; 102 | 103 | expected['1:3x3'] = ''' 104 | DataFrame (13 x 10) 105 | id age ... weight 106 | 1 25 ... 78.3 107 | ... ... ... ... 108 | 12 36 ... 67.9 109 | 13 23 ... 
92.3'''; 110 | 111 | expected['1:1x1'] = ''' 112 | DataFrame (13 x 10) 113 | id 114 | 1'''; 115 | 116 | expected['2:10x7'] = ''' 117 | DataFrame (3 x 3) 118 | id age salary 119 | 1 25 30000 120 | 2 46 85000 121 | 3 46 85000'''; 122 | 123 | expected['2:1000x1000'] = ''' 124 | DataFrame (3 x 3) 125 | id age salary 126 | 1 25 30000 127 | 2 46 85000 128 | 3 46 85000'''; 129 | 130 | expected['2:0x10'] = ''' 131 | DataFrame (3 x 3)'''; 132 | 133 | expected['2:3x0'] = ''' 134 | DataFrame (3 x 3)'''; 135 | 136 | expected['2:1x1000'] = ''' 137 | DataFrame (3 x 3) 138 | id age salary 139 | 1 25 30000'''; 140 | 141 | expected['2:1000x1'] = ''' 142 | DataFrame (3 x 3) 143 | id 144 | 1 145 | 2 146 | 3'''; 147 | 148 | expected['2:3x2'] = ''' 149 | DataFrame (3 x 3) 150 | id ... salary 151 | 1 ... 30000 152 | 2 ... 85000 153 | 3 ... 85000'''; 154 | 155 | expected['2:3x3'] = ''' 156 | DataFrame (3 x 3) 157 | id age salary 158 | 1 25 30000 159 | 2 46 85000 160 | 3 46 85000'''; 161 | 162 | expected['2:1x1'] = ''' 163 | DataFrame (3 x 3) 164 | id 165 | 1'''; 166 | 167 | for (var entry in expected.entries) { 168 | final dfNumberAndSpecs = entry.key.split(':'); 169 | final dfIndex = int.parse(dfNumberAndSpecs[0]) - 1; 170 | final specs = dfNumberAndSpecs[1].split('x'); 171 | final maxRows = int.parse(specs[0]); 172 | final maxCols = int.parse(specs[1]); 173 | test( 174 | 'should print dataFrame${dfNumberAndSpecs[0]} with maxRows=$maxRows, maxCols=$maxCols correctly', 175 | () { 176 | expect(dataFrames[dfIndex].toString(maxRows: maxRows, maxCols: maxCols), 177 | entry.value); 178 | }); 179 | } 180 | }); 181 | } 182 | -------------------------------------------------------------------------------- /test/data_frame/factories/data_frame.json: -------------------------------------------------------------------------------- 1 | {"H":["country","elo98","elo15","confederation","gdp06","popu06","gdp_source","popu_source"],"R":[["Afghanistan",993,1116,"AFC",1076.461378,25631282,"World 
Bank","World Bank"],["Albania",1382,1549,"UEFA",7181.094192,2992547,"World Bank","World Bank"],["Algeria",1486,1671,"CAF",11093.21748,34507214,"World Bank","World Bank"],["Andorra",1067,952,"UEFA",38800,81877,"CIA (2005)","World Bank"],["Angola",1498,1435,"CAF",4996.766535,17122409,"World Bank","World Bank"],["Anguilla",612,641,"CONCACAF",8800,13677,"CIA (2004)","CIA (2007)"],["Antigua and Barbuda",1149,1264,"CONCACAF",22039.72793,83467,"World Bank","World Bank"],["Argentina",1872,2041,"CONMEBOL",15717,38988923,"IMF","World Bank"],["Armenia",1394,1512,"UEFA",5524.415411,3002911,"World Bank","World Bank"],["Aruba",1015,925,"CONCACAF",21800,100830,"CIA (2004)","World Bank"],["Australia",1787,1718,"AFC",34325.01757,20697900,"World Bank","World Bank"],["Austria",1686,1694,"UEFA",37649.52898,8268641,"World Bank","World Bank"],["Azerbaijan",1290,1394,"UEFA",9829.633776,8484550,"World Bank","World Bank"],["Bahamas",858,899,"CONCACAF",23445.7251,335622,"World Bank","World Bank"],["Bahrain",1328,1467,"AFC",39789.79887,950951,"World Bank","World Bank"],["Bangladesh",1006,967,"AFC",1870.520036,144868702,"World Bank","World Bank"],["Barbados",1291,1102,"CONCACAF",14639.43364,274923,"World Bank","World Bank"],["Belarus",1351,1543,"UEFA",11021.49345,9604000,"World Bank","World Bank"],["Belgium",1784,1887,"UEFA",35169.03136,10547958,"World Bank","World Bank"],["Belize",985,1098,"CONCACAF",7672.4079,278985,"World Bank","World Bank"],["Benin",1207,1371,"CAF",1461.396834,8443671,"World Bank","World Bank"],["Bermuda",1280,1230,"CONCACAF",53946.52141,64523,"World Bank","World Bank"],["Bhutan",640,611,"AFC",4374.569289,665568,"World Bank","World Bank"],["Bolivia",1664,1604,"CONMEBOL",4327.855389,9517395,"World Bank","World Bank"],["Bosnia and Herzegovina",1556,1694,"UEFA",7086.911934,3875157,"World Bank","World Bank"],["Botswana",1027,1286,"CAF",11259.67881,1895944,"World Bank","World Bank"],["Brazil",2065,2036,"CONMEBOL",11181.59224,188134315,"World Bank","World Bank"],["British 
Virgin Islands",848,652,"CONCACAF",38500,23522,"CIA (2004)","CIA (2007)"],["Brunei",775,635,"AFC",70008.50107,374697,"World Bank","World Bank"],["Bulgaria",1768,1589,"UEFA",11229.43461,7699020,"World Bank","World Bank"],["Burkina Faso",1398,1467,"CAF",1203.680784,13822257,"World Bank","World Bank"],["Burundi",1440,1312,"CAF",646.4171774,8042579,"World Bank","World Bank"],["Cambodia",1012,756,"AFC",1959.803481,13555054,"World Bank","World Bank"],["Cameroon",1597,1609,"CAF",2337.817378,18611937,"World Bank","World Bank"],["Canada",1503,1499,"CONCACAF",37822.24824,32570505,"World Bank","World Bank"],["Cape Verde",1243,1536,"CAF",4531.103468,481940,"World Bank","World Bank"],["Cayman Islands",1154,954,"CONCACAF",43800,50026,"CIA (2004)","World Bank"],["Central African Republic",1189,1268,"CAF",683.8758493,4032102,"World Bank","World Bank"],["Chad",1229,1265,"CAF",1610.122791,10356822,"World Bank","World Bank"],["Chile",1757,1912,"CONMEBOL",15428.7598,16504530,"World Bank","World Bank"],["China",1631,1588,"AFC",5724.916728,1311020000,"World Bank","World Bank"],["Colombia",1741,2004,"CONMEBOL",8955.705347,43841370,"World Bank","World Bank"],["Comoros",891,907,"CAF",1327.671705,616526,"World Bank","World Bank"],["Congo",1484,1440,"CAF",4714.789654,3646653,"World Bank","World Bank"],["Cook Islands",875,819,"OFC",9100,21750,"CIA (2005)","CIA (2007)"],["Costa Rica",1594,1827,"CONCACAF",10436.2941,4392493,"World Bank","World Bank"],["Croatia",1829,1814,"UEFA",17019.72059,4440000,"World Bank","World Bank"],["Cuba",1414,1391,"CONCACAF",14445.29081,11301100,"World Bank","World Bank"],["Curacao",1197,1065,"CONCACAF",15000,141239,"CIA (2004)","World Bank"],["Cyprus",1437,1375,"UEFA",26337.08206,1048314,"World Bank","World Bank"],["Czechia",1853,1752,"UEFA",24415.8833,10238905,"World Bank","World Bank"],["Democratic Republic of Congo",1503,1486,"CAF",565.4314219,55590838,"World Bank","World Bank"],["Denmark",1828,1738,"UEFA",37189.65988,5437272,"World Bank","World 
Bank"],["Djibouti",891,788,"CAF",2124.121978,787544,"World Bank","World Bank"],["Dominica",1208,958,"CONCACAF",8417.551969,70690,"World Bank","World Bank"],["Dominican Republic",1130,1227,"CONCACAF",8789.926672,9479269,"World Bank","World Bank"],["East Timor","",738,"AFC",1107.117658,999053,"World Bank","World Bank"],["Eastern Samoa",674,534,"OFC",5800,58652,"CIA (2005)","World Bank"],["Ecuador",1735,1813,"CONMEBOL",8077.650114,14023503,"World Bank","World Bank"],["Egypt",1704,1620,"CAF",8254.752851,72990754,"World Bank","World Bank"],["El Salvador",1471,1479,"CONCACAF",6559.957034,6096692,"World Bank","World Bank"],["England",1938,1903,"UEFA",39931.21369,51075643.5,"World Bank's estimate for UK, adjusted per http://en.wikipedia.org/w/index.php?title=Countries_of_the_United_Kingdom_by_GVA_per_capita&oldid=153382497","http://en.wikipedia.org/wiki/Demography_of_England#Population"],["Equatorial Guinea",1097,1393,"CAF",30740.7216,621517,"World Bank","World Bank"],["Eritrea",1205,1097,"CAF",1166.905842,5035036,"World Bank","World Bank"],["Estonia",1200,1434,"UEFA",19305.69858,1346810,"World Bank","World Bank"],["Ethiopia",1288,1356,"CAF",730.0478897,78290649,"World Bank","World Bank"],["Faroe Islands",1151,1193,"UEFA",30500,47511,"CIA (2008)","CIA (2007)"],["Fiji",1394,1330,"OFC",6655.634722,828060,"World Bank","World Bank"],["Finland",1503,1584,"UEFA",34523.70751,5266268,"World Bank","World Bank"],["France",1963,1931,"UEFA",32288.80403,63617975,"World Bank","World Bank"],["Gabon",1442,1481,"CAF",15306.27739,1412907,"World Bank","World Bank"],["Gambia",1315,1337,"CAF",1396.003921,1482324,"World Bank","World Bank"],["Georgia",1621,1443,"UEFA",4694.944531,4398000,"World Bank","World Bank"],["Germany",2023,2104,"UEFA",34667.64864,82376451,"World Bank","World Bank"],["Ghana",1472,1640,"CAF",2398.177001,21947779,"World Bank","World Bank"],["Greece",1681,1654,"UEFA",28012.85245,11127947,"World Bank","World Bank"],["Grenada",1064,1131,"CONCACAF",10447.57933,103260,"World 
Bank","World Bank"],["Guam",631,837,"AFC",15000,158429,"CIA (2005)","World Bank"],["Guatemala",1540,1509,"CONCACAF",6109.652627,12995374,"World Bank","World Bank"],["Guinea",1491,1558,"CAF",1112.41633,9798963,"World Bank","World Bank"],["Guinea-Bissau",1204,1230,"CAF",1193.060526,1452659,"World Bank","World Bank"],["Guyana",1073,1221,"CONCACAF",4438.833776,765367,"World Bank","World Bank"],["Haiti",1389,1442,"CONCACAF",1446.09824,9388642,"World Bank","World Bank"],["Honduras",1487,1501,"CONCACAF",3788.342396,7037428,"World Bank","World Bank"],["Hong Kong",1219,1157,"AFC",39949.67056,6857100,"World Bank","World Bank"],["Hungary",1603,1622,"UEFA",18583.62346,10071370,"World Bank","World Bank"],["Iceland",1442,1648,"UEFA",36880.29818,303782,"World Bank","World Bank"],["India",1156,1093,"AFC",3224.70242,1143289350,"World Bank","World Bank"],["Indonesia",1329,1208,"AFC",6213.507267,227709821,"World Bank","World Bank"],["Iran",1627,1723,"AFC",12667.40675,70976584,"World Bank","World Bank"],["Iraq",1625,1519,"AFC",10343.51392,28064095,"World Bank","World Bank"],["Ireland",1661,1703,"UEFA",43714.38383,4273591,"World Bank","World Bank"],["Israel",1640,1600,"UEFA",25379.73644,7053700,"World Bank","World Bank"],["Italy",1929,1853,"UEFA",31988.08666,58143979,"World Bank","World Bank"],["Ivory Coast",1610,1737,"CAF",2594.339964,17662417,"World Bank","World Bank"],["Jamaica",1626,1525,"CONCACAF",8217.370381,2653042,"World Bank","World Bank"],["Japan",1710,1755,"AFC",31790.65016,127854000,"World Bank","World Bank"],["Jordan",1411,1460,"AFC",9009.814903,5536000,"World Bank","World Bank"],["Kazakhstan",1455,1324,"UEFA",15700.48413,15308084,"World Bank","World Bank"],["Kenya",1447,1382,"CAF",2128.048097,36757498,"World Bank","World Bank"],["Kuwait",1599,1454,"AFC",86397.36265,2417445,"World Bank","World Bank"],["Kyrgyzstan",1230,1094,"AFC",2218.608574,5218400,"World Bank","World Bank"],["Laos",1021,843,"AFC",2868.883234,5895930,"World Bank","World 
Bank"],["Latvia",1344,1448,"UEFA",15469.29417,2218357,"World Bank","World Bank"],["Lebanon",1363,1411,"AFC",11169.14057,4079823,"World Bank","World Bank"],["Lesotho",1173,1216,"CAF",1727.392427,1940413,"World Bank","World Bank"],["Liberia",1355,1294,"CAF",457.8227679,3384791,"World Bank","World Bank"],["Libya",1446,1478,"CAF",25008.83906,5686475,"World Bank","World Bank"],["Liechtenstein",1047,1188,"UEFA",89700,35028,"CIA (2007)","World Bank"],["Lithuania",1505,1437,"UEFA",16756.73765,3269909,"World Bank","World Bank"],["Luxembourg",1166,1195,"UEFA",77104.75771,472637,"World Bank","World Bank"],["Macao",1038,640,"AFC",65853.64122,479808,"World Bank","World Bank"],["Macedonia",1505,1441,"UEFA",8562.084377,2093801,"World Bank","World Bank"],["Madagascar",1466,1284,"CAF",1304.374638,18826126,"World Bank","World Bank"],["Malawi",1345,1399,"CAF",554.0030573,13307535,"World Bank","World Bank"],["Malaysia",1262,1220,"AFC",17053.02174,26327098,"World Bank","World Bank"],["Maldives",854,1066,"AFC",8238.051995,302825,"World Bank","World Bank"],["Mali",1450,1544,"CAF",1431.40943,12325545,"World Bank","World Bank"],["Malta",1188,1198,"UEFA",22291.33607,405308,"World Bank","World Bank"],["Mauritania",1197,1217,"CAF",2538.231119,3237713,"World Bank","World Bank"],["Mauritius",1162,1039,"CAF",12107.04624,1233996,"World Bank","World Bank"],["Mexico",1826,1878,"CONCACAF",13079.64782,112116694,"World Bank","World Bank"],["Moldova",1323,1374,"UEFA",3189.989932,3585209,"World Bank","World Bank"],["Mongolia",795,689,"AFC",4995.374005,2559496,"World Bank","World Bank"],["Montenegro","",1569,"UEFA",10497.85125,616854,"World Bank","World Bank"],["Montserrat",760,714,"CONCACAF",8500,9538,"CIA","CIA (2007)"],["Morocco",1723,1536,"CAF",5254.047452,30395097,"World Bank","World Bank"],["Mozambique",1328,1370,"CAF",734.2761797,21587317,"World Bank","World Bank"],["Myanmar",1139,1065,"AFC",2575,50500070,"IMF","World Bank"],["Namibia",1348,1384,"CAF",7061.349801,2052931,"World Bank","World 
Bank"],["Nepal",803,899,"AFC",1598.1113,25634043,"World Bank","World Bank"],["Netherlands",1926,2000,"UEFA",40434.10001,16346101,"World Bank","World Bank"],["New Caledonia",1243,1396,"OFC",27300,235314,"CIA (2005)","World Bank"],["New Zealand",1442,1524,"OFC",27362.47889,4184600,"World Bank","World Bank"],["Nicaragua",976,1115,"CONCACAF",3595.663428,5524927,"World Bank","World Bank"],["Niger",1338,1269,"CAF",737.4623368,13679705,"World Bank","World Bank"],["Nigeria",1681,1649,"CAF",3994.643445,143314909,"World Bank","World Bank"],["North Korea",1553,1448,"AFC",1900,23969917,"CIA (2007)","World Bank"],["Northern Ireland",1568,1494,"UEFA",28212.02108,1748065,"World Bank's estimate for UK, adjusted per http://en.wikipedia.org/w/index.php?title=Countries_of_the_United_Kingdom_by_GVA_per_capita&oldid=153382497","http://en.wikipedia.org/wiki/Demographics_of_Northern_Ireland#Population"],["Norway",1878,1589,"UEFA",53871.16364,4660677,"World Bank","World Bank"],["Oman",1298,1509,"AFC",39531.6006,2554905,"World Bank","World Bank"],["Pakistan",928,963,"AFC",3740.607865,160905794,"World Bank","World Bank"],["Palestine",1281,1269,"AFC",3847.806426,3406334,"World Bank","World Bank"],["Panama",1283,1709,"CONCACAF",10907.81503,3428509,"World Bank","World Bank"],["Papua New Guinea",1128,1115,"OFC",1669.598639,6245797,"World Bank","World Bank"],["Paraguay",1708,1668,"CONMEBOL",5671.463275,6014781,"World Bank","World Bank"],["Peru",1692,1701,"CONMEBOL",7318.377448,28030688,"World Bank","World Bank"],["Philippines",751,1230,"AFC",4557.883234,87366573,"World Bank","World Bank"],["Poland",1625,1713,"UEFA",15153.01523,38141267,"World Bank","World Bank"],["Portugal",1846,1867,"UEFA",23887.03544,10522288,"World Bank","World Bank"],["Puerto Rico",1011,1055,"CONCACAF",34255.06744,3805214,"World Bank","World Bank"],["Qatar",1536,1516,"AFC",118198.7316,967602,"World Bank","World Bank"],["Romania",1826,1753,"UEFA",11349.54831,21193760,"World Bank","World 
Bank"],["Russia",1814,1741,"UEFA",14975.04355,142487260,"World Bank","World Bank"],["Rwanda",1213,1374,"CAF",952.2694664,9660946,"World Bank","World Bank"],["Saint Kitts and Nevis",1245,1222,"CONCACAF",20186.97334,49823,"World Bank","World Bank"],["Saint Lucia",1230,1061,"CONCACAF",9952.474072,167658,"World Bank","World Bank"],["Saint Vincent and the Grenadines",1232,1190,"CONCACAF",9203.39887,108908,"World Bank","World Bank"],["San Marino",929,865,"UEFA",77304,30130,"IMF","World Bank"],["Sao Tome e Principe",978,1050,"CAF",2336.18468,158806,"World Bank","World Bank"],["Saudi Arabia",1638,1485,"AFC",36214.54693,25371936,"World Bank","World Bank"],["Scotland",1745,1701,"UEFA",34210.36161,5117000,"World Bank's estimate for UK, adjusted per http://en.wikipedia.org/w/index.php?title=Countries_of_the_United_Kingdom_by_GVA_per_capita&oldid=153382497","http://en.wikipedia.org/wiki/Demographics_of_Scotland"],["Senegal",1452,1658,"CAF",1926.556621,11582925,"World Bank","World Bank"],["Serbia",1934,1708,"UEFA",9895.795212,7411569,"World Bank","World Bank"],["Seychelles",971,982,"CAF",17465.91509,84600,"World Bank","World Bank"],["Sierra Leone",1433,1336,"CAF",1088.42943,5280909,"World Bank","World Bank"],["Singapore",1253,1165,"AFC",59754.6699,4401400,"World Bank","World Bank"],["Slovakia",1672,1736,"UEFA",18838.76907,5373054,"World Bank","World Bank"],["Slovenia",1465,1619,"UEFA",25885.31062,2006868,"World Bank","World Bank"],["Solomon Islands",1380,1217,"OFC",1520.601904,480745,"World Bank","World Bank"],["Somalia",1081,862,"CAF",600,8687671,"CIA (2007)","World Bank"],["South Africa",1638,1559,"CAF",10642.66535,47991699,"World Bank","World Bank"],["South Korea",1739,1722,"AFC",25863.21523,48371946,"World Bank","World Bank"],["South Sudan","",1194,"CAF",2782.071345,8376893,"South Sudan not independent in 2006; used Sudan's figure instead","World Bank"],["Spain",2000,1932,"UEFA",30879.8564,44397319,"World Bank","World Bank"],["Sri 
Lanka",1003,786,"AFC",5710.937991,19858000,"World Bank","World Bank"],["Sudan",1400,1333,"CAF",2782.071345,32397535,"World Bank","World Bank"],["Surinam",1266,1211,"CONCACAF",11519.66755,505186,"World Bank","World Bank"],["Swaziland",1175,1195,"CAF",5834.12336,1118253,"World Bank","World Bank"],["Sweden",1809,1774,"UEFA",37594.46396,9080505,"World Bank","World Bank"],["Switzerland",1634,1768,"UEFA",43377.7759,7483934,"World Bank","World Bank"],["Syria",1447,1490,"AFC",5389,18804914,"IMF","World Bank"],["Tahiti",1453,1260,"OFC",18000,178113,"CIA (2004, French Polynesia)","2007: http://en.wikipedia.org/wiki/Tahiti"],["Taiwan",1051,809,"AFC",31333,22800000,"IMF","IMF"],["Tajikistan",1515,1264,"AFC",1651.808504,6954522,"World Bank","World Bank"],["Tanzania",1318,1247,"CAF",1732.682948,39942347,"World Bank","World Bank"],["Thailand",1458,1356,"AFC",10461.54036,65883961,"World Bank","World Bank"],["Togo",1366,1432,"CAF",1126.653498,5685845,"World Bank","World Bank"],["Tonga",931,836,"OFC",4538.663256,101617,"World Bank","World Bank"],["Trinidad and Tobago",1430,1510,"CONCACAF",26420.72169,1303478,"World Bank","World Bank"],["Tunisia",1631,1568,"CAF",8359.634501,10127900,"World Bank","World Bank"],["Turkey",1623,1676,"UEFA",13049.32491,68626337,"World Bank","World Bank"],["Turkmenistan",1349,1264,"AFC",6550.023237,4801595,"World Bank","World Bank"],["Turks and Caicos","",719,"CONCACAF",29100,27642,"CIA (2007)","World Bank"],["Uganda",1407,1499,"CAF",1196.538493,29711397,"World Bank","World Bank"],["Ukraine",1655,1783,"UEFA",7213.516757,46787750,"World Bank","World Bank"],["United Arab Emirates",1558,1654,"AFC",87766.14726,4875639,"World Bank","World Bank"],["United States",1730,1804,"CONCACAF",46437.10733,298379912,"World Bank","World Bank"],["Uruguay",1732,1894,"CONMEBOL",12097.36142,3330217,"World Bank","World Bank"],["US Virgin Islands",809,740,"CONCACAF",14500,107700,"CIA (2004)","World Bank"],["Uzbekistan",1500,1612,"AFC",2958.390926,26488250,"World Bank","World 
Bank"],["Vanuatu",1079,1223,"OFC",2533.892793,214654,"World Bank","World Bank"],["Venezuela",1312,1667,"CONMEBOL",14846.73691,27190882,"World Bank","World Bank"],["Vietnam",1193,1235,"AFC",3383.513008,83311200,"World Bank","World Bank"],["Wales",1529,1645,"UEFA",27485.96108,2983270.5,"World Bank's estimate for UK, adjusted per http://en.wikipedia.org/w/index.php?title=Countries_of_the_United_Kingdom_by_GVA_per_capita&oldid=153382497","http://en.wikipedia.org/wiki/Demographics_of_Wales#Population"],["Western Samoa",852,822,"OFC",4972.141147,181073,"World Bank","World Bank"],["Yemen",1179,1198,"AFC",3964.068275,20661714,"World Bank","World Bank"],["Zambia",1627,1517,"CAF",2525.384325,11781612,"World Bank","World Bank"],["Zimbabwe",1512,1334,"CAF",1495.007942,12724308,"World Bank","World Bank"]],"N":{"ST":false}} -------------------------------------------------------------------------------- /test/data_frame/factories/elo_blatter.csv: -------------------------------------------------------------------------------- 1 | country,elo98,elo15,confederation,gdp06,popu06,gdp_source,popu_source 2 | Afghanistan,993,1116,AFC,1076.461378,25631282,World Bank,World Bank 3 | Albania,1382,1549,UEFA,7181.094192,2992547,World Bank,World Bank 4 | Algeria,1486,1671,CAF,11093.21748,34507214,World Bank,World Bank 5 | Andorra,1067,952,UEFA,38800,81877,CIA (2005),World Bank 6 | Angola,1498,1435,CAF,4996.766535,17122409,World Bank,World Bank 7 | Anguilla,612,641,CONCACAF,8800,13677,CIA (2004),CIA (2007) 8 | Antigua and Barbuda,1149,1264,CONCACAF,22039.72793,83467,World Bank,World Bank 9 | Argentina,1872,2041,CONMEBOL,15717,38988923,IMF,World Bank 10 | Armenia,1394,1512,UEFA,5524.415411,3002911,World Bank,World Bank 11 | Aruba,1015,925,CONCACAF,21800,100830,CIA (2004),World Bank 12 | Australia,1787,1718,AFC,34325.01757,20697900,World Bank,World Bank 13 | Austria,1686,1694,UEFA,37649.52898,8268641,World Bank,World Bank 14 | Azerbaijan,1290,1394,UEFA,9829.633776,8484550,World Bank,World Bank 
15 | Bahamas,858,899,CONCACAF,23445.7251,335622,World Bank,World Bank 16 | Bahrain,1328,1467,AFC,39789.79887,950951,World Bank,World Bank 17 | Bangladesh,1006,967,AFC,1870.520036,144868702,World Bank,World Bank 18 | Barbados,1291,1102,CONCACAF,14639.43364,274923,World Bank,World Bank 19 | Belarus,1351,1543,UEFA,11021.49345,9604000,World Bank,World Bank 20 | Belgium,1784,1887,UEFA,35169.03136,10547958,World Bank,World Bank 21 | Belize,985,1098,CONCACAF,7672.4079,278985,World Bank,World Bank 22 | Benin,1207,1371,CAF,1461.396834,8443671,World Bank,World Bank 23 | Bermuda,1280,1230,CONCACAF,53946.52141,64523,World Bank,World Bank 24 | Bhutan,640,611,AFC,4374.569289,665568,World Bank,World Bank 25 | Bolivia,1664,1604,CONMEBOL,4327.855389,9517395,World Bank,World Bank 26 | Bosnia and Herzegovina,1556,1694,UEFA,7086.911934,3875157,World Bank,World Bank 27 | Botswana,1027,1286,CAF,11259.67881,1895944,World Bank,World Bank 28 | Brazil,2065,2036,CONMEBOL,11181.59224,188134315,World Bank,World Bank 29 | British Virgin Islands,848,652,CONCACAF,38500,23522,CIA (2004),CIA (2007) 30 | Brunei,775,635,AFC,70008.50107,374697,World Bank,World Bank 31 | Bulgaria,1768,1589,UEFA,11229.43461,7699020,World Bank,World Bank 32 | Burkina Faso,1398,1467,CAF,1203.680784,13822257,World Bank,World Bank 33 | Burundi,1440,1312,CAF,646.4171774,8042579,World Bank,World Bank 34 | Cambodia,1012,756,AFC,1959.803481,13555054,World Bank,World Bank 35 | Cameroon,1597,1609,CAF,2337.817378,18611937,World Bank,World Bank 36 | Canada,1503,1499,CONCACAF,37822.24824,32570505,World Bank,World Bank 37 | Cape Verde,1243,1536,CAF,4531.103468,481940,World Bank,World Bank 38 | Cayman Islands,1154,954,CONCACAF,43800,50026,CIA (2004),World Bank 39 | Central African Republic,1189,1268,CAF,683.8758493,4032102,World Bank,World Bank 40 | Chad,1229,1265,CAF,1610.122791,10356822,World Bank,World Bank 41 | Chile,1757,1912,CONMEBOL,15428.7598,16504530,World Bank,World Bank 42 | China,1631,1588,AFC,5724.916728,1311020000,World 
Bank,World Bank 43 | Colombia,1741,2004,CONMEBOL,8955.705347,43841370,World Bank,World Bank 44 | Comoros,891,907,CAF,1327.671705,616526,World Bank,World Bank 45 | Congo,1484,1440,CAF,4714.789654,3646653,World Bank,World Bank 46 | Cook Islands,875,819,OFC,9100,21750,CIA (2005),CIA (2007) 47 | Costa Rica,1594,1827,CONCACAF,10436.2941,4392493,World Bank,World Bank 48 | Croatia,1829,1814,UEFA,17019.72059,4440000,World Bank,World Bank 49 | Cuba,1414,1391,CONCACAF,14445.29081,11301100,World Bank,World Bank 50 | Curacao,1197,1065,CONCACAF,15000,141239,CIA (2004),World Bank 51 | Cyprus,1437,1375,UEFA,26337.08206,1048314,World Bank,World Bank 52 | Czechia,1853,1752,UEFA,24415.8833,10238905,World Bank,World Bank 53 | Democratic Republic of Congo,1503,1486,CAF,565.4314219,55590838,World Bank,World Bank 54 | Denmark,1828,1738,UEFA,37189.65988,5437272,World Bank,World Bank 55 | Djibouti,891,788,CAF,2124.121978,787544,World Bank,World Bank 56 | Dominica,1208,958,CONCACAF,8417.551969,70690,World Bank,World Bank 57 | Dominican Republic,1130,1227,CONCACAF,8789.926672,9479269,World Bank,World Bank 58 | East Timor,,738,AFC,1107.117658,999053,World Bank,World Bank 59 | Eastern Samoa,674,534,OFC,5800,58652,CIA (2005),World Bank 60 | Ecuador,1735,1813,CONMEBOL,8077.650114,14023503,World Bank,World Bank 61 | Egypt,1704,1620,CAF,8254.752851,72990754,World Bank,World Bank 62 | El Salvador,1471,1479,CONCACAF,6559.957034,6096692,World Bank,World Bank 63 | England,1938,1903,UEFA,39931.21369,51075643.5,"World Bank's estimate for UK, adjusted per http://en.wikipedia.org/w/index.php?title=Countries_of_the_United_Kingdom_by_GVA_per_capita&oldid=153382497",http://en.wikipedia.org/wiki/Demography_of_England#Population 64 | Equatorial Guinea,1097,1393,CAF,30740.7216,621517,World Bank,World Bank 65 | Eritrea,1205,1097,CAF,1166.905842,5035036,World Bank,World Bank 66 | Estonia,1200,1434,UEFA,19305.69858,1346810,World Bank,World Bank 67 | Ethiopia,1288,1356,CAF,730.0478897,78290649,World Bank,World 
Bank 68 | Faroe Islands,1151,1193,UEFA,30500,47511,CIA (2008),CIA (2007) 69 | Fiji,1394,1330,OFC,6655.634722,828060,World Bank,World Bank 70 | Finland,1503,1584,UEFA,34523.70751,5266268,World Bank,World Bank 71 | France,1963,1931,UEFA,32288.80403,63617975,World Bank,World Bank 72 | Gabon,1442,1481,CAF,15306.27739,1412907,World Bank,World Bank 73 | Gambia,1315,1337,CAF,1396.003921,1482324,World Bank,World Bank 74 | Georgia,1621,1443,UEFA,4694.944531,4398000,World Bank,World Bank 75 | Germany,2023,2104,UEFA,34667.64864,82376451,World Bank,World Bank 76 | Ghana,1472,1640,CAF,2398.177001,21947779,World Bank,World Bank 77 | Greece,1681,1654,UEFA,28012.85245,11127947,World Bank,World Bank 78 | Grenada,1064,1131,CONCACAF,10447.57933,103260,World Bank,World Bank 79 | Guam,631,837,AFC,15000,158429,CIA (2005),World Bank 80 | Guatemala,1540,1509,CONCACAF,6109.652627,12995374,World Bank,World Bank 81 | Guinea,1491,1558,CAF,1112.41633,9798963,World Bank,World Bank 82 | Guinea-Bissau,1204,1230,CAF,1193.060526,1452659,World Bank,World Bank 83 | Guyana,1073,1221,CONCACAF,4438.833776,765367,World Bank,World Bank 84 | Haiti,1389,1442,CONCACAF,1446.09824,9388642,World Bank,World Bank 85 | Honduras,1487,1501,CONCACAF,3788.342396,7037428,World Bank,World Bank 86 | Hong Kong,1219,1157,AFC,39949.67056,6857100,World Bank,World Bank 87 | Hungary,1603,1622,UEFA,18583.62346,10071370,World Bank,World Bank 88 | Iceland,1442,1648,UEFA,36880.29818,303782,World Bank,World Bank 89 | India,1156,1093,AFC,3224.70242,1143289350,World Bank,World Bank 90 | Indonesia,1329,1208,AFC,6213.507267,227709821,World Bank,World Bank 91 | Iran,1627,1723,AFC,12667.40675,70976584,World Bank,World Bank 92 | Iraq,1625,1519,AFC,10343.51392,28064095,World Bank,World Bank 93 | Ireland,1661,1703,UEFA,43714.38383,4273591,World Bank,World Bank 94 | Israel,1640,1600,UEFA,25379.73644,7053700,World Bank,World Bank 95 | Italy,1929,1853,UEFA,31988.08666,58143979,World Bank,World Bank 96 | Ivory 
Coast,1610,1737,CAF,2594.339964,17662417,World Bank,World Bank 97 | Jamaica,1626,1525,CONCACAF,8217.370381,2653042,World Bank,World Bank 98 | Japan,1710,1755,AFC,31790.65016,127854000,World Bank,World Bank 99 | Jordan,1411,1460,AFC,9009.814903,5536000,World Bank,World Bank 100 | Kazakhstan,1455,1324,UEFA,15700.48413,15308084,World Bank,World Bank 101 | Kenya,1447,1382,CAF,2128.048097,36757498,World Bank,World Bank 102 | Kuwait,1599,1454,AFC,86397.36265,2417445,World Bank,World Bank 103 | Kyrgyzstan,1230,1094,AFC,2218.608574,5218400,World Bank,World Bank 104 | Laos,1021,843,AFC,2868.883234,5895930,World Bank,World Bank 105 | Latvia,1344,1448,UEFA,15469.29417,2218357,World Bank,World Bank 106 | Lebanon,1363,1411,AFC,11169.14057,4079823,World Bank,World Bank 107 | Lesotho,1173,1216,CAF,1727.392427,1940413,World Bank,World Bank 108 | Liberia,1355,1294,CAF,457.8227679,3384791,World Bank,World Bank 109 | Libya,1446,1478,CAF,25008.83906,5686475,World Bank,World Bank 110 | Liechtenstein,1047,1188,UEFA,89700,35028,CIA (2007),World Bank 111 | Lithuania,1505,1437,UEFA,16756.73765,3269909,World Bank,World Bank 112 | Luxembourg,1166,1195,UEFA,77104.75771,472637,World Bank,World Bank 113 | Macao,1038,640,AFC,65853.64122,479808,World Bank,World Bank 114 | Macedonia,1505,1441,UEFA,8562.084377,2093801,World Bank,World Bank 115 | Madagascar,1466,1284,CAF,1304.374638,18826126,World Bank,World Bank 116 | Malawi,1345,1399,CAF,554.0030573,13307535,World Bank,World Bank 117 | Malaysia,1262,1220,AFC,17053.02174,26327098,World Bank,World Bank 118 | Maldives,854,1066,AFC,8238.051995,302825,World Bank,World Bank 119 | Mali,1450,1544,CAF,1431.40943,12325545,World Bank,World Bank 120 | Malta,1188,1198,UEFA,22291.33607,405308,World Bank,World Bank 121 | Mauritania,1197,1217,CAF,2538.231119,3237713,World Bank,World Bank 122 | Mauritius,1162,1039,CAF,12107.04624,1233996,World Bank,World Bank 123 | Mexico,1826,1878,CONCACAF,13079.64782,112116694,World Bank,World Bank 124 | 
Moldova,1323,1374,UEFA,3189.989932,3585209,World Bank,World Bank 125 | Mongolia,795,689,AFC,4995.374005,2559496,World Bank,World Bank 126 | Montenegro,,1569,UEFA,10497.85125,616854,World Bank,World Bank 127 | Montserrat,760,714,CONCACAF,8500,9538,CIA,CIA (2007) 128 | Morocco,1723,1536,CAF,5254.047452,30395097,World Bank,World Bank 129 | Mozambique,1328,1370,CAF,734.2761797,21587317,World Bank,World Bank 130 | Myanmar,1139,1065,AFC,2575,50500070,IMF,World Bank 131 | Namibia,1348,1384,CAF,7061.349801,2052931,World Bank,World Bank 132 | Nepal,803,899,AFC,1598.1113,25634043,World Bank,World Bank 133 | Netherlands,1926,2000,UEFA,40434.10001,16346101,World Bank,World Bank 134 | New Caledonia,1243,1396,OFC,27300,235314,CIA (2005),World Bank 135 | New Zealand,1442,1524,OFC,27362.47889,4184600,World Bank,World Bank 136 | Nicaragua,976,1115,CONCACAF,3595.663428,5524927,World Bank,World Bank 137 | Niger,1338,1269,CAF,737.4623368,13679705,World Bank,World Bank 138 | Nigeria,1681,1649,CAF,3994.643445,143314909,World Bank,World Bank 139 | North Korea,1553,1448,AFC,1900,23969917,CIA (2007),World Bank 140 | Northern Ireland,1568,1494,UEFA,28212.02108,1748065,"World Bank's estimate for UK, adjusted per http://en.wikipedia.org/w/index.php?title=Countries_of_the_United_Kingdom_by_GVA_per_capita&oldid=153382497",http://en.wikipedia.org/wiki/Demographics_of_Northern_Ireland#Population 141 | Norway,1878,1589,UEFA,53871.16364,4660677,World Bank,World Bank 142 | Oman,1298,1509,AFC,39531.6006,2554905,World Bank,World Bank 143 | Pakistan,928,963,AFC,3740.607865,160905794,World Bank,World Bank 144 | Palestine,1281,1269,AFC,3847.806426,3406334,World Bank,World Bank 145 | Panama,1283,1709,CONCACAF,10907.81503,3428509,World Bank,World Bank 146 | Papua New Guinea,1128,1115,OFC,1669.598639,6245797,World Bank,World Bank 147 | Paraguay,1708,1668,CONMEBOL,5671.463275,6014781,World Bank,World Bank 148 | Peru,1692,1701,CONMEBOL,7318.377448,28030688,World Bank,World Bank 149 | 
Philippines,751,1230,AFC,4557.883234,87366573,World Bank,World Bank 150 | Poland,1625,1713,UEFA,15153.01523,38141267,World Bank,World Bank 151 | Portugal,1846,1867,UEFA,23887.03544,10522288,World Bank,World Bank 152 | Puerto Rico,1011,1055,CONCACAF,34255.06744,3805214,World Bank,World Bank 153 | Qatar,1536,1516,AFC,118198.7316,967602,World Bank,World Bank 154 | Romania,1826,1753,UEFA,11349.54831,21193760,World Bank,World Bank 155 | Russia,1814,1741,UEFA,14975.04355,142487260,World Bank,World Bank 156 | Rwanda,1213,1374,CAF,952.2694664,9660946,World Bank,World Bank 157 | Saint Kitts and Nevis,1245,1222,CONCACAF,20186.97334,49823,World Bank,World Bank 158 | Saint Lucia,1230,1061,CONCACAF,9952.474072,167658,World Bank,World Bank 159 | Saint Vincent and the Grenadines,1232,1190,CONCACAF,9203.39887,108908,World Bank,World Bank 160 | San Marino,929,865,UEFA,77304,30130,IMF,World Bank 161 | Sao Tome e Principe,978,1050,CAF,2336.18468,158806,World Bank,World Bank 162 | Saudi Arabia,1638,1485,AFC,36214.54693,25371936,World Bank,World Bank 163 | Scotland,1745,1701,UEFA,34210.36161,5117000,"World Bank's estimate for UK, adjusted per http://en.wikipedia.org/w/index.php?title=Countries_of_the_United_Kingdom_by_GVA_per_capita&oldid=153382497",http://en.wikipedia.org/wiki/Demographics_of_Scotland 164 | Senegal,1452,1658,CAF,1926.556621,11582925,World Bank,World Bank 165 | Serbia,1934,1708,UEFA,9895.795212,7411569,World Bank,World Bank 166 | Seychelles,971,982,CAF,17465.91509,84600,World Bank,World Bank 167 | Sierra Leone,1433,1336,CAF,1088.42943,5280909,World Bank,World Bank 168 | Singapore,1253,1165,AFC,59754.6699,4401400,World Bank,World Bank 169 | Slovakia,1672,1736,UEFA,18838.76907,5373054,World Bank,World Bank 170 | Slovenia,1465,1619,UEFA,25885.31062,2006868,World Bank,World Bank 171 | Solomon Islands,1380,1217,OFC,1520.601904,480745,World Bank,World Bank 172 | Somalia,1081,862,CAF,600,8687671,CIA (2007),World Bank 173 | South Africa,1638,1559,CAF,10642.66535,47991699,World 
Bank,World Bank 174 | South Korea,1739,1722,AFC,25863.21523,48371946,World Bank,World Bank 175 | South Sudan,,1194,CAF,2782.071345,8376893,South Sudan not independent in 2006; used Sudan's figure instead,World Bank 176 | Spain,2000,1932,UEFA,30879.8564,44397319,World Bank,World Bank 177 | Sri Lanka,1003,786,AFC,5710.937991,19858000,World Bank,World Bank 178 | Sudan,1400,1333,CAF,2782.071345,32397535,World Bank,World Bank 179 | Surinam,1266,1211,CONCACAF,11519.66755,505186,World Bank,World Bank 180 | Swaziland,1175,1195,CAF,5834.12336,1118253,World Bank,World Bank 181 | Sweden,1809,1774,UEFA,37594.46396,9080505,World Bank,World Bank 182 | Switzerland,1634,1768,UEFA,43377.7759,7483934,World Bank,World Bank 183 | Syria,1447,1490,AFC,5389,18804914,IMF,World Bank 184 | Tahiti,1453,1260,OFC,18000,178113,"CIA (2004, French Polynesia)",2007: http://en.wikipedia.org/wiki/Tahiti 185 | Taiwan,1051,809,AFC,31333,22800000,IMF,IMF 186 | Tajikistan,1515,1264,AFC,1651.808504,6954522,World Bank,World Bank 187 | Tanzania,1318,1247,CAF,1732.682948,39942347,World Bank,World Bank 188 | Thailand,1458,1356,AFC,10461.54036,65883961,World Bank,World Bank 189 | Togo,1366,1432,CAF,1126.653498,5685845,World Bank,World Bank 190 | Tonga,931,836,OFC,4538.663256,101617,World Bank,World Bank 191 | Trinidad and Tobago,1430,1510,CONCACAF,26420.72169,1303478,World Bank,World Bank 192 | Tunisia,1631,1568,CAF,8359.634501,10127900,World Bank,World Bank 193 | Turkey,1623,1676,UEFA,13049.32491,68626337,World Bank,World Bank 194 | Turkmenistan,1349,1264,AFC,6550.023237,4801595,World Bank,World Bank 195 | Turks and Caicos,,719,CONCACAF,29100,27642,CIA (2007),World Bank 196 | Uganda,1407,1499,CAF,1196.538493,29711397,World Bank,World Bank 197 | Ukraine,1655,1783,UEFA,7213.516757,46787750,World Bank,World Bank 198 | United Arab Emirates,1558,1654,AFC,87766.14726,4875639,World Bank,World Bank 199 | United States,1730,1804,CONCACAF,46437.10733,298379912,World Bank,World Bank 200 | 
Uruguay,1732,1894,CONMEBOL,12097.36142,3330217,World Bank,World Bank 201 | US Virgin Islands,809,740,CONCACAF,14500,107700,CIA (2004),World Bank 202 | Uzbekistan,1500,1612,AFC,2958.390926,26488250,World Bank,World Bank 203 | Vanuatu,1079,1223,OFC,2533.892793,214654,World Bank,World Bank 204 | Venezuela,1312,1667,CONMEBOL,14846.73691,27190882,World Bank,World Bank 205 | Vietnam,1193,1235,AFC,3383.513008,83311200,World Bank,World Bank 206 | Wales,1529,1645,UEFA,27485.96108,2983270.5,"World Bank's estimate for UK, adjusted per http://en.wikipedia.org/w/index.php?title=Countries_of_the_United_Kingdom_by_GVA_per_capita&oldid=153382497",http://en.wikipedia.org/wiki/Demographics_of_Wales#Population 207 | Western Samoa,852,822,OFC,4972.141147,181073,World Bank,World Bank 208 | Yemen,1179,1198,AFC,3964.068275,20661714,World Bank,World Bank 209 | Zambia,1627,1517,CAF,2525.384325,11781612,World Bank,World Bank 210 | Zimbabwe,1512,1334,CAF,1495.007942,12724308,World Bank,World Bank 211 | -------------------------------------------------------------------------------- /test/data_frame/factories/fake_data_headless.csv: -------------------------------------------------------------------------------- 1 | value_1_1,value_2_7,value_3_3,1 2 | value_1_2,value_2_2,value_3_2,10 3 | value_1_3,value_2_3,value_3_1,200 4 | value_1_2,value_2_4,value_3_3,300 5 | value_1_2,value_2_5,value_3_1,400 6 | value_1_1,value_2_6,value_3_2,500 7 | value_1_3,value_2_7,value_3_1,700 8 | -------------------------------------------------------------------------------- /test/data_frame/factories/from_csv_test.dart: -------------------------------------------------------------------------------- 1 | import 'package:ml_dataframe/src/data_frame/factories/from_csv.dart'; 2 | import 'package:test/test.dart'; 3 | 4 | // Exercises `fromCsv` against the elo_blatter.csv fixture: every column first, 5 | // then a subset picked by index (`columns`) and by name (`columnNames`). 6 | void main() { 7 | group('fromCsv', () { 8 | const csvPath = 'test/data_frame/factories/elo_blatter.csv'; 9 | const subsetHeader = ['country', 'elo15', 'confederation']; 10 | const subsetRow = ['Pakistan', 963, 'AFC']; 11 | 12 | test('should create a dataframe from csv file', () async { 13 | final frame = await fromCsv(csvPath); 14 | final lengths = frame.series.map((column) => column.data.length); 15 | 16 |
expect(frame.series, hasLength(8)); 17 | expect( 18 | frame.header, 19 | equals([ 20 | 'country', 21 | 'elo98', 22 | 'elo15', 23 | 'confederation', 24 | 'gdp06', 25 | 'popu06', 26 | 'gdp_source', 27 | 'popu_source' 28 | ])); 29 | expect(lengths, equals(List.filled(8, 209))); 30 | expect( 31 | frame.rows.elementAt(141), 32 | equals([ 33 | 'Pakistan', 34 | 928, 35 | 963, 36 | 'AFC', 37 | 3740.607865, 38 | 160905794, 39 | 'World Bank', 40 | 'World Bank' 41 | ])); 42 | }); 43 | 44 | test('should consider `columns` parameter', () async { 45 | final frame = await fromCsv(csvPath, columns: [0, 2, 3]); 46 | final lengths = frame.series.map((column) => column.data.length); 47 | 48 | expect(frame.series, hasLength(3)); 49 | expect(frame.header, equals(subsetHeader)); 50 | expect(lengths, equals(List.filled(3, 209))); 51 | expect(frame.rows.elementAt(141), equals(subsetRow)); 52 | }); 53 | 54 | test('should consider `columnNames` parameter', () async { 55 | final frame = await fromCsv(csvPath, 56 | columnNames: ['country', 'elo15', 'confederation']); 57 | final lengths = frame.series.map((column) => column.data.length); 58 | 59 | expect(frame.series, hasLength(3)); 60 | expect(frame.header, equals(subsetHeader)); 61 | expect(lengths, equals(List.filled(3, 209))); 62 | expect(frame.rows.elementAt(141), equals(subsetRow)); 63 | }); 64 | }); 65 | } 66 | -------------------------------------------------------------------------------- /test/data_frame/factories/from_json_test.dart: -------------------------------------------------------------------------------- 1 | import 'package:ml_dataframe/src/data_frame/factories/from_json.dart'; 2 | import 'package:test/test.dart'; 3 | 4 | void main() { 5 | group('fromJson', () { 6 | final fileName = 'test/data_frame/factories/data_frame.json'; 7 | final header = [ 8 | 'country', 9 | 'elo98', 10 |
'elo15', 11 | 'confederation', 12 | 'gdp06', 13 | 'popu06', 14 | 'gdp_source', 15 | 'popu_source' 16 | ]; 17 | 18 | test('should return a valid data frame', () async { 19 | final dataFrame = await fromJson(fileName); 20 | 21 | expect(dataFrame.header, header); 22 | expect(dataFrame.rows.elementAt(107), [ 23 | 'Libya', 24 | 1446, 25 | 1478, 26 | 'CAF', 27 | 25008.83906, 28 | 5686475, 29 | 'World Bank', 30 | 'World Bank' 31 | ]); 32 | expect(dataFrame.series.map((series) => series.name), header); 33 | }); 34 | }); 35 | } 36 | -------------------------------------------------------------------------------- /test/data_frame/factories/prefilled_dataframes/get_housing_data_frame_test.dart: -------------------------------------------------------------------------------- 1 | import 'package:ml_dataframe/ml_dataframe.dart'; 2 | import 'package:test/test.dart'; 3 | 4 | // Checks the housing data frame: header, shape, first and last rows. 5 | void main() { 6 | group('getHousingDataFrame', () { 7 | test('should create a dataframe', () { 8 | final data = getHousingDataFrame(); 9 | 10 | // Expected column names, in order. 11 | expect(data.header, [ 12 | 'CRIM', 13 | 'ZN', 14 | 'INDUS', 15 | 'CHAS', 16 | 'NOX', 17 | 'RM', 18 | 'AGE', 19 | 'DIS', 20 | 'RAD', 21 | 'TAX', 22 | 'PTRATIO', 23 | 'B', 24 | 'LSTAT', 25 | 'MEDV', 26 | ]); 27 | expect(data.shape, [506, 14]); 28 | expect(data.rows.elementAt(0), [ 29 | 0.00632, 30 | 18.00, 31 | 2.310, 32 | 0, 33 | 0.5380, 34 | 6.5750, 35 | 65.20, 36 | 4.0900, 37 | 1, 38 | 296.0, 39 | 15.30, 40 | 396.90, 41 | 4.98, 42 | 24.00 43 | ]); 44 | expect(data.rows.elementAt(505), [ 45 | 0.04741, 46 | 0.00, 47 | 11.930, 48 | 0, 49 | 0.5730, 50 | 6.0300, 51 | 80.80, 52 | 2.5050, 53 | 1, 54 | 273.0, 55 | 21.00, 56 | 396.90, 57 | 7.88, 58 | 11.90 59 | ]); 60 | }); 61 | }); 62 | } 63 | -------------------------------------------------------------------------------- /test/data_frame/factories/prefilled_dataframes/get_iris_data_frame_test.dart: -------------------------------------------------------------------------------- 1 | import
'package:ml_dataframe/src/data_frame/factories/prefilled_dataframes/get_iris_data_frame.dart'; 2 | import 'package:test/test.dart'; 3 | 4 | void main() { 5 | group('getIrisDataFrame', () { 6 | test('should create a dataframe', () { 7 | final data = getIrisDataFrame(); 8 | 9 | expect(data.header, [ 10 | 'Id', 11 | 'SepalLengthCm', 12 | 'SepalWidthCm', 13 | 'PetalLengthCm', 14 | 'PetalWidthCm', 15 | 'Species' 16 | ]); 17 | expect(data.shape, [150, 6]); 18 | expect(data.rows.elementAt(0), [1, 5.1, 3.5, 1.4, 0.2, 'Iris-setosa']); 19 | expect(data.rows.elementAt(113), 20 | [114, 5.7, 2.5, 5.0, 2.0, 'Iris-virginica']); 21 | expect(data.rows.elementAt(149), 22 | [150, 5.9, 3.0, 5.1, 1.8, 'Iris-virginica']); 23 | }); 24 | }); 25 | } 26 | -------------------------------------------------------------------------------- /test/data_frame/factories/prefilled_dataframes/get_pima_indians_diabetes_data_frame_test.dart: -------------------------------------------------------------------------------- 1 | import 'package:ml_dataframe/src/data_frame/factories/prefilled_dataframes/get_pima_indians_diabetes_data_frame.dart'; 2 | import 'package:test/test.dart'; 3 | 4 | // Checks the Pima Indians diabetes data frame: header, shape, sample rows. 5 | void main() { 6 | group('getPimaIndiansDiabetesDataFrame', () { 7 | test('should create a dataframe', () { 8 | const expectedHeader = [ 9 | 'Pregnancies', 10 | 'Glucose', 11 | 'BloodPressure', 12 | 'SkinThickness', 13 | 'Insulin', 14 | 'BMI', 15 | 'DiabetesPedigreeFunction', 16 | 'Age', 17 | 'Outcome' 18 | ]; 19 | // Row index -> expected row contents. 20 | const expectedRows = { 21 | 0: [6, 148, 72, 35, 0, 33.6, 0.627, 50, 1], 22 | 372: [0, 84, 64, 22, 66, 35.8, 0.545, 21, 0], 23 | 767: [1, 93, 70, 31, 0, 30.4, 0.315, 23, 0], 24 | }; 25 | 26 | final frame = getPimaIndiansDiabetesDataFrame(); 27 | 28 | expect(frame.header, expectedHeader); 29 | expect(frame.shape, [768, 9]); 30 | expectedRows.forEach((index, row) { 31 | expect(frame.rows.elementAt(index), row); 32 | }); 33 | }); 34 | }); 35 | } 36 | --------------------------------------------------------------------------------
/test/data_frame/factories/prefilled_dataframes/get_wine_quality_dataframe_test.dart: -------------------------------------------------------------------------------- 1 | import 'package:ml_dataframe/src/data_frame/factories/prefilled_dataframes/get_wine_quality_data_frame.dart'; 2 | import 'package:test/test.dart'; 3 | 4 | // Checks the wine quality data frame: header, shape and four sample rows. 5 | void main() { 6 | group('getWineQualityDataFrame', () { 7 | test('should create a dataframe', () { 8 | final data = getWineQualityDataFrame(); 9 | 10 | expect(data.header, [ 11 | 'fixed acidity', 12 | 'volatile acidity', 13 | 'citric acid', 14 | 'residual sugar', 15 | 'chlorides', 16 | 'free sulfur dioxide', 17 | 'total sulfur dioxide', 18 | 'density', 19 | 'pH', 20 | 'sulphates', 21 | 'alcohol', 22 | 'quality', 23 | ]); 24 | expect(data.shape, [1599, 12]); 25 | expect(data.rows.elementAt(0), 26 | [7.4, 0.7, 0.0, 1.9, 0.076, 11.0, 34.0, 0.9978, 3.51, 0.56, 9.4, 5]); 27 | expect(data.rows.elementAt(322), [ 28 | 7.8, 29 | 0.62, 30 | 0.05, 31 | 2.3, 32 | 0.079, 33 | 6.0, 34 | 18.0, 35 | 0.99735, 36 | 3.29, 37 | 0.63, 38 | 9.3, 39 | 5 40 | ]); 41 | expect(data.rows.elementAt(1032), 42 | [8.1, 0.82, 0.0, 4.1, 0.095, 5.0, 14.0, 0.99854, 3.36, 0.53, 9.6, 5]); 43 | expect(data.rows.elementAt(1598), [ 44 | 6.0, 45 | 0.31, 46 | 0.47, 47 | 3.6, 48 | 0.067, 49 | 18.0, 50 | 42.0, 51 | 0.99549, 52 | 3.39, 53 | 0.66, 54 | 11.0, 55 | 6 56 | ]); 57 | }); 58 | }); 59 | } 60 | -------------------------------------------------------------------------------- /test/data_frame/series_test.dart: -------------------------------------------------------------------------------- 1 | import 'package:ml_dataframe/ml_dataframe.dart'; 2 | import 'package:ml_dataframe/src/data_frame/series_json_keys.dart'; 3 | import 'package:test/test.dart'; 4 | 5 | void main() { 6 | group('Series', () { 7 | final data = [1, 2, 3, '4']; 8 | final dataWithDuplicates = [1, 2, 3, '4', 1, 2, 10, '4']; 9 | final uniqueDataItems = [1, 2, 3, '4', 10]; 10 | final seriesName = 'series_name'; 11 | 12 |
// JSON form of the plain (non-discrete) series built from `data`. 13 | final json = { 14 | seriesNameJsonKey: seriesName, 15 | seriesDataJsonKey: data, 16 | isSeriesDiscreteJsonKey: false, 17 | }; 18 | 19 | // JSON form of a discrete series built from `dataWithDuplicates`. 20 | final discreteSeriesJson = { 21 | seriesNameJsonKey: seriesName, 22 | seriesDataJsonKey: dataWithDuplicates, 23 | isSeriesDiscreteJsonKey: true, 24 | }; 25 | 26 | test('should initialize properly', () { 27 | final series = Series(seriesName, data); 28 | 29 | expect(series.name, seriesName); 30 | expect(series.data, equals(data)); 31 | expect(series.isDiscrete, isFalse); 32 | expect(series.discreteValues, isEmpty); 33 | }); 34 | 35 | test('should initialize as a series with discrete data item sequence', () { 36 | final series = Series(seriesName, dataWithDuplicates, isDiscrete: true); 37 | 38 | expect(series.name, seriesName); 39 | expect(series.data, equals(dataWithDuplicates)); 40 | expect(series.isDiscrete, isTrue); 41 | // Same expectation as the JSON round-trip test below: reuse the shared list. 42 | expect(series.discreteValues, equals(uniqueDataItems)); 43 | }); 44 | 45 | test('should convert to json', () { 46 | final series = Series(seriesName, data); 47 | 48 | expect(series.toJson(), json); 49 | }); 50 | 51 | test('should restore from json', () { 52 | final series = Series.fromJson(json); 53 | 54 | expect(series.name, seriesName); 55 | expect(series.data, data); 56 | expect(series.isDiscrete, isFalse); 57 | expect(series.discreteValues, []); 58 | }); 59 | 60 | test('should restore discrete series from json', () { 61 | final series = Series.fromJson(discreteSeriesJson); 62 | 63 | expect(series.name, seriesName); 64 | expect(series.data, dataWithDuplicates); 65 | expect(series.isDiscrete, isTrue); 66 | expect(series.discreteValues, uniqueDataItems); 67 | }); 68 | }); 69 | } 70 | -------------------------------------------------------------------------------- /test/numerical_converter/numerical_converter_impl_test.dart: -------------------------------------------------------------------------------- 1 | import 'package:test/test.dart'; 2 | 3 | // Placeholder: only an empty, unnamed group is registered here; no tests yet. 4 | void main() { 5 | group('', () {}); 6 | } 7 |
--------------------------------------------------------------------------------