├── requirements.txt
├── Dockerfile
├── _classification
│   ├── models
│   │   ├── naive_bayes.py
│   │   ├── svm.py
│   │   ├── knn.py
│   │   ├── NearestCentroid.py
│   │   ├── logstic_regression.py
│   │   ├── decision_tree.py
│   │   ├── random_forest.py
│   │   ├── perceptron.py
│   │   ├── mlp.py
│   │   └── models.py
│   ├── pre_process
│   │   ├── data
│   │   │   ├── spectrums.json
│   │   │   └── parser.py
│   │   ├── dimensionality_reduction
│   │   │   ├── pca.py
│   │   │   └── lda.py
│   │   └── standardize.py
│   ├── utils.py
│   └── parameter_finder.py
├── classification.py
├── pre_process.py
├── train_models.py
├── README.md
└── assets
    └── result.csv

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
scikit-learn
pandas
numpy
matplotlib
seaborn
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
FROM python:3.11
WORKDIR /classification
COPY . .
RUN pip install --no-cache-dir -r requirements.txt
CMD ["python", "classification.py"]
--------------------------------------------------------------------------------
/_classification/pre_process/data/spectrums.json:
--------------------------------------------------------------------------------
{
    "s1": ["VH", "VV", "HH", "VH_1", "VV_1"],
    "s2": ["B1", "B2", "B3", "B4", "B5", "B6", "B11", "B12", "B13", "B14", "B15", "B16", "NDVI", "EVI", "SAVI"]
}
--------------------------------------------------------------------------------
/_classification/pre_process/data/parser.py:
--------------------------------------------------------------------------------
import json
import os

def get_path_script():
    """Return the directory that contains this script."""
    return os.path.dirname(os.path.abspath(__file__))


def get_spectrums():
    """Load the satellite band groups from spectrums.json."""
    spectrums_file = os.path.join(get_path_script(), "spectrums.json")

    with open(spectrums_file, "r") as json_file:
        spectrums = json.load(json_file)
    return spectrums
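
# Example usage (illustrative, not part of the original module): running this
# file directly prints the band groups bundled with the repository.
if __name__ == "__main__":
    for satellite, bands in get_spectrums().items():
        print(satellite, "->", bands)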
--------------------------------------------------------------------------------
/classification.py:
--------------------------------------------------------------------------------
import pandas as pd
from pre_process import pre_process
from train_models import train_models


def classification(df: pd.DataFrame,
                   class_column: str,
                   path: str,
                   name: str) -> None:

    x_data, y = pre_process(df, class_column)

    train_models(x_data, y, path, name)


path_csv = "/home/reza/hamedan_seifi.csv"
df = pd.read_csv(path_csv)


def binary(label, target):
    """Collapse every class except `target` into a single "other" class."""
    if label != target:
        return "other"
    return label


df["Name"] = df["Name"].apply(lambda x: binary(x, "wi-wr-br-bi"))

df = df.sample(100)  # subsample for a quick run
classification(df, "Name", "/home/reza/hamedan_seifi", "hamedan_seifi")
--------------------------------------------------------------------------------
/_classification/models/naive_bayes.py:
--------------------------------------------------------------------------------
from sklearn.naive_bayes import GaussianNB

# ******** Get Naive Bayes Function ********
def get_nb():
    """
    Initializes a Gaussian Naive Bayes classifier and provides a parameter grid for hyperparameter tuning.

    Returns:
        tuple:
            - GaussianNB object: An instance of the Gaussian Naive Bayes classifier.
            - dict: A dictionary containing hyperparameter options for tuning.
    """

    # Define the hyperparameter grid for Gaussian Naive Bayes
    param_naive_bayes = {
        'var_smoothing': [1e-9, 1e-8, 1e-7, 1e-6, 1e-5]
        # 'var_smoothing': A smoothing parameter added to the variance to prevent zero probabilities
        # and handle numerical stability issues. Values closer to 1e-9 are typical defaults.
    }

    # Initialize a GaussianNB object
    naive_bayes = GaussianNB()

    # Return the classifier and the hyperparameter grid
    return naive_bayes, param_naive_bayes
--------------------------------------------------------------------------------
/_classification/models/svm.py:
--------------------------------------------------------------------------------
from sklearn.svm import SVC

# ******** Get Support Vector Machine (SVM) Function ********
def get_svm():
    """
    Initializes a Support Vector Classifier (SVC) and provides a parameter grid for hyperparameter tuning.

    Returns:
        tuple:
            - SVC object: An instance of the Support Vector Classifier.
            - dict: A dictionary containing hyperparameter options for tuning.
    """

    # Define the hyperparameter grid for the SVC (Support Vector Classifier)
    param_svm = {
        'C': [0.1, 1, 10, 100, 1000],  # Regularization parameter (larger values mean less regularization)
        'kernel': ['rbf'],  # The kernel type to be used in the algorithm (radial basis function kernel)
        'gamma': [0.001, 0.01, 0.1, 1],  # Kernel coefficient for 'rbf' kernel
    }

    # Initialize a Support Vector Classifier object
    svm_classifier = SVC()

    # Return the classifier and the hyperparameter grid
    return svm_classifier, param_svm
12 | """ 13 | 14 | # Define the hyperparameter grid for the KNN classifier 15 | param_knn = { 16 | 'n_neighbors': list(range(1, 15, 2)), # Number of neighbors to consider, ranging from 1 to 15 with a step of 2 17 | 'weights': ['uniform', 'distance'], # Weighting scheme: 'uniform' (all points equal) or 'distance' (inverse distance) 18 | 'metric': ['euclidean', 'manhattan', 'minkowski'] # Distance metrics to use for the KNN algorithm 19 | } 20 | 21 | # Initialize a KNeighborsClassifier object 22 | knn = KNeighborsClassifier() 23 | 24 | # Return the classifier and the hyperparameter grid 25 | return knn, param_knn 26 | -------------------------------------------------------------------------------- /_classification/models/NearestCentroid.py: -------------------------------------------------------------------------------- 1 | from sklearn.neighbors import NearestCentroid 2 | 3 | # ******** Get Nearest Centroid Function ******** 4 | def get_nc(): 5 | """ 6 | Initializes a Nearest Centroid classifier and provides a parameter grid for hyperparameter tuning. 7 | 8 | Returns: 9 | tuple: 10 | - NearestCentroid object: An instance of the Nearest Centroid classifier. 11 | - dict: A dictionary containing hyperparameter options for tuning. 12 | """ 13 | 14 | # Define the hyperparameter grid for the Nearest Centroid classifier 15 | param_NearestCentroid = { 16 | 'metric': ['euclidean', 'manhattan'], # Distance metrics to compute nearest centroid 17 | 'shrink_threshold': [None, 0.1, 0.2, 0.5, 0.7, 0.8] 18 | # 'shrink_threshold': Optional shrinkage threshold to regularize centroids (if not None). 19 | # Helps to improve robustness with high-dimensional data. 20 | } 21 | 22 | # Initialize a NearestCentroid object 23 | nearest_centroid = NearestCentroid() 24 | 25 | # Return the classifier and the hyperparameter grid 26 | return nearest_centroid, param_NearestCentroid 27 | -------------------------------------------------------------------------------- /_classification/models/logstic_regression.py: -------------------------------------------------------------------------------- 1 | from sklearn.linear_model import LogisticRegression 2 | 3 | # ******** Get Logistic Regression Function ******** 4 | def get_lr(): 5 | """ 6 | Initializes a Logistic Regression classifier and provides a parameter grid for hyperparameter tuning. 7 | 8 | Returns: 9 | tuple: 10 | - LogisticRegression object: An instance of the Logistic Regression classifier. 11 | - dict: A dictionary containing hyperparameter options for tuning. 
12 | """ 13 | 14 | # Define the hyperparameter grid for Logistic Regression 15 | param_logsticRegression = { 16 | 'penalty': ['l1', 'l2'], # Regularization techniques: L1 (Lasso) or L2 (Ridge) 17 | 'C': [0.01, 0.1, 1, 10, 100], # Inverse regularization strength (smaller values = stronger regularization) 18 | 'solver': ['liblinear', 'saga'], # Optimization solvers for fitting the model 19 | 'max_iter': [100, 200, 300, 500] # Maximum number of iterations for solver convergence 20 | } 21 | 22 | # Initialize a LogisticRegression object 23 | logistic_regression = LogisticRegression() 24 | 25 | # Return the classifier and the hyperparameter grid 26 | return logistic_regression, param_logsticRegression 27 | -------------------------------------------------------------------------------- /_classification/models/decision_tree.py: -------------------------------------------------------------------------------- 1 | from sklearn.tree import DecisionTreeClassifier 2 | 3 | # ******** Get Decision Tree Function ******** 4 | def get_dt(): 5 | """ 6 | Initializes a Decision Tree Classifier and provides a parameter grid for hyperparameter tuning. 7 | 8 | Returns: 9 | tuple: 10 | - DecisionTreeClassifier object: An instance of the Decision Tree Classifier. 11 | - dict: A dictionary containing hyperparameter options for tuning. 12 | """ 13 | 14 | # Define the hyperparameter grid for the Decision Tree Classifier 15 | param_decisionTree = { 16 | 'criterion': ['gini', 'entropy'], # Criterion for splitting ('gini' impurity or 'entropy' for information gain) 17 | 'max_depth': [None, 10, 20, 30, 40], # Maximum depth of the tree (None means unlimited depth) 18 | 'min_samples_split': [2, 5, 10], # Minimum number of samples required to split an internal node 19 | 'min_samples_leaf': [1, 2, 4] # Minimum number of samples required to be at a leaf node 20 | } 21 | 22 | # Initialize a DecisionTreeClassifier object 23 | decision_tree = DecisionTreeClassifier() 24 | 25 | # Return the classifier and the hyperparameter grid 26 | return decision_tree, param_decisionTree 27 | -------------------------------------------------------------------------------- /_classification/models/random_forest.py: -------------------------------------------------------------------------------- 1 | from sklearn.ensemble import RandomForestClassifier 2 | 3 | # ******** Get Random Forest Function ******** 4 | def get_rf(): 5 | """ 6 | Initializes a Random Forest classifier and provides a parameter grid for hyperparameter tuning. 7 | 8 | Returns: 9 | tuple: 10 | - RandomForestClassifier object: An instance of the Random Forest classifier. 11 | - dict: A dictionary containing hyperparameter options for tuning. 
12 | """ 13 | 14 | # Define the hyperparameter grid for the Random Forest classifier 15 | param_randomForest = { 16 | 'n_estimators': [50, 100, 200], # Number of trees in the forest 17 | 'criterion': ['gini', 'entropy'], # Splitting criteria: Gini Impurity or Entropy 18 | 'max_depth': [None, 10, 20, 30], # Maximum depth of the trees (None means no limit) 19 | 'min_samples_split': [2, 5, 10], # Minimum samples required to split an internal node 20 | 'min_samples_leaf': [1, 2, 4] # Minimum samples required at a leaf node 21 | } 22 | 23 | # Initialize a RandomForestClassifier object 24 | random_forest = RandomForestClassifier() 25 | 26 | # Return the classifier and the hyperparameter grid 27 | return random_forest, param_randomForest 28 | -------------------------------------------------------------------------------- /_classification/models/perceptron.py: -------------------------------------------------------------------------------- 1 | from sklearn.linear_model import Perceptron 2 | 3 | # ******** Get Perceptron Function ******** 4 | def get_pr(): 5 | """ 6 | Initializes a Perceptron classifier and provides a parameter grid for hyperparameter tuning. 7 | 8 | Returns: 9 | tuple: 10 | - Perceptron object: An instance of the Perceptron classifier. 11 | - dict: A dictionary containing hyperparameter options for tuning. 12 | """ 13 | 14 | # Define the hyperparameter grid for the Perceptron classifier 15 | param_perceptron = { 16 | 'penalty': ['l1', 'l2', 'elasticnet'], # Regularization types to prevent overfitting: 17 | # - 'l1': Lasso regularization (sparsity of features). 18 | # - 'l2': Ridge regularization (shrinks coefficients to reduce multicollinearity). 19 | # - 'elasticnet': Combination of L1 and L2 regularization. 20 | 21 | 'alpha': [0.0001, 0.001, 0.01, 0.1, 1], # Regularization strength (smaller values = stronger regularization). 22 | 23 | 'max_iter': [1000, 2000, 3000] # Maximum number of passes over the training data. 24 | } 25 | 26 | # Initialize a Perceptron classifier 27 | perceptron = Perceptron() 28 | 29 | # Return the classifier and the hyperparameter grid 30 | return perceptron, param_perceptron 31 | -------------------------------------------------------------------------------- /_classification/models/mlp.py: -------------------------------------------------------------------------------- 1 | from sklearn.neural_network import MLPClassifier 2 | 3 | # ******** Get Multi-Layer Perceptron (MLP) Classifier Function ******** 4 | def get_mlp(): 5 | """ 6 | Initializes a Multi-Layer Perceptron (MLP) classifier and provides a parameter grid for hyperparameter tuning. 7 | 8 | Returns: 9 | tuple: 10 | - MLPClassifier object: An instance of the Multi-Layer Perceptron classifier. 11 | - dict: A dictionary containing hyperparameter options for tuning. 
12 | """ 13 | 14 | # Define the hyperparameter grid for the MLP classifier 15 | param_mlp = { 16 | 'hidden_layer_sizes': [ 17 | (50,), (100,), (50, 50), (100, 100) 18 | ], # Number of neurons in each hidden layer, e.g., single-layer 50 or two-layers 50-50 19 | 'activation': ['tanh', 'relu'], # Activation functions: 'tanh' or 'ReLU' 20 | 'solver': ['sgd', 'adam'], # Optimization solvers: Stochastic Gradient Descent (SGD) or Adam 21 | 'alpha': [0.0001, 0.001, 0.01], # L2 regularization parameter to prevent overfitting 22 | 'learning_rate': ['constant', 'adaptive'], # Learning rate schedule 23 | 'max_iter': [100, 200, 300, 400, 500], # Maximum number of iterations to converge 24 | } 25 | 26 | # Initialize an MLPClassifier object 27 | mlp_classifier = MLPClassifier() 28 | 29 | # Return the classifier and the hyperparameter grid 30 | return mlp_classifier, param_mlp 31 | -------------------------------------------------------------------------------- /_classification/models/models.py: -------------------------------------------------------------------------------- 1 | # Importing model functions from different scripts 2 | from .decision_tree import get_dt # Decision Tree Classifier 3 | from .knn import get_knn # K-Nearest Neighbors Classifier 4 | from .logstic_regression import get_lr # Logistic Regression Classifier 5 | from .mlp import get_mlp # Multi-Layer Perceptron Classifier 6 | from .naive_bayes import get_nb # Naive Bayes Classifier 7 | from .perceptron import get_pr # Perceptron Classifier 8 | from .random_forest import get_rf # Random Forest Classifier 9 | from .svm import get_svm # Support Vector Machine Classifier 10 | from .NearestCentroid import get_nc # Nearest Centroid Classifier 11 | 12 | # ******** Get Details of All Models Function ******** 13 | def get_details_models(): 14 | """ 15 | Returns a list of all available classification models and their respective hyperparameter grids. 16 | 17 | Each model function is called, which returns the model instance and its hyperparameters. 18 | The function returns a list of tuples, where each tuple contains a model and its associated parameter grid. 19 | 20 | Returns: 21 | list: A list of tuples, each containing a classifier and a dictionary of hyperparameters. 22 | """ 23 | return [ 24 | get_nc(), # Nearest Centroid 25 | get_knn(), # K-Nearest Neighbors 26 | get_dt(), # Decision Tree 27 | get_lr(), # Logistic Regression 28 | get_mlp(), # Multi-Layer Perceptron 29 | get_nb(), # Naive Bayes 30 | get_pr(), # Perceptron 31 | get_rf(), # Random Forest 32 | get_svm() # Support Vector Machine 33 | ] 34 | -------------------------------------------------------------------------------- /_classification/utils.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from .pre_process.data.parser import get_spectrums 4 | 5 | 6 | def split_data(df: pd.DataFrame) -> list: 7 | """ 8 | Splits the input DataFrame into two subsets (s1 and s2) based on specific conditions. 9 | 10 | Parameters: 11 | df: pd.DataFrame 12 | Input DataFrame containing the columns to be split. 13 | 14 | Returns: 15 | list: 16 | A list containing two NumPy arrays: 17 | - s1: Subset of the data matching the "s1" criteria. 18 | - s2: Subset of the data matching the "s2" criteria. 
19 | """ 20 | 21 | # Lists to store column names for s1 and s2 22 | s1_columns = [] 23 | s2_columns = [] 24 | 25 | # Get the predefined spectral data for s1 and s2 26 | data = get_spectrums() 27 | 28 | # Iterate through each column in the input DataFrame 29 | for column in df.columns: 30 | # Check if the column belongs to s1 based on the spectral data 31 | if check_s1_and_s2(column, data["s1"]): 32 | s1_columns.append(column) 33 | # Check if the column belongs to s2 based on the spectral data 34 | elif check_s1_and_s2(column, data["s2"]): 35 | s2_columns.append(column) 36 | 37 | # Concatenate the selected columns for s1 and s2 into separate DataFrames 38 | s1 = pd.concat([df[col] for col in s1_columns], axis=1) 39 | s2 = pd.concat([df[col] for col in s2_columns], axis=1) 40 | 41 | # Convert the DataFrames to NumPy arrays and return them as a list 42 | return [ 43 | np.array(s1), # Subset for s1 44 | np.array(s2), # Subset for s2 45 | ] 46 | 47 | 48 | def check_s1_and_s2(column: str, data: list) -> bool: 49 | """ 50 | Checks if a column name matches any item in the given spectral data list. 51 | 52 | Parameters: 53 | column: str 54 | The column name to be checked. 55 | 56 | data: list 57 | A list of predefined spectral data for matching. 58 | 59 | Returns: 60 | bool: 61 | True if the column matches any item in the list (case-insensitive), False otherwise. 62 | """ 63 | # Convert column to uppercase and check if it matches any item in the data list 64 | for _column in data: 65 | if _column in column.upper(): 66 | return True 67 | return False 68 | -------------------------------------------------------------------------------- /_classification/pre_process/dimensionality_reduction/pca.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from sklearn.decomposition import PCA 4 | 5 | # ******** PCA Split Function ******** 6 | def pca_split(X_s1: np.array, X_s2: np.array) -> np.array: 7 | """ 8 | Perform PCA on two datasets separately and combine their transformed components. 9 | 10 | Parameters: 11 | X_s1 (np.array): The first dataset (2D array) to apply PCA. 12 | X_s2 (np.array): The second dataset (2D array) to apply PCA. 13 | 14 | Returns: 15 | np.array: A combined array of PCA-transformed features from both datasets. 16 | """ 17 | 18 | # Get the best number of components for the first dataset 19 | best_n_components_s1 = get_best_n_components(X_s1) 20 | # Get the best number of components for the second dataset 21 | best_n_components_s2 = get_best_n_components(X_s2) 22 | 23 | # Apply PCA to the first dataset with the optimal number of components 24 | X_pca_s1 = pca(X_s1, best_n_components_s1) 25 | # Apply PCA to the second dataset with the optimal number of components 26 | X_pca_s2 = pca(X_s2, best_n_components_s2) 27 | 28 | # Combine the PCA-transformed datasets by horizontally stacking them 29 | X = np.hstack((X_pca_s1, X_pca_s2)) 30 | return X 31 | 32 | # ******** Get Best Number of Components ******** 33 | def get_best_n_components(X: np.array) -> int: 34 | """ 35 | Determine the optimal number of PCA components that explain at least 95% of the variance. 36 | 37 | Parameters: 38 | X (np.array): The dataset (2D array) to analyze. 39 | 40 | Returns: 41 | int: The optimal number of components. 
42 | """ 43 | 44 | # Fit PCA on the dataset without specifying the number of components 45 | pca = PCA().fit(X) 46 | 47 | # Compute the cumulative variance explained by each component 48 | cumulative_variance = np.cumsum(pca.explained_variance_ratio_) 49 | 50 | # Find the smallest number of components that explain at least 95% of the variance 51 | best_n_components = np.argmax(cumulative_variance >= 0.95) + 1 52 | return best_n_components 53 | 54 | # ******** PCA Function ******** 55 | def pca(X: np.array, n_components_best: int) -> np.array: 56 | """ 57 | Apply PCA on the dataset with a specified number of components. 58 | 59 | Parameters: 60 | X (np.array): The dataset (2D array) to transform. 61 | n_components_best (int): The number of components to retain. 62 | 63 | Returns: 64 | np.array: The PCA-transformed dataset. 65 | """ 66 | 67 | # Check if the number of components is not provided; compute the best number of components 68 | if isinstance(n_components_best, type(None)): 69 | n_components_best = get_best_n_components(X) 70 | 71 | # Initialize the PCA object with the specified number of components 72 | pca = PCA(n_components=n_components_best, svd_solver='auto') 73 | 74 | # Fit PCA to the dataset and transform it 75 | X_pca = pca.fit_transform(X) 76 | return X_pca 77 | -------------------------------------------------------------------------------- /pre_process.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | from _classification.pre_process.standardize import standardize 4 | from _classification.utils import split_data 5 | from _classification.pre_process.dimensionality_reduction.lda import lda_split, lda 6 | from _classification.pre_process.dimensionality_reduction.pca import pca_split, pca 7 | 8 | def pre_process(df: pd.DataFrame, 9 | class_column: str) -> list: 10 | """ 11 | This function preprocesses the input data by standardizing features, 12 | performing dimensionality reduction using PCA and LDA, and splitting the data. 13 | 14 | Parameters: 15 | df: pd.DataFrame 16 | The input DataFrame containing features and the class/target column. 17 | 18 | class_column: str 19 | The name of the column representing the target variable. 20 | 21 | Returns: 22 | x_data: dict 23 | A dictionary containing the original, PCA, LDA, and split versions of the data. 24 | 25 | y: pd.Series 26 | The target variable extracted from the input DataFrame. 
27 | """ 28 | 29 | # Separate features (X) and target variable (y) 30 | X, y = df.drop(class_column, axis=1), df[class_column] 31 | 32 | print("📌 Start pre process ...") 33 | 34 | # Step 1: Standardize the feature data 35 | print("--- 📌start standardize") 36 | standardize_data = standardize(X) # Standardize the input features 37 | print("--- ✅finish standardize") 38 | 39 | # Step 2: Perform dimensionality reduction (PCA and LDA) and split the data 40 | print("--- 📌start dimensionality reduction") 41 | 42 | # Initialize an empty dictionary to store processed data 43 | x_data = {} 44 | 45 | # Iterate through each standardized dataset (if multiple types of standardization are applied) 46 | for name, _data in standardize_data.items(): 47 | # Split the standardized data into two parts (e.g., for training and testing) 48 | s1, s2 = split_data(_data) 49 | 50 | # Apply PCA to the entire dataset 51 | data_pca = pca(_data, None) 52 | # Apply LDA to the entire dataset 53 | data_lda = lda(_data, y, None) 54 | 55 | # Apply PCA to the split datasets 56 | split_pca = pca_split(s1, s2) 57 | # Apply LDA to the split datasets 58 | split_lda = lda_split(s1, s2, y) 59 | 60 | # Create a temporary dictionary to store the original, PCA, LDA, and split data 61 | temp = { 62 | "original": _data, # Original standardized data 63 | "pca": data_pca, # PCA-reduced data 64 | "lda": data_lda, # LDA-reduced data 65 | "split pca": split_pca, # PCA-reduced split data (PCA on S1 | pn S2) 66 | "split lda": split_lda # LDA-reduced split data (LDA on S1 | pn S2) 67 | } 68 | 69 | # Add the processed data for the current type of standardization to the main dictionary 70 | x_data[name] = temp 71 | 72 | print("--- ✅finish dimensionality reduction") 73 | 74 | print("✅finish pre process ...") 75 | 76 | # Return the processed data (x_data) and target variable (y) 77 | return x_data, y 78 | -------------------------------------------------------------------------------- /_classification/pre_process/standardize.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.preprocessing import ( 3 | StandardScaler, 4 | MinMaxScaler, 5 | MaxAbsScaler, 6 | RobustScaler, 7 | Normalizer 8 | ) 9 | 10 | # ******** Standardize Function ******** 11 | def standardize(df: pd.DataFrame) -> dict: 12 | """ 13 | Applies various data scaling techniques to the input DataFrame. 14 | 15 | Parameters: 16 | df (pd.DataFrame): The input DataFrame to be scaled. 17 | 18 | Returns: 19 | dict: A dictionary containing the original and scaled DataFrames using different scalers. 20 | """ 21 | 22 | # Get the column names of the input DataFrame 23 | columns = df.columns 24 | 25 | # ******** Standard Scaling ******** 26 | # StandardScaler scales features by removing the mean and scaling to unit variance. 27 | scaler_standard = StandardScaler() # Initialize the StandardScaler 28 | X_standard_scaled = scaler_standard.fit_transform(df) # Fit and transform the data 29 | df_standard_scaled = pd.DataFrame(X_standard_scaled, columns=columns) # Create a DataFrame 30 | 31 | # ******** Min-Max Scaling ******** 32 | # MinMaxScaler scales features to a range between 0 and 1. 
--------------------------------------------------------------------------------
/_classification/pre_process/standardize.py:
--------------------------------------------------------------------------------
import pandas as pd
from sklearn.preprocessing import (
    StandardScaler,
    MinMaxScaler,
    MaxAbsScaler,
    RobustScaler,
    Normalizer
)

# ******** Standardize Function ********
def standardize(df: pd.DataFrame) -> dict:
    """
    Applies various data scaling techniques to the input DataFrame.

    Parameters:
        df (pd.DataFrame): The input DataFrame to be scaled.

    Returns:
        dict: A dictionary containing the original and scaled DataFrames using different scalers.
    """

    # Get the column names of the input DataFrame
    columns = df.columns

    # ******** Standard Scaling ********
    # StandardScaler scales features by removing the mean and scaling to unit variance.
    scaler_standard = StandardScaler()  # Initialize the StandardScaler
    X_standard_scaled = scaler_standard.fit_transform(df)  # Fit and transform the data
    df_standard_scaled = pd.DataFrame(X_standard_scaled, columns=columns)  # Create a DataFrame

    # ******** Min-Max Scaling ********
    # MinMaxScaler scales features to a range between 0 and 1.
    scaler_minmax = MinMaxScaler()  # Initialize the MinMaxScaler
    X_minmax_scaled = scaler_minmax.fit_transform(df)  # Fit and transform the data
    df_minmax_scaled = pd.DataFrame(X_minmax_scaled, columns=columns)  # Create a DataFrame

    # ******** Max-Abs Scaling ********
    # MaxAbsScaler scales each feature by its maximum absolute value, preserving sparsity.
    scaler_maxabs = MaxAbsScaler()  # Initialize the MaxAbsScaler
    X_maxabs_scaled = scaler_maxabs.fit_transform(df)  # Fit and transform the data
    df_maxabs_scaled = pd.DataFrame(X_maxabs_scaled, columns=columns)  # Create a DataFrame

    # ******** Robust Scaling ********
    # RobustScaler scales features using statistics that are robust to outliers
    # (e.g., median and interquartile range).
    scaler_robust = RobustScaler()  # Initialize the RobustScaler
    X_robust_scaled = scaler_robust.fit_transform(df)  # Fit and transform the data
    df_robust_scaled = pd.DataFrame(X_robust_scaled, columns=columns)  # Create a DataFrame

    # ******** Normalization ********
    # Normalizer scales each sample (row) to have unit norm, preserving the shape of the data.
    scaler_normalizer = Normalizer()  # Initialize the Normalizer
    X_normalized = scaler_normalizer.fit_transform(df)  # Fit and transform the data
    df_normalized = pd.DataFrame(X_normalized, columns=columns)  # Create a DataFrame

    # Return all scaled DataFrames along with the original DataFrame
    return {
        "original": df,                         # Original DataFrame
        "standard_scaled": df_standard_scaled,  # Standard-scaled DataFrame
        "minmax_scaled": df_minmax_scaled,      # MinMax-scaled DataFrame
        "maxabs_scaled": df_maxabs_scaled,      # MaxAbs-scaled DataFrame
        "robust_scaled": df_robust_scaled,      # Robust-scaled DataFrame
        "normalized": df_normalized             # Normalized DataFrame
    }
17 | """ 18 | 19 | # Get the best number of components for the first dataset 20 | best_n_components_s1 = get_best_n_components(X_s1, y) 21 | # Get the best number of components for the second dataset 22 | best_n_components_s2 = get_best_n_components(X_s2, y) 23 | 24 | # Apply LDA to the first dataset with the optimal number of components 25 | X_lda_s1 = lda(X_s1, y, best_n_components_s1) 26 | # Apply LDA to the second dataset with the optimal number of components 27 | X_lda_s2 = lda(X_s2, y, best_n_components_s2) 28 | 29 | # Combine the LDA-transformed datasets by horizontally stacking them 30 | X = np.hstack((X_lda_s1, X_lda_s2)) 31 | return X 32 | 33 | # ******** Get Best Number of Components ******** 34 | def get_best_n_components(X: np.array, y: np.array) -> int: 35 | """ 36 | Determine the optimal number of LDA components that explain at least 95% of the variance. 37 | 38 | Parameters: 39 | X (np.array): The dataset (2D array) to analyze. 40 | y (np.array): The target labels (1D array) associated with the dataset. 41 | 42 | Returns: 43 | int: The optimal number of components. 44 | """ 45 | 46 | # Initialize the LDA object 47 | lda = LinearDiscriminantAnalysis() 48 | 49 | # Fit LDA to the data and target labels 50 | X_lda = lda.fit_transform(X, y) 51 | 52 | # Compute the explained variance ratio 53 | explained_variance_ratio = lda.explained_variance_ratio_ 54 | 55 | # Compute the cumulative variance ratio 56 | cumulative_variance_ratio = np.cumsum(explained_variance_ratio) 57 | 58 | # Determine the smallest number of components that explain at least 95% of the variance 59 | n_components = np.argmax(cumulative_variance_ratio >= 0.95) + 1 60 | 61 | return n_components 62 | 63 | # ******** LDA Function ******** 64 | def lda(X: np.array, y: np.array, n_components_best: int) -> np.array: 65 | """ 66 | Apply Linear Discriminant Analysis (LDA) on the dataset with a specified number of components. 67 | 68 | Parameters: 69 | X (np.array): The dataset (2D array) to transform. 70 | y (np.array): The target labels (1D array) associated with the dataset. 71 | n_components_best (int): The number of components to retain. 72 | 73 | Returns: 74 | np.array: The LDA-transformed dataset. 
75 | """ 76 | 77 | # If the number of components is not provided, compute the optimal number of components 78 | if isinstance(n_components_best, type(None)): 79 | n_components_best = get_best_n_components(X, y) 80 | 81 | # Initialize the LDA object with the specified number of components 82 | lda = LinearDiscriminantAnalysis(n_components=n_components_best) 83 | 84 | # Fit LDA to the data and transform it 85 | X_lda = lda.fit_transform(X, y) 86 | return X_lda 87 | -------------------------------------------------------------------------------- /_classification/parameter_finder.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from sklearn.model_selection import GridSearchCV 3 | from sklearn.metrics import ( 4 | recall_score, 5 | accuracy_score, 6 | precision_score, 7 | f1_score, 8 | confusion_matrix, 9 | cohen_kappa_score, 10 | make_scorer 11 | ) 12 | import numpy as np 13 | import pandas as pd 14 | import time 15 | import os # For handling file paths 16 | import matplotlib.pyplot as plt 17 | import seaborn as sns # For a more aesthetic plot 18 | 19 | # Suppress all warnings 20 | warnings.filterwarnings("ignore") 21 | 22 | 23 | def classification_parameter_finder(model, 24 | parameters: dict, 25 | X_train: np.array, 26 | y_train: np.array, 27 | X_test: np.array, 28 | y_test: np.array, 29 | method: str, 30 | path: str): 31 | 32 | 33 | """ 34 | This function performs hyperparameter tuning for a given classification model using GridSearchCV, 35 | evaluates its performance on training and testing datasets, and visualizes the confusion matrix. 36 | 37 | Parameters: 38 | model: sklearn estimator 39 | The machine learning model to be tuned (e.g., RandomForestClassifier, SVC, etc.). 40 | 41 | parameters: dict 42 | The dictionary containing hyperparameters and their possible values for GridSearchCV. 43 | 44 | X_train: np.array 45 | Training feature data. 46 | 47 | y_train: np.array 48 | Training labels. 49 | 50 | X_test: np.array 51 | Testing feature data. 52 | 53 | y_test: np.array 54 | Testing labels. 55 | 56 | method: str 57 | The name of the method or experiment (used for labeling and saving files). 58 | 59 | path: str 60 | Directory path where output files, such as the confusion matrix image, will be saved. 61 | 62 | Returns: 63 | results: pandas.DataFrame 64 | A DataFrame summarizing the best model, its hyperparameters, evaluation metrics 65 | (accuracy, precision, recall, F1-score, kappa score), and runtime information. 66 | It also includes the file path of the saved confusion matrix image. 
67 | """ 68 | 69 | 70 | 71 | 72 | model_name = str(model).split('(')[0] 73 | 74 | start = time.time() 75 | 76 | 77 | kappa_scorer = make_scorer(cohen_kappa_score) 78 | 79 | grid = GridSearchCV(model, 80 | param_grid=parameters, 81 | refit=True, 82 | cv=5, 83 | n_jobs=-1, 84 | scoring=kappa_scorer) 85 | grid.fit(X_train, y_train) 86 | 87 | y_train_pred = grid.predict(X_train) 88 | y_test_pred = grid.predict(X_test) 89 | 90 | train_accuracy = accuracy_score(y_train, y_train_pred) 91 | test_accuracy = accuracy_score(y_test, y_test_pred) 92 | precision = precision_score(y_test, y_test_pred, average='weighted') 93 | recall = recall_score(y_test, y_test_pred, average='weighted') 94 | f1 = f1_score(y_test, y_test_pred, average='weighted') 95 | kappa = cohen_kappa_score(y_test, y_test_pred) 96 | 97 | # Confusion matrix 98 | conf_matrix = confusion_matrix(y_test, y_test_pred, normalize='true') 99 | class_labels = np.unique(y_test) 100 | 101 | # Save confusion matrix as an image 102 | conf_matrix_path = os.path.join(path, f"{model_name}_{method}_confusion_matrix.png") 103 | plt.figure(figsize=(10, 8)) 104 | sns.heatmap(conf_matrix, annot=True, fmt=".2f", cmap="Blues", xticklabels=class_labels, yticklabels=class_labels) 105 | plt.title(f"Confusion Matrix - {method}") 106 | plt.xlabel("Predicted Label") 107 | plt.ylabel("True Label") 108 | plt.tight_layout() 109 | plt.savefig(conf_matrix_path) 110 | plt.close() # Close the plot to avoid overwriting in subsequent calls 111 | 112 | 113 | end = time.time() 114 | 115 | # Store results in a DataFrame 116 | results = pd.DataFrame({ 117 | "method": [method], 118 | "model": [model_name], 119 | "best_params": [grid.best_params_], 120 | "train_accuracy": [train_accuracy], 121 | "test_accuracy": [test_accuracy], 122 | "precision": [precision], 123 | "recall": [recall], 124 | "f1_score": [f1], 125 | "kappa": [kappa], 126 | "confusion_matrix_path": [conf_matrix_path], 127 | "runtime": [end - start], 128 | "best_model": [grid.best_estimator_] 129 | }) 130 | 131 | return results 132 | -------------------------------------------------------------------------------- /train_models.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.model_selection import train_test_split 3 | 4 | from _classification.parameter_finder import classification_parameter_finder 5 | from _classification.models.models import get_details_models 6 | 7 | import warnings 8 | from sklearn.exceptions import ConvergenceWarning 9 | import os 10 | 11 | # Ignore ConvergenceWarning 12 | warnings.filterwarnings("ignore", category = ConvergenceWarning) 13 | 14 | 15 | def train_models(x_data : dict, 16 | y : pd.DataFrame, 17 | path : str, 18 | name : str): 19 | 20 | """ 21 | This function trains multiple machine learning models on various subsets of the input dataset, 22 | performs hyperparameter tuning, evaluates the models, and saves the results. 23 | 24 | Parameters: 25 | x_data: dict 26 | A dictionary containing the feature data split into sections and subsections 27 | (e.g., {"section1": {"subsection1": X_data, "subsection2": X_data}}). 28 | Each subsection represents a different feature subset for training the models. 29 | 30 | y: pd.DataFrame 31 | The target labels for the dataset. 32 | 33 | path: str 34 | The directory path where output files, such as confusion matrix images, will be saved. 35 | 36 | name: str 37 | A descriptive name for the experiment (currently unused but can be used for logging or saving results). 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Agricultural Products Classification Pipeline

## Overview
This pipeline is designed to classify agricultural products using satellite data from **SENTINEL-1** and **SENTINEL-2**. The pipeline includes the following stages:

1. **Data Standardization**: Different standardization techniques are applied to the data to make it suitable for model training.
2. **Dimensionality Reduction**: PCA and LDA are applied to reduce the dimensionality of the feature space, with separate models for each satellite's data.
3. **Model Training and Hyperparameter Optimization**: Various machine learning models are trained, and hyperparameter optimization is performed using grid search.
---

### Satellite Data Input:

The input dataset contains Earth observation data from **SENTINEL-1** and **SENTINEL-2** satellites, obtained via Google Earth Engine. The data includes various bands from both satellites, as well as additional values relevant for classification tasks.

#### Example of input data:

| **Sample** | **0_B1** | **0_B2** | **0_B3** | **0_B4** | **0_B5** | **0_B6** | **0_B7** | **0_B8** | **0_B8A** | **0_B9** | **0_B11** | **0_B12** | **0_VV** |
|------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|----------|
| **Sample 1** | 0.050643478 | 0.071909783 | 0.108879348 | 0.140969565 | 0.156472826 | 0.172709783 | 0.185292391 | 0.180054348 | 0.195056522 | 0.205251087 | 0.195241304 | 0.1603 | -1 |
| **Sample 2** | 0.051273684 | 0.07195 | 0.107911842 | 0.138413158 | 0.156592105 | 0.180571053 | 0.195072368 | 0.189626316 | 0.204071053 | 0.243975 | 0.199786842 | 0.161619737 | -1 |
| **Sample 3** | 0.064336805 | 0.097296528 | 0.140022222 | 0.176558333 | 0.187975 | 0.19215 | 0.199796528 | 0.203748611 | 0.201070833 | 0.235688194 | 0.202470833 | -15.741307 | -1 |
| **Sample 4** | 0.070949999 | 0.100846154 | 0.150261539 | 0.196115385 | 0.214473077 | 0.219430769 | 0.227103846 | 0.226692308 | 0.230776923 | 0.23485 | 0.240280769 | 0.209653846 | -1 |
| **Sample 5** | 0.071380468 | 0.101917188 | 0.151620313 | 0.198378125 | 0.213576563 | 0.215678125 | 0.222285156 | 0.224170313 | 0.224170313 | 0.235323438 | 0.235323438 | 0.208569531 | -1 |
| **Sample 6** | 0.072846154 | 0.100773077 | 0.150984615 | 0.198823077 | 0.213915385 | 0.217265385 | 0.224673077 | 0.226946154 | 0.226946154 | 0.234361538 | 0.237073077 | 0.206880769 | -1 |
| **Sample 7** | 0.067707143 | 0.103935714 | 0.152242857 | 0.200014286 | 0.209557143 | 0.213071429 | 0.221978571 | 0.229471429 | 0.223307143 | 0.232307143 | 0.232307143 | 0.205528571 | -1 |
| **Sample 8** | 0.097139552 | 0.130318657 | 0.162661194 | 0.194323881 | 0.209510448 | 0.212884328 | 0.222468657 | 0.230838806 | 0.230782836 | 0.236003731 | 0.311174627 | 0.283676866 | -1 |
| **Sample 9** | 0.070247222 | 0.097663194 | 0.129397222 | 0.159320833 | 0.171659722 | 0.17494375 | 0.183878472 | 0.192720833 | 0.193045833 | 0.276390278 | 0.256345833 | 0.249488889 | -1 |
| **Sample 10** | 0.060408333 | 0.085986806 | 0.121355556 | 0.154906944 | 0.168461111 | 0.1728375 | 0.182507639 | 0.191263889 | 0.192247222 | 0.282597917 | 0.263926389 | 0.249488889 | -1 |

- The **bands** from **SENTINEL-2** include: `B1`, `B2`, `B3`, `B4`, `B5`, `B6`, `B11`, `B12`, etc.
- The **SENTINEL-1** data includes polarization bands such as `VV` and `VH`, with additional derived features such as `VV_1` and `VH_1`.
- Each row represents a single sample; the numeric prefix on each column name (e.g., the `0_` in `0_VV`) indexes the time step of the observation.

---
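As a rough sketch of how such a CSV can be grouped by satellite (the file name is a placeholder; the substring matching mirrors `split_data` in `_classification/utils.py`):

```python
import pandas as pd

# Band groups as defined in _classification/pre_process/data/spectrums.json
S1_BANDS = ["VH", "VV", "HH", "VH_1", "VV_1"]
S2_BANDS = ["B1", "B2", "B3", "B4", "B5", "B6", "B11", "B12",
            "B13", "B14", "B15", "B16", "NDVI", "EVI", "SAVI"]

df = pd.read_csv("satellite_samples.csv")  # placeholder file name

s1_cols = [c for c in df.columns if any(b in c.upper() for b in S1_BANDS)]
s2_cols = [c for c in df.columns
           if c not in s1_cols and any(b in c.upper() for b in S2_BANDS)]
print(len(s1_cols), "SENTINEL-1 columns;", len(s2_cols), "SENTINEL-2 columns")
```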

### 2. Dimensionality Reduction
Two dimensionality reduction techniques are used (see the sketch after this section):

- **PCA (Principal Component Analysis)**: Reduces the feature space by projecting the data onto a lower-dimensional subspace.
- **LDA (Linear Discriminant Analysis)**: A supervised, classification-aware dimensionality reduction technique.

Note: Because the two satellites have different band spaces, each satellite's data is also processed separately (the `split pca` / `split lda` variants in the results). For **SENTINEL-1**, the bands are:
- `VH`, `VV`, `HH`, `VH_1`, `VV_1`

For **SENTINEL-2**, the bands are:
- `B1`, `B2`, `B3`, `B4`, `B5`, `B6`, `B11`, `B12`, `B13`, `B14`, `B15`, `B16`, `NDVI`, `EVI`, `SAVI`
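
In scikit-learn terms the two reductions look roughly like this. This is a minimal sketch with made-up shapes and labels; `X` and `y` stand in for one satellite's feature matrix and the crop classes:

```python
import numpy as np
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

X = np.random.rand(200, 15)            # placeholder feature matrix (one satellite's bands)
y = np.random.randint(0, 5, size=200)  # placeholder crop-type labels

# PCA is unsupervised: keep enough components to explain 95% of the variance.
X_pca = PCA(n_components=0.95).fit_transform(X)

# LDA is supervised and yields at most (n_classes - 1) components.
X_lda = LinearDiscriminantAnalysis().fit_transform(X, y)

print(X_pca.shape, X_lda.shape)
```

For the `split pca` / `split lda` variants, the same reduction would be fitted on each satellite's band subset separately before the pieces are recombined.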

### 3. Model Training & Hyperparameter Optimization
The following models are trained, using grid search for hyperparameter optimization:

1. **Decision Tree Classifier**
2. **K-Nearest Neighbors (KNN)**
3. **Logistic Regression**
4. **Multilayer Perceptron (MLP)**
5. **Naive Bayes**
6. **Nearest Centroid**
7. **Perceptron**
8. **Random Forest**
9. **Support Vector Machine (SVM)**

Each model is optimized over its own hyperparameter grid.

---

## Model Hyperparameters

| Model | Hyperparameters |
|------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| **Decision Tree** | `criterion`: ['gini', 'entropy'], `max_depth`: [None, 10, 20, 30, 40], `min_samples_split`: [2, 5, 10], `min_samples_leaf`: [1, 2, 4] |
| **K-Nearest Neighbors** | `n_neighbors`: [1, 3, 5, 7, 9, 11, 13], `weights`: ['uniform', 'distance'], `metric`: ['euclidean', 'manhattan', 'minkowski'] |
| **Logistic Regression** | `penalty`: ['l1', 'l2'], `C`: [0.01, 0.1, 1, 10, 100], `solver`: ['liblinear', 'saga'], `max_iter`: [100, 200, 300, 500] |
| **MLP** | `hidden_layer_sizes`: [(50,), (100,), (50, 50), (100, 100)], `activation`: ['tanh', 'relu'], `solver`: ['sgd', 'adam'], `alpha`: [0.0001, 0.001, 0.01], `max_iter`: [100, 200, 300, 400, 500] |
| **Naive Bayes** | `var_smoothing`: [1e-9, 1e-8, 1e-7, 1e-6, 1e-5] |
| **Nearest Centroid** | `metric`: ['euclidean', 'manhattan'], `shrink_threshold`: [None, 0.1, 0.2, 0.5, 0.7, 0.8] |
| **Perceptron** | `penalty`: ['l1', 'l2', 'elasticnet'], `alpha`: [0.0001, 0.001, 0.01, 0.1, 1], `max_iter`: [1000, 2000, 3000] |
| **Random Forest** | `n_estimators`: [50, 100, 200], `criterion`: ['gini', 'entropy'], `max_depth`: [None, 10, 20, 30], `min_samples_split`: [2, 5, 10], `min_samples_leaf`: [1, 2, 4] |
| **SVM** | `C`: [0.1, 1, 10, 100, 1000], `kernel`: ['rbf'], `gamma`: [0.001, 0.01, 0.1, 1] |

---

## Requirements

- Python 3.x
- pandas
- scikit-learn
- numpy
- matplotlib (for plotting and visualization)

---

## Setup

1. **Clone the repository:**

   ```bash
   git clone https://github.com/parvvaresh/Classification-of-satellite-images.git
   cd Classification-of-satellite-images
   ```

2. **Install dependencies:**

   First, create a virtual environment (optional but recommended):

   ```bash
   python -m venv venv
   source venv/bin/activate  # On Windows: venv\Scripts\activate
   ```

   Then, install the required Python libraries:

   ```bash
   pip install -r requirements.txt
   ```

## Usage

The pipeline ingests satellite data, performs preprocessing, applies dimensionality reduction, and trains various models with optimized hyperparameters.

To use the pipeline, follow the steps below:

1. Prepare your input data as a CSV file with the necessary features and target column.
2. Adjust the input data and parameters in the respective scripts to suit your specific agricultural classification problem.

---

## Example Usage

```python
import pandas as pd
from pre_process import pre_process
from train_models import train_models

def classification(df: pd.DataFrame,
                   class_column: str,
                   path: str,
                   name: str) -> None:
    """
    Preprocess the data in `df` and train models on the processed data.

    Parameters:
    - df: pandas DataFrame containing the data to be classified.
    - class_column: The column name containing the target variable.
    - path: The directory where the trained models and results will be saved.
    - name: The name used to save the model and results.
    """
    # Preprocess the data to separate features and target variable
    x_data, y = pre_process(df, class_column)

    # Train models using the processed data
    train_models(x_data, y, path, name)

# Example usage
path_csv = "/data.csv"  # Path to your dataset
df = pd.read_csv(path_csv)  # Read the CSV into a DataFrame

# Call the classification function
classification(df, "ClassColumn", "/home/reza/data_test", "data_test")
```

### Explanation:
- `pre_process(df, class_column)` processes the data, separating the features (`x_data`) from the target variable (`y`).
- `train_models(x_data, y, path, name)` trains the machine learning models and saves the results to the specified directory (`path`) under the given name (`name`).

### Output:
After training, the results are saved to a CSV file containing the following information:
- **Method**: The data standardization and dimensionality reduction method used.
- **Model name**: The name of the model.
- **Best hyperparameters**: The best hyperparameters found during grid search.
- **Train accuracy**: Accuracy on the training dataset.
- **Test accuracy**: Accuracy on the test dataset.
- **Precision**, **Recall**, **F1 Score**, **Kappa**: Metrics for model evaluation.
- **Confusion Matrix path**: Path to the confusion matrix plot.
- **Runtime**: The time taken to train the model.
- **Best model**: The best estimator with its fitted parameters.
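
To inspect these results programmatically, a small sketch like the following works; it assumes the sample results file shipped at `assets/result.csv` and the column names shown there:

```python
import pandas as pd

# Load the results file produced by the pipeline (a sample copy lives in assets/result.csv).
results = pd.read_csv("assets/result.csv", index_col=0)

# Rank method/model combinations by held-out accuracy, breaking ties with kappa.
ranked = results.sort_values(["test_accuracy", "kappa"], ascending=False)
print(ranked[["method", "model", "test_accuracy", "f1_score", "kappa"]].head(10))
```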

---

### Sample Result Entry:

| method | model | best_params | train_accuracy | test_accuracy | precision | recall | f1_score | kappa | confusion_matrix_path | runtime | best_model |
|---------------------|------------------------|-------------------------------------------------------------------|----------------|---------------|-----------|--------|----------|-------|-----------------------|---------|----------------------------------------------------------|
| original-original | KNeighborsClassifier | `{'metric': 'euclidean', 'n_neighbors': 1, 'weights': 'uniform'}` | 1.0 | 0.925 | 0.909 | 0.925 | 0.913 | 0.903 | path_to_matrix.png | 2.43 | KNeighborsClassifier(metric='euclidean', n_neighbors=1) |

---

--------------------------------------------------------------------------------
/assets/result.csv:
--------------------------------------------------------------------------------
1 | ,method,model,best_params,train_accuracy,test_accuracy,precision,recall,f1_score,kappa,confusion_matrix,runtime,best model
2 | 0,orginal-original,KNeighborsClassifier,"{'metric': 'euclidean', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,0.925,0.9099206349206348,0.925,0.9134917043740574,0.9031476997578692,"[ 8 0 0 0 0 0 0 7 1 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0
3 | 0 0 1 0 0 0 1 0 0 0 0 7]",2.436521053314209,"KNeighborsClassifier(metric='euclidean', n_neighbors=1)"
4 | 1,orginal-original,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 40, 'min_samples_leaf': 1, 'min_samples_split': 2}",1.0,0.85,0.8743006993006993,0.85,0.8462672064777328,0.8073836276083467,"[ 8 0 0 0 0 0 0 0 5 1 0 0 0 2 0 0 11 0 1 0 0 0 0 0
5 | 2 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
6 | 8]",3.389958620071411,"DecisionTreeClassifier(criterion='entropy', max_depth=40)"
7 | 2,orginal-pca,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,0.95,0.9297008547008547,0.95,0.9379019607843138,0.9356395816572808,"[ 8 0 0 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0
8 | 0 0 1 0 0 0 1 0 0 0 0 7]",0.22979021072387695,"KNeighborsClassifier(metric='manhattan', n_neighbors=1)"
9 | 3,orginal-pca,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 40, 'min_samples_leaf': 2, 'min_samples_split': 10}",0.89937106918239,0.825,0.8111111111111111,0.825,0.8144117647058824,0.7745571658615137,"[ 6 0 0 0 0 2 0 8 0 0 0 0 0 1 11 0 0 0 0 0 1 2 0 0
10 | 0 0 0 0 0 1 2 0 0 0 0 6]",0.6804001331329346,"DecisionTreeClassifier(criterion='entropy', max_depth=40, min_samples_leaf=2,
11 | min_samples_split=10)"
12 | 4,orginal-lda,KNeighborsClassifier,"{'metric': 'euclidean', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,1.0,1.0,1.0,1.0,1.0,"[ 8 0 0 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0
13 | 0 0 0 0 1 0 0 0 0 0 0 8]",0.11695146560668945,"KNeighborsClassifier(metric='euclidean', n_neighbors=1)"
14 | 5,orginal-lda,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 30, 'min_samples_leaf': 1, 'min_samples_split': 10}",1.0,1.0,1.0,1.0,1.0,1.0,"[ 8 0 0 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0
15 | 0 0 0 0 1 0 0 0 0 0 0 8]",0.188154935836792,"DecisionTreeClassifier(criterion='entropy', max_depth=30, min_samples_split=10)"
16 | 6,orginal-split pca,KNeighborsClassifier,"{'metric': 'euclidean', 'n_neighbors': 7, 'weights': 'uniform'}",0.6666666666666666,0.625,0.6489010989010989,0.625,0.6327002801120448,0.5230524642289349,"[6 1 0 0 1 0 0 0 5 2 1 0 0 0 0 0 8 0 0 1 3 0 0 1 2 0 0 0 0 0 0 0 0 0 0 1 0
17 | 0 0 0 0 0 2 0 2 0 0 0
4]",0.11127662658691406,"KNeighborsClassifier(metric='euclidean', n_neighbors=7)" 18 | 7,orginal-split pca,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 30, 'min_samples_leaf': 4, 'min_samples_split': 2}",0.7735849056603774,0.55,0.6736904761904762,0.55,0.5898268398268398,0.4533029612756265,"[6 1 0 0 1 0 0 0 5 1 2 0 0 0 0 0 7 0 0 4 1 0 0 1 2 0 0 0 0 0 0 0 0 0 0 1 0 19 | 0 0 0 0 0 1 1 1 0 0 3 2]",0.1759507656097412,"DecisionTreeClassifier(criterion='entropy', max_depth=30, min_samples_leaf=4)" 20 | 8,orginal-split lda,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,0.875,0.8535714285714284,0.875,0.861923076923077,0.8385794995964487,"[ 8 0 0 0 0 0 0 7 0 1 0 0 0 0 12 0 0 0 0 1 0 2 0 0 21 | 0 0 0 0 0 1 0 0 2 0 0 6]",0.10943460464477539,"KNeighborsClassifier(metric='manhattan', n_neighbors=1)" 22 | 9,orginal-split lda,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 10}",0.9245283018867925,0.875,0.9199999999999999,0.875,0.889345238095238,0.8427672955974843,"[ 7 0 0 0 1 0 0 0 6 0 2 0 0 0 0 0 11 0 0 0 1 0 0 0 23 | 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 24 | 7]",0.18211960792541504,"DecisionTreeClassifier(criterion='entropy', max_depth=10, min_samples_leaf=4, 25 | min_samples_split=10)" 26 | 10,standard_scaled-original,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,0.975,0.951923076923077,0.975,0.9630000000000001,0.9678197908286403,"[ 8 0 0 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0 27 | 0 0 1 0 0 0 0 0 0 0 0 8]",0.5977652072906494,"KNeighborsClassifier(metric='manhattan', n_neighbors=1)" 28 | 11,standard_scaled-original,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 20, 'min_samples_leaf': 1, 'min_samples_split': 5}",0.9937106918238994,0.85,0.840967365967366,0.85,0.8352781954887218,0.8061389337641357,"[ 8 0 0 0 0 0 0 5 1 0 0 2 0 1 11 0 0 0 0 0 0 2 0 1 29 | 0 0 1 0 0 0 0 0 0 0 0 8]",3.612668752670288,"DecisionTreeClassifier(criterion='entropy', max_depth=20, min_samples_split=5)" 30 | 12,standard_scaled-pca,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 3, 'weights': 'distance'}",1.0,0.95,0.93125,0.95,0.9392857142857143,0.9361022364217253,"[ 8 0 0 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0 31 | 0 0 0 0 0 1 0 0 0 1 0 7]",0.27402448654174805,"KNeighborsClassifier(metric='manhattan', n_neighbors=3, weights='distance')" 32 | 13,standard_scaled-pca,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 10}",0.9245283018867925,0.8,0.8541666666666666,0.8,0.8071794871794872,0.7446129289704708,"[ 7 0 0 0 1 0 0 0 5 2 1 0 0 0 0 0 12 0 0 0 0 0 0 0 33 | 2 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 2 0 0 1 34 | 5]",0.7710146903991699,"DecisionTreeClassifier(criterion='entropy', min_samples_leaf=2, 35 | min_samples_split=10)" 36 | 14,standard_scaled-lda,KNeighborsClassifier,"{'metric': 'euclidean', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,1.0,1.0,1.0,1.0,1.0,"[ 8 0 0 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0 37 | 0 0 0 0 1 0 0 0 0 0 0 8]",0.11973190307617188,"KNeighborsClassifier(metric='euclidean', n_neighbors=1)" 38 | 15,standard_scaled-lda,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 40, 'min_samples_leaf': 1, 'min_samples_split': 2}",1.0,1.0,1.0,1.0,1.0,1.0,"[ 8 0 0 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0 39 | 0 0 0 0 1 0 0 0 0 0 0 8]",0.19898748397827148,"DecisionTreeClassifier(criterion='entropy', max_depth=40)" 40 | 
16,standard_scaled-split pca,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 13, 'weights': 'uniform'}",0.7295597484276729,0.7,0.7158882783882783,0.7,0.7021288515406162,0.6175298804780877,"[ 6 0 0 0 0 2 0 6 0 2 0 0 0 0 10 0 1 1 0 0 1 2 0 0 41 | 1 0 0 0 0 0 2 0 2 0 0 4]",0.12825870513916016,"KNeighborsClassifier(metric='manhattan', n_neighbors=13)" 42 | 17,standard_scaled-split pca,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 2}",0.7987421383647799,0.575,0.6310389610389611,0.575,0.589945652173913,0.47571318427139553,"[5 1 0 0 1 0 1 0 5 1 2 0 0 0 0 0 8 0 0 3 1 0 0 0 3 0 0 0 0 0 0 0 0 0 0 1 0 43 | 0 0 0 0 0 2 1 2 0 0 1 2]",0.22144865989685059,"DecisionTreeClassifier(criterion='entropy', max_depth=10, min_samples_leaf=4)" 44 | 18,standard_scaled-split lda,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,0.875,0.8535714285714284,0.875,0.861923076923077,0.8385794995964487,"[ 8 0 0 0 0 0 0 7 0 1 0 0 0 0 12 0 0 0 0 1 0 2 0 0 45 | 0 0 0 0 0 1 0 0 2 0 0 6]",0.11701035499572754,"KNeighborsClassifier(metric='manhattan', n_neighbors=1)" 46 | 19,standard_scaled-split lda,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 20, 'min_samples_leaf': 4, 'min_samples_split': 10}",0.9245283018867925,0.875,0.9199999999999999,0.875,0.889345238095238,0.8427672955974843,"[ 7 0 0 0 1 0 0 0 6 0 2 0 0 0 0 0 11 0 0 0 1 0 0 0 47 | 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 48 | 7]",0.1955115795135498,"DecisionTreeClassifier(criterion='entropy', max_depth=20, min_samples_leaf=4, 49 | min_samples_split=10)" 50 | 20,minmax_scaled-original,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 3, 'weights': 'distance'}",1.0,0.95,0.9297008547008547,0.95,0.9379019607843138,0.9356395816572808,"[ 8 0 0 0 0 0 0 7 1 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0 51 | 0 0 0 0 0 1 0 0 0 0 0 8]",0.6892731189727783,"KNeighborsClassifier(metric='manhattan', n_neighbors=3, weights='distance')" 52 | 21,minmax_scaled-original,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 5}",0.9748427672955975,0.825,0.8366666666666667,0.825,0.8223015873015873,0.7738287560581584,"[ 8 0 0 0 0 0 0 0 5 3 0 0 0 0 0 1 11 0 0 0 0 0 0 0 53 | 2 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 54 | 7]",3.1425938606262207,"DecisionTreeClassifier(criterion='entropy', min_samples_leaf=2, 55 | min_samples_split=5)" 56 | 22,minmax_scaled-pca,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 3, 'weights': 'distance'}",1.0,0.95,0.93125,0.95,0.9392857142857143,0.9361022364217253,"[ 8 0 0 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0 57 | 0 0 0 0 0 1 0 0 0 1 0 7]",0.2655339241027832,"KNeighborsClassifier(metric='manhattan', n_neighbors=3, weights='distance')" 58 | 23,minmax_scaled-pca,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 40, 'min_samples_leaf': 2, 'min_samples_split': 5}",0.9685534591194969,0.775,0.7986263736263737,0.775,0.785,0.7140587768069897,"[ 7 0 0 0 1 0 0 0 5 1 2 0 0 0 0 1 10 0 1 0 0 0 0 2 59 | 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 60 | 8]",0.5482699871063232,"DecisionTreeClassifier(criterion='entropy', max_depth=40, min_samples_leaf=2, 61 | min_samples_split=5)" 62 | 24,minmax_scaled-lda,KNeighborsClassifier,"{'metric': 'euclidean', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,1.0,1.0,1.0,1.0,1.0,"[ 8 0 0 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0 63 | 0 0 0 0 1 0 0 0 0 0 0 
8]",0.09939718246459961,"KNeighborsClassifier(metric='euclidean', n_neighbors=1)" 64 | 25,minmax_scaled-lda,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 2}",1.0,1.0,1.0,1.0,1.0,1.0,"[ 8 0 0 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0 65 | 0 0 0 0 1 0 0 0 0 0 0 8]",0.15778470039367676,"DecisionTreeClassifier(criterion='entropy', max_depth=10, min_samples_leaf=2)" 66 | 26,minmax_scaled-split pca,KNeighborsClassifier,"{'metric': 'euclidean', 'n_neighbors': 11, 'weights': 'uniform'}",0.7735849056603774,0.7,0.7283333333333333,0.7,0.7012726244343892,0.620253164556962,"[ 6 0 0 0 0 2 0 5 0 2 0 1 0 0 10 0 1 1 0 0 0 3 0 0 67 | 1 0 0 0 0 0 2 0 2 0 0 4]",0.09162116050720215,"KNeighborsClassifier(metric='euclidean', n_neighbors=11)" 68 | 27,minmax_scaled-split pca,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 10}",0.8238993710691824,0.625,0.659047619047619,0.625,0.6393582887700535,0.5297805642633229,[6 0 0 0 0 2 0 6 1 1 0 0 0 0 8 0 2 2 0 1 0 2 0 0 1 0 0 0 0 0 2 1 1 0 1 3],0.14446592330932617,"DecisionTreeClassifier(criterion='entropy', max_depth=10, min_samples_leaf=4, 69 | min_samples_split=10)" 70 | 28,minmax_scaled-split lda,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,0.875,0.8535714285714284,0.875,0.861923076923077,0.8385794995964487,"[ 8 0 0 0 0 0 0 7 0 1 0 0 0 0 12 0 0 0 0 1 0 2 0 0 71 | 0 0 0 0 0 1 0 0 2 0 0 6]",0.09004521369934082,"KNeighborsClassifier(metric='manhattan', n_neighbors=1)" 72 | 29,minmax_scaled-split lda,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 40, 'min_samples_leaf': 4, 'min_samples_split': 2}",0.9245283018867925,0.9,0.9199999999999999,0.9,0.9026785714285716,0.8734177215189873,"[ 8 0 0 0 0 0 0 6 0 2 0 0 0 0 11 0 0 1 0 0 0 3 0 0 73 | 0 0 0 0 1 0 0 0 1 0 0 7]",0.16330218315124512,"DecisionTreeClassifier(criterion='entropy', max_depth=40, min_samples_leaf=4)" 74 | 30,maxabs_scaled-original,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,0.9,0.89625,0.9,0.8890476190476191,0.8709677419354839,"[ 8 0 0 0 0 0 0 6 1 1 0 0 0 0 12 0 0 0 0 0 0 3 0 0 75 | 0 0 1 0 0 0 0 0 1 0 0 7]",0.5694336891174316,"KNeighborsClassifier(metric='manhattan', n_neighbors=1)" 76 | 31,maxabs_scaled-original,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 20, 'min_samples_leaf': 2, 'min_samples_split': 5}",0.9748427672955975,0.8,0.8366666666666667,0.8,0.8089682539682539,0.7431781701444622,"[ 7 0 0 0 1 0 0 0 5 3 0 0 0 0 0 1 11 0 0 0 0 0 0 0 77 | 2 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 78 | 7]",3.2607643604278564,"DecisionTreeClassifier(criterion='entropy', max_depth=20, min_samples_leaf=2, 79 | min_samples_split=5)" 80 | 32,maxabs_scaled-pca,KNeighborsClassifier,"{'metric': 'euclidean', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,0.925,0.908173076923077,0.925,0.9139523809523811,0.9038461538461539,"[ 8 0 0 0 0 0 0 7 0 1 0 0 0 0 12 0 0 0 0 0 0 3 0 0 81 | 0 1 0 0 0 0 0 0 1 0 0 7]",0.2538721561431885,"KNeighborsClassifier(metric='euclidean', n_neighbors=1)" 82 | 33,maxabs_scaled-pca,DecisionTreeClassifier,"{'criterion': 'gini', 'max_depth': 20, 'min_samples_leaf': 1, 'min_samples_split': 5}",0.9685534591194969,0.675,0.7726190476190476,0.675,0.6906759906759906,0.5950155763239875,"[ 8 0 0 0 0 0 0 0 2 2 2 2 0 0 0 1 10 1 0 0 0 0 0 1 83 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 2 84 | 5]",0.5452220439910889,"DecisionTreeClassifier(max_depth=20, 
min_samples_split=5)" 85 | 34,maxabs_scaled-lda,KNeighborsClassifier,"{'metric': 'euclidean', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,1.0,1.0,1.0,1.0,1.0,"[ 8 0 0 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0 86 | 0 0 0 0 1 0 0 0 0 0 0 8]",0.09782814979553223,"KNeighborsClassifier(metric='euclidean', n_neighbors=1)" 87 | 35,maxabs_scaled-lda,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 10}",1.0,1.0,1.0,1.0,1.0,1.0,"[ 8 0 0 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0 88 | 0 0 0 0 1 0 0 0 0 0 0 8]",0.15465188026428223,"DecisionTreeClassifier(criterion='entropy', min_samples_split=10)" 89 | 36,maxabs_scaled-split pca,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 13, 'weights': 'uniform'}",0.7547169811320755,0.7,0.7091025641025641,0.7,0.6912726244343892,0.6169193934557063,"[ 6 0 0 0 0 2 0 5 1 2 0 0 0 0 10 0 0 2 0 0 0 3 0 0 90 | 1 0 0 0 0 0 2 0 2 0 0 4]",0.08480620384216309,"KNeighborsClassifier(metric='manhattan', n_neighbors=13)" 91 | 37,maxabs_scaled-split pca,DecisionTreeClassifier,"{'criterion': 'gini', 'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 10}",0.8238993710691824,0.65,0.6654761904761904,0.65,0.6533882783882784,0.5534290271132376,"[ 5 0 1 0 1 0 1 0 5 2 1 0 0 0 0 0 10 0 0 1 1 0 1 0 92 | 2 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 2 1 1 0 0 0 93 | 4]",0.15716028213500977,"DecisionTreeClassifier(max_depth=10, min_samples_leaf=4, min_samples_split=10)" 94 | 38,maxabs_scaled-split lda,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,0.875,0.8535714285714284,0.875,0.861923076923077,0.8385794995964487,"[ 8 0 0 0 0 0 0 7 0 1 0 0 0 0 12 0 0 0 0 1 0 2 0 0 95 | 0 0 0 0 0 1 0 0 2 0 0 6]",0.09535479545593262,"KNeighborsClassifier(metric='manhattan', n_neighbors=1)" 96 | 39,maxabs_scaled-split lda,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': None, 'min_samples_leaf': 4, 'min_samples_split': 2}",0.9245283018867925,0.875,0.9199999999999999,0.875,0.889345238095238,0.8427672955974843,"[ 7 0 0 0 1 0 0 0 6 0 2 0 0 0 0 0 11 0 0 0 1 0 0 0 97 | 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 98 | 7]",0.1549670696258545,"DecisionTreeClassifier(criterion='entropy', min_samples_leaf=4)" 99 | 40,robust_scaled-original,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,0.975,0.951923076923077,0.975,0.9630000000000001,0.9678197908286403,"[ 8 0 0 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0 100 | 0 0 1 0 0 0 0 0 0 0 0 8]",0.6036934852600098,"KNeighborsClassifier(metric='manhattan', n_neighbors=1)" 101 | 41,robust_scaled-original,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 30, 'min_samples_leaf': 1, 'min_samples_split': 2}",1.0,0.85,0.9,0.85,0.8576556776556776,0.8067632850241546,"[ 7 0 0 0 1 0 0 0 5 3 0 0 0 0 0 0 12 0 0 0 0 0 0 0 102 | 2 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 103 | 8]",3.1773626804351807,"DecisionTreeClassifier(criterion='entropy', max_depth=30)" 104 | 42,robust_scaled-pca,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,0.95,0.9321428571428572,0.95,0.9385897435897436,0.9354317998385795,"[ 8 0 0 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0 105 | 0 0 1 0 0 0 0 0 1 0 0 7]",0.13072633743286133,"KNeighborsClassifier(metric='manhattan', n_neighbors=1)" 106 | 43,robust_scaled-pca,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 20, 'min_samples_leaf': 2, 'min_samples_split': 
5}",0.949685534591195,0.825,0.875,0.825,0.8324175824175825,0.7752808988764045,"[ 7 0 0 0 1 0 0 0 6 1 1 0 0 0 0 0 12 0 0 0 0 0 0 1 107 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 2 0 0 0 108 | 5]",0.6361377239227295,"DecisionTreeClassifier(criterion='entropy', max_depth=20, min_samples_leaf=2, 109 | min_samples_split=5)" 110 | 44,robust_scaled-lda,KNeighborsClassifier,"{'metric': 'euclidean', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,1.0,1.0,1.0,1.0,1.0,"[ 8 0 0 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0 111 | 0 0 0 0 1 0 0 0 0 0 0 8]",0.09947061538696289,"KNeighborsClassifier(metric='euclidean', n_neighbors=1)" 112 | 45,robust_scaled-lda,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 5}",1.0,1.0,1.0,1.0,1.0,1.0,"[ 8 0 0 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0 113 | 0 0 0 0 1 0 0 0 0 0 0 8]",0.15237903594970703,"DecisionTreeClassifier(criterion='entropy', max_depth=10, min_samples_split=5)" 114 | 46,robust_scaled-split pca,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 9, 'weights': 'uniform'}",0.6981132075471698,0.7,0.7063644688644689,0.7,0.6983193277310925,0.6175298804780877,"[ 6 1 0 0 0 1 0 6 0 2 0 0 0 0 10 0 1 1 0 0 1 2 0 0 115 | 1 0 0 0 0 0 2 0 2 0 0 4]",0.0860593318939209,"KNeighborsClassifier(metric='manhattan', n_neighbors=9)" 116 | 47,robust_scaled-split pca,DecisionTreeClassifier,"{'criterion': 'gini', 'max_depth': 20, 'min_samples_leaf': 4, 'min_samples_split': 5}",0.7861635220125787,0.55,0.5792582417582418,0.55,0.5605714285714286,0.4330708661417323,"[4 1 0 1 1 0 1 0 5 2 1 0 0 0 0 0 8 0 0 2 2 0 0 1 2 0 0 0 0 0 0 0 0 0 0 1 0 117 | 0 0 0 0 0 2 1 2 0 0 0 3]",0.15172386169433594,"DecisionTreeClassifier(max_depth=20, min_samples_leaf=4, min_samples_split=5)" 118 | 48,robust_scaled-split lda,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,0.875,0.8535714285714284,0.875,0.861923076923077,0.8385794995964487,"[ 8 0 0 0 0 0 0 7 0 1 0 0 0 0 12 0 0 0 0 1 0 2 0 0 119 | 0 0 0 0 0 1 0 0 2 0 0 6]",0.09231424331665039,"KNeighborsClassifier(metric='manhattan', n_neighbors=1)" 120 | 49,robust_scaled-split lda,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': None, 'min_samples_leaf': 4, 'min_samples_split': 5}",0.9245283018867925,0.875,0.9199999999999999,0.875,0.889345238095238,0.8427672955974843,"[ 7 0 0 0 1 0 0 0 6 0 2 0 0 0 0 0 11 0 0 0 1 0 0 0 121 | 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 122 | 7]",0.16288161277770996,"DecisionTreeClassifier(criterion='entropy', min_samples_leaf=4, 123 | min_samples_split=5)" 124 | 50,normalized-original,KNeighborsClassifier,"{'metric': 'euclidean', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,0.9,0.8927777777777777,0.9,0.8879971988795518,0.8704453441295547,"[ 8 0 0 0 0 0 0 7 1 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0 125 | 0 0 1 0 0 0 1 0 1 0 0 6]",0.6826939582824707,"KNeighborsClassifier(metric='euclidean', n_neighbors=1)" 126 | 51,normalized-original,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 40, 'min_samples_leaf': 2, 'min_samples_split': 2}",0.9622641509433962,0.85,0.8616666666666667,0.85,0.848968253968254,0.8067632850241546,"[ 8 0 0 0 0 0 0 5 3 0 0 0 0 1 11 0 0 0 0 0 0 3 0 0 127 | 0 0 1 0 0 0 0 0 0 0 1 7]",2.462327241897583,"DecisionTreeClassifier(criterion='entropy', max_depth=40, min_samples_leaf=2)" 128 | 52,normalized-pca,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,0.9,0.8927777777777777,0.9,0.8879971988795518,0.8704453441295547,"[ 8 0 0 0 0 0 0 7 1 0 0 0 
0 0 12 0 0 0 0 0 0 3 0 0 129 | 0 0 1 0 0 0 1 0 1 0 0 6]",0.12588214874267578,"KNeighborsClassifier(metric='manhattan', n_neighbors=1)" 130 | 53,normalized-pca,DecisionTreeClassifier,"{'criterion': 'gini', 'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 5}",0.9182389937106918,0.8,0.8333333333333333,0.8,0.7824102368220015,0.7415185783521809,"[ 6 0 2 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0 131 | 0 0 1 0 0 0 3 0 1 0 1 3]",0.46621108055114746,"DecisionTreeClassifier(max_depth=10, min_samples_leaf=4, min_samples_split=5)" 132 | 54,normalized-lda,KNeighborsClassifier,"{'metric': 'euclidean', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,1.0,1.0,1.0,1.0,1.0,"[ 8 0 0 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0 133 | 0 0 0 0 1 0 0 0 0 0 0 8]",0.09972310066223145,"KNeighborsClassifier(metric='euclidean', n_neighbors=1)" 134 | 55,normalized-lda,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 5}",1.0,0.95,0.96,0.95,0.9492063492063492,0.9362041467304625,"[ 8 0 0 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0 135 | 0 0 0 0 1 0 2 0 0 0 0 6]",0.14361000061035156,"DecisionTreeClassifier(criterion='entropy', max_depth=10, min_samples_split=5)" 136 | 56,normalized-split pca,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 11, 'weights': 'uniform'}",0.6792452830188679,0.7,0.7159722222222222,0.7,0.6861344537815126,0.6141479099678457,"[ 7 0 0 0 1 0 0 0 5 1 2 0 0 0 0 0 11 0 0 0 1 0 0 1 137 | 2 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 1 3 0 0 0 138 | 3]",0.09440398216247559,"KNeighborsClassifier(metric='manhattan', n_neighbors=11)" 139 | 57,normalized-split pca,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 10}",0.7735849056603774,0.55,0.5639682539682539,0.55,0.5500188536953242,0.4339622641509434,"[5 1 0 1 1 0 0 0 5 1 2 0 0 0 0 0 8 0 0 1 3 0 0 1 2 0 0 0 0 0 0 0 0 0 0 1 0 140 | 0 0 0 0 0 1 3 2 0 0 0 2]",0.1540205478668213,"DecisionTreeClassifier(criterion='entropy', max_depth=10, min_samples_split=10)" 141 | 58,normalized-split lda,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,0.825,0.8419642857142857,0.825,0.8219780219780219,0.7763578274760383,"[ 8 0 0 0 0 0 0 0 5 1 1 1 0 0 0 0 11 0 0 0 1 0 0 0 142 | 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 2 0 0 0 143 | 6]",0.0857245922088623,"KNeighborsClassifier(metric='manhattan', n_neighbors=1)" 144 | 59,normalized-split lda,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 5}",0.9748427672955975,0.875,0.8904545454545454,0.875,0.8755422647527912,0.8415213946117274,[8 0 0 0 0 0 0 7 0 1 0 0 0 0 9 0 0 3 0 0 1 2 0 0 0 0 0 0 1 0 0 0 0 0 0 8],0.16163039207458496,"DecisionTreeClassifier(criterion='entropy', min_samples_split=5)" 145 | --------------------------------------------------------------------------------