├── requirements.txt
├── Dockerfile
├── _classification
│   ├── models
│   │   ├── naive_bayes.py
│   │   ├── svm.py
│   │   ├── knn.py
│   │   ├── NearestCentroid.py
│   │   ├── logstic_regression.py
│   │   ├── decision_tree.py
│   │   ├── random_forest.py
│   │   ├── perceptron.py
│   │   ├── mlp.py
│   │   └── models.py
│   ├── pre_process
│   │   ├── data
│   │   │   ├── spectrums.json
│   │   │   └── parser.py
│   │   ├── dimensionality_reduction
│   │   │   ├── pca.py
│   │   │   └── lda.py
│   │   └── standardize.py
│   ├── utils.py
│   └── parameter_finder.py
├── classification.py
├── pre_process.py
├── train_models.py
├── README.md
└── assets
    └── result.csv

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
scikit-learn
pandas
numpy
matplotlib
seaborn
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
FROM python:3.11
WORKDIR /classification
COPY . .
RUN pip install --no-cache-dir -r requirements.txt
CMD ["python", "classification.py"]
--------------------------------------------------------------------------------
/_classification/pre_process/data/spectrums.json:
--------------------------------------------------------------------------------
{
    "s1": ["VH", "VV", "HH", "VH_1", "VV_1"],
    "s2": ["B1", "B2", "B3", "B4", "B5", "B6", "B11", "B12", "B13", "B14", "B15", "B16", "NDVI", "EVI", "SAVI"]
}
--------------------------------------------------------------------------------
/_classification/pre_process/data/parser.py:
--------------------------------------------------------------------------------
import json
import os

def get_path_script():
    """Return the directory that contains this script."""
    return os.path.dirname(os.path.abspath(__file__))


def get_spectrums():
    """Load the satellite band groups from spectrums.json."""
    spectrums_file = os.path.join(get_path_script(), "spectrums.json")

    with open(spectrums_file, "r") as json_file:
        spectrums = json.load(json_file)
    return spectrums
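
# Example usage (illustrative, not part of the original module): running this
# file directly prints the band groups bundled with the repository.
if __name__ == "__main__":
    for satellite, bands in get_spectrums().items():
        print(satellite, "->", bands)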
--------------------------------------------------------------------------------
/classification.py:
--------------------------------------------------------------------------------
import pandas as pd
from pre_process import pre_process
from train_models import train_models


def classification(df: pd.DataFrame,
                   class_column: str,
                   path: str,
                   name: str) -> None:

    x_data, y = pre_process(df, class_column)

    train_models(x_data, y, path, name)


path_csv = "/home/reza/hamedan_seifi.csv"
df = pd.read_csv(path_csv)


def binary(label, target):
    """Collapse every class except `target` into a single "other" class."""
    if label != target:
        return "other"
    return label


df["Name"] = df["Name"].apply(lambda x: binary(x, "wi-wr-br-bi"))

df = df.sample(100)  # subsample for a quick run
classification(df, "Name", "/home/reza/hamedan_seifi", "hamedan_seifi")
--------------------------------------------------------------------------------
/_classification/models/naive_bayes.py:
--------------------------------------------------------------------------------
from sklearn.naive_bayes import GaussianNB

# ******** Get Naive Bayes Function ********
def get_nb():
    """
    Initializes a Gaussian Naive Bayes classifier and provides a parameter grid for hyperparameter tuning.

    Returns:
        tuple:
            - GaussianNB object: An instance of the Gaussian Naive Bayes classifier.
            - dict: A dictionary containing hyperparameter options for tuning.
    """

    # Define the hyperparameter grid for Gaussian Naive Bayes
    param_naive_bayes = {
        'var_smoothing': [1e-9, 1e-8, 1e-7, 1e-6, 1e-5]
        # 'var_smoothing': A smoothing parameter added to the variance to prevent zero probabilities
        # and handle numerical stability issues. Values closer to 1e-9 are typical defaults.
    }

    # Initialize a GaussianNB object
    naive_bayes = GaussianNB()

    # Return the classifier and the hyperparameter grid
    return naive_bayes, param_naive_bayes
--------------------------------------------------------------------------------
/_classification/models/svm.py:
--------------------------------------------------------------------------------
from sklearn.svm import SVC

# ******** Get Support Vector Machine (SVM) Function ********
def get_svm():
    """
    Initializes a Support Vector Classifier (SVC) and provides a parameter grid for hyperparameter tuning.

    Returns:
        tuple:
            - SVC object: An instance of the Support Vector Classifier.
            - dict: A dictionary containing hyperparameter options for tuning.
    """

    # Define the hyperparameter grid for the SVC (Support Vector Classifier)
    param_svm = {
        'C': [0.1, 1, 10, 100, 1000],  # Regularization parameter (larger values mean less regularization)
        'kernel': ['rbf'],  # The kernel type to be used in the algorithm (radial basis function kernel)
        'gamma': [0.001, 0.01, 0.1, 1],  # Kernel coefficient for 'rbf' kernel
    }

    # Initialize a Support Vector Classifier object
    svm_classifier = SVC()

    # Return the classifier and the hyperparameter grid
    return svm_classifier, param_svm
12 | """ 13 | 14 | # Define the hyperparameter grid for the KNN classifier 15 | param_knn = { 16 | 'n_neighbors': list(range(1, 15, 2)), # Number of neighbors to consider, ranging from 1 to 15 with a step of 2 17 | 'weights': ['uniform', 'distance'], # Weighting scheme: 'uniform' (all points equal) or 'distance' (inverse distance) 18 | 'metric': ['euclidean', 'manhattan', 'minkowski'] # Distance metrics to use for the KNN algorithm 19 | } 20 | 21 | # Initialize a KNeighborsClassifier object 22 | knn = KNeighborsClassifier() 23 | 24 | # Return the classifier and the hyperparameter grid 25 | return knn, param_knn 26 | -------------------------------------------------------------------------------- /_classification/models/NearestCentroid.py: -------------------------------------------------------------------------------- 1 | from sklearn.neighbors import NearestCentroid 2 | 3 | # ******** Get Nearest Centroid Function ******** 4 | def get_nc(): 5 | """ 6 | Initializes a Nearest Centroid classifier and provides a parameter grid for hyperparameter tuning. 7 | 8 | Returns: 9 | tuple: 10 | - NearestCentroid object: An instance of the Nearest Centroid classifier. 11 | - dict: A dictionary containing hyperparameter options for tuning. 12 | """ 13 | 14 | # Define the hyperparameter grid for the Nearest Centroid classifier 15 | param_NearestCentroid = { 16 | 'metric': ['euclidean', 'manhattan'], # Distance metrics to compute nearest centroid 17 | 'shrink_threshold': [None, 0.1, 0.2, 0.5, 0.7, 0.8] 18 | # 'shrink_threshold': Optional shrinkage threshold to regularize centroids (if not None). 19 | # Helps to improve robustness with high-dimensional data. 20 | } 21 | 22 | # Initialize a NearestCentroid object 23 | nearest_centroid = NearestCentroid() 24 | 25 | # Return the classifier and the hyperparameter grid 26 | return nearest_centroid, param_NearestCentroid 27 | -------------------------------------------------------------------------------- /_classification/models/logstic_regression.py: -------------------------------------------------------------------------------- 1 | from sklearn.linear_model import LogisticRegression 2 | 3 | # ******** Get Logistic Regression Function ******** 4 | def get_lr(): 5 | """ 6 | Initializes a Logistic Regression classifier and provides a parameter grid for hyperparameter tuning. 7 | 8 | Returns: 9 | tuple: 10 | - LogisticRegression object: An instance of the Logistic Regression classifier. 11 | - dict: A dictionary containing hyperparameter options for tuning. 
12 | """ 13 | 14 | # Define the hyperparameter grid for Logistic Regression 15 | param_logsticRegression = { 16 | 'penalty': ['l1', 'l2'], # Regularization techniques: L1 (Lasso) or L2 (Ridge) 17 | 'C': [0.01, 0.1, 1, 10, 100], # Inverse regularization strength (smaller values = stronger regularization) 18 | 'solver': ['liblinear', 'saga'], # Optimization solvers for fitting the model 19 | 'max_iter': [100, 200, 300, 500] # Maximum number of iterations for solver convergence 20 | } 21 | 22 | # Initialize a LogisticRegression object 23 | logistic_regression = LogisticRegression() 24 | 25 | # Return the classifier and the hyperparameter grid 26 | return logistic_regression, param_logsticRegression 27 | -------------------------------------------------------------------------------- /_classification/models/decision_tree.py: -------------------------------------------------------------------------------- 1 | from sklearn.tree import DecisionTreeClassifier 2 | 3 | # ******** Get Decision Tree Function ******** 4 | def get_dt(): 5 | """ 6 | Initializes a Decision Tree Classifier and provides a parameter grid for hyperparameter tuning. 7 | 8 | Returns: 9 | tuple: 10 | - DecisionTreeClassifier object: An instance of the Decision Tree Classifier. 11 | - dict: A dictionary containing hyperparameter options for tuning. 12 | """ 13 | 14 | # Define the hyperparameter grid for the Decision Tree Classifier 15 | param_decisionTree = { 16 | 'criterion': ['gini', 'entropy'], # Criterion for splitting ('gini' impurity or 'entropy' for information gain) 17 | 'max_depth': [None, 10, 20, 30, 40], # Maximum depth of the tree (None means unlimited depth) 18 | 'min_samples_split': [2, 5, 10], # Minimum number of samples required to split an internal node 19 | 'min_samples_leaf': [1, 2, 4] # Minimum number of samples required to be at a leaf node 20 | } 21 | 22 | # Initialize a DecisionTreeClassifier object 23 | decision_tree = DecisionTreeClassifier() 24 | 25 | # Return the classifier and the hyperparameter grid 26 | return decision_tree, param_decisionTree 27 | -------------------------------------------------------------------------------- /_classification/models/random_forest.py: -------------------------------------------------------------------------------- 1 | from sklearn.ensemble import RandomForestClassifier 2 | 3 | # ******** Get Random Forest Function ******** 4 | def get_rf(): 5 | """ 6 | Initializes a Random Forest classifier and provides a parameter grid for hyperparameter tuning. 7 | 8 | Returns: 9 | tuple: 10 | - RandomForestClassifier object: An instance of the Random Forest classifier. 11 | - dict: A dictionary containing hyperparameter options for tuning. 
12 | """ 13 | 14 | # Define the hyperparameter grid for the Random Forest classifier 15 | param_randomForest = { 16 | 'n_estimators': [50, 100, 200], # Number of trees in the forest 17 | 'criterion': ['gini', 'entropy'], # Splitting criteria: Gini Impurity or Entropy 18 | 'max_depth': [None, 10, 20, 30], # Maximum depth of the trees (None means no limit) 19 | 'min_samples_split': [2, 5, 10], # Minimum samples required to split an internal node 20 | 'min_samples_leaf': [1, 2, 4] # Minimum samples required at a leaf node 21 | } 22 | 23 | # Initialize a RandomForestClassifier object 24 | random_forest = RandomForestClassifier() 25 | 26 | # Return the classifier and the hyperparameter grid 27 | return random_forest, param_randomForest 28 | -------------------------------------------------------------------------------- /_classification/models/perceptron.py: -------------------------------------------------------------------------------- 1 | from sklearn.linear_model import Perceptron 2 | 3 | # ******** Get Perceptron Function ******** 4 | def get_pr(): 5 | """ 6 | Initializes a Perceptron classifier and provides a parameter grid for hyperparameter tuning. 7 | 8 | Returns: 9 | tuple: 10 | - Perceptron object: An instance of the Perceptron classifier. 11 | - dict: A dictionary containing hyperparameter options for tuning. 12 | """ 13 | 14 | # Define the hyperparameter grid for the Perceptron classifier 15 | param_perceptron = { 16 | 'penalty': ['l1', 'l2', 'elasticnet'], # Regularization types to prevent overfitting: 17 | # - 'l1': Lasso regularization (sparsity of features). 18 | # - 'l2': Ridge regularization (shrinks coefficients to reduce multicollinearity). 19 | # - 'elasticnet': Combination of L1 and L2 regularization. 20 | 21 | 'alpha': [0.0001, 0.001, 0.01, 0.1, 1], # Regularization strength (smaller values = stronger regularization). 22 | 23 | 'max_iter': [1000, 2000, 3000] # Maximum number of passes over the training data. 24 | } 25 | 26 | # Initialize a Perceptron classifier 27 | perceptron = Perceptron() 28 | 29 | # Return the classifier and the hyperparameter grid 30 | return perceptron, param_perceptron 31 | -------------------------------------------------------------------------------- /_classification/models/mlp.py: -------------------------------------------------------------------------------- 1 | from sklearn.neural_network import MLPClassifier 2 | 3 | # ******** Get Multi-Layer Perceptron (MLP) Classifier Function ******** 4 | def get_mlp(): 5 | """ 6 | Initializes a Multi-Layer Perceptron (MLP) classifier and provides a parameter grid for hyperparameter tuning. 7 | 8 | Returns: 9 | tuple: 10 | - MLPClassifier object: An instance of the Multi-Layer Perceptron classifier. 11 | - dict: A dictionary containing hyperparameter options for tuning. 
12 | """ 13 | 14 | # Define the hyperparameter grid for the MLP classifier 15 | param_mlp = { 16 | 'hidden_layer_sizes': [ 17 | (50,), (100,), (50, 50), (100, 100) 18 | ], # Number of neurons in each hidden layer, e.g., single-layer 50 or two-layers 50-50 19 | 'activation': ['tanh', 'relu'], # Activation functions: 'tanh' or 'ReLU' 20 | 'solver': ['sgd', 'adam'], # Optimization solvers: Stochastic Gradient Descent (SGD) or Adam 21 | 'alpha': [0.0001, 0.001, 0.01], # L2 regularization parameter to prevent overfitting 22 | 'learning_rate': ['constant', 'adaptive'], # Learning rate schedule 23 | 'max_iter': [100, 200, 300, 400, 500], # Maximum number of iterations to converge 24 | } 25 | 26 | # Initialize an MLPClassifier object 27 | mlp_classifier = MLPClassifier() 28 | 29 | # Return the classifier and the hyperparameter grid 30 | return mlp_classifier, param_mlp 31 | -------------------------------------------------------------------------------- /_classification/models/models.py: -------------------------------------------------------------------------------- 1 | # Importing model functions from different scripts 2 | from .decision_tree import get_dt # Decision Tree Classifier 3 | from .knn import get_knn # K-Nearest Neighbors Classifier 4 | from .logstic_regression import get_lr # Logistic Regression Classifier 5 | from .mlp import get_mlp # Multi-Layer Perceptron Classifier 6 | from .naive_bayes import get_nb # Naive Bayes Classifier 7 | from .perceptron import get_pr # Perceptron Classifier 8 | from .random_forest import get_rf # Random Forest Classifier 9 | from .svm import get_svm # Support Vector Machine Classifier 10 | from .NearestCentroid import get_nc # Nearest Centroid Classifier 11 | 12 | # ******** Get Details of All Models Function ******** 13 | def get_details_models(): 14 | """ 15 | Returns a list of all available classification models and their respective hyperparameter grids. 16 | 17 | Each model function is called, which returns the model instance and its hyperparameters. 18 | The function returns a list of tuples, where each tuple contains a model and its associated parameter grid. 19 | 20 | Returns: 21 | list: A list of tuples, each containing a classifier and a dictionary of hyperparameters. 22 | """ 23 | return [ 24 | get_nc(), # Nearest Centroid 25 | get_knn(), # K-Nearest Neighbors 26 | get_dt(), # Decision Tree 27 | get_lr(), # Logistic Regression 28 | get_mlp(), # Multi-Layer Perceptron 29 | get_nb(), # Naive Bayes 30 | get_pr(), # Perceptron 31 | get_rf(), # Random Forest 32 | get_svm() # Support Vector Machine 33 | ] 34 | -------------------------------------------------------------------------------- /_classification/utils.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from .pre_process.data.parser import get_spectrums 4 | 5 | 6 | def split_data(df: pd.DataFrame) -> list: 7 | """ 8 | Splits the input DataFrame into two subsets (s1 and s2) based on specific conditions. 9 | 10 | Parameters: 11 | df: pd.DataFrame 12 | Input DataFrame containing the columns to be split. 13 | 14 | Returns: 15 | list: 16 | A list containing two NumPy arrays: 17 | - s1: Subset of the data matching the "s1" criteria. 18 | - s2: Subset of the data matching the "s2" criteria. 
19 | """ 20 | 21 | # Lists to store column names for s1 and s2 22 | s1_columns = [] 23 | s2_columns = [] 24 | 25 | # Get the predefined spectral data for s1 and s2 26 | data = get_spectrums() 27 | 28 | # Iterate through each column in the input DataFrame 29 | for column in df.columns: 30 | # Check if the column belongs to s1 based on the spectral data 31 | if check_s1_and_s2(column, data["s1"]): 32 | s1_columns.append(column) 33 | # Check if the column belongs to s2 based on the spectral data 34 | elif check_s1_and_s2(column, data["s2"]): 35 | s2_columns.append(column) 36 | 37 | # Concatenate the selected columns for s1 and s2 into separate DataFrames 38 | s1 = pd.concat([df[col] for col in s1_columns], axis=1) 39 | s2 = pd.concat([df[col] for col in s2_columns], axis=1) 40 | 41 | # Convert the DataFrames to NumPy arrays and return them as a list 42 | return [ 43 | np.array(s1), # Subset for s1 44 | np.array(s2), # Subset for s2 45 | ] 46 | 47 | 48 | def check_s1_and_s2(column: str, data: list) -> bool: 49 | """ 50 | Checks if a column name matches any item in the given spectral data list. 51 | 52 | Parameters: 53 | column: str 54 | The column name to be checked. 55 | 56 | data: list 57 | A list of predefined spectral data for matching. 58 | 59 | Returns: 60 | bool: 61 | True if the column matches any item in the list (case-insensitive), False otherwise. 62 | """ 63 | # Convert column to uppercase and check if it matches any item in the data list 64 | for _column in data: 65 | if _column in column.upper(): 66 | return True 67 | return False 68 | -------------------------------------------------------------------------------- /_classification/pre_process/dimensionality_reduction/pca.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from sklearn.decomposition import PCA 4 | 5 | # ******** PCA Split Function ******** 6 | def pca_split(X_s1: np.array, X_s2: np.array) -> np.array: 7 | """ 8 | Perform PCA on two datasets separately and combine their transformed components. 9 | 10 | Parameters: 11 | X_s1 (np.array): The first dataset (2D array) to apply PCA. 12 | X_s2 (np.array): The second dataset (2D array) to apply PCA. 13 | 14 | Returns: 15 | np.array: A combined array of PCA-transformed features from both datasets. 16 | """ 17 | 18 | # Get the best number of components for the first dataset 19 | best_n_components_s1 = get_best_n_components(X_s1) 20 | # Get the best number of components for the second dataset 21 | best_n_components_s2 = get_best_n_components(X_s2) 22 | 23 | # Apply PCA to the first dataset with the optimal number of components 24 | X_pca_s1 = pca(X_s1, best_n_components_s1) 25 | # Apply PCA to the second dataset with the optimal number of components 26 | X_pca_s2 = pca(X_s2, best_n_components_s2) 27 | 28 | # Combine the PCA-transformed datasets by horizontally stacking them 29 | X = np.hstack((X_pca_s1, X_pca_s2)) 30 | return X 31 | 32 | # ******** Get Best Number of Components ******** 33 | def get_best_n_components(X: np.array) -> int: 34 | """ 35 | Determine the optimal number of PCA components that explain at least 95% of the variance. 36 | 37 | Parameters: 38 | X (np.array): The dataset (2D array) to analyze. 39 | 40 | Returns: 41 | int: The optimal number of components. 
42 | """ 43 | 44 | # Fit PCA on the dataset without specifying the number of components 45 | pca = PCA().fit(X) 46 | 47 | # Compute the cumulative variance explained by each component 48 | cumulative_variance = np.cumsum(pca.explained_variance_ratio_) 49 | 50 | # Find the smallest number of components that explain at least 95% of the variance 51 | best_n_components = np.argmax(cumulative_variance >= 0.95) + 1 52 | return best_n_components 53 | 54 | # ******** PCA Function ******** 55 | def pca(X: np.array, n_components_best: int) -> np.array: 56 | """ 57 | Apply PCA on the dataset with a specified number of components. 58 | 59 | Parameters: 60 | X (np.array): The dataset (2D array) to transform. 61 | n_components_best (int): The number of components to retain. 62 | 63 | Returns: 64 | np.array: The PCA-transformed dataset. 65 | """ 66 | 67 | # Check if the number of components is not provided; compute the best number of components 68 | if isinstance(n_components_best, type(None)): 69 | n_components_best = get_best_n_components(X) 70 | 71 | # Initialize the PCA object with the specified number of components 72 | pca = PCA(n_components=n_components_best, svd_solver='auto') 73 | 74 | # Fit PCA to the dataset and transform it 75 | X_pca = pca.fit_transform(X) 76 | return X_pca 77 | -------------------------------------------------------------------------------- /pre_process.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | from _classification.pre_process.standardize import standardize 4 | from _classification.utils import split_data 5 | from _classification.pre_process.dimensionality_reduction.lda import lda_split, lda 6 | from _classification.pre_process.dimensionality_reduction.pca import pca_split, pca 7 | 8 | def pre_process(df: pd.DataFrame, 9 | class_column: str) -> list: 10 | """ 11 | This function preprocesses the input data by standardizing features, 12 | performing dimensionality reduction using PCA and LDA, and splitting the data. 13 | 14 | Parameters: 15 | df: pd.DataFrame 16 | The input DataFrame containing features and the class/target column. 17 | 18 | class_column: str 19 | The name of the column representing the target variable. 20 | 21 | Returns: 22 | x_data: dict 23 | A dictionary containing the original, PCA, LDA, and split versions of the data. 24 | 25 | y: pd.Series 26 | The target variable extracted from the input DataFrame. 
27 | """ 28 | 29 | # Separate features (X) and target variable (y) 30 | X, y = df.drop(class_column, axis=1), df[class_column] 31 | 32 | print("📌 Start pre process ...") 33 | 34 | # Step 1: Standardize the feature data 35 | print("--- 📌start standardize") 36 | standardize_data = standardize(X) # Standardize the input features 37 | print("--- ✅finish standardize") 38 | 39 | # Step 2: Perform dimensionality reduction (PCA and LDA) and split the data 40 | print("--- 📌start dimensionality reduction") 41 | 42 | # Initialize an empty dictionary to store processed data 43 | x_data = {} 44 | 45 | # Iterate through each standardized dataset (if multiple types of standardization are applied) 46 | for name, _data in standardize_data.items(): 47 | # Split the standardized data into two parts (e.g., for training and testing) 48 | s1, s2 = split_data(_data) 49 | 50 | # Apply PCA to the entire dataset 51 | data_pca = pca(_data, None) 52 | # Apply LDA to the entire dataset 53 | data_lda = lda(_data, y, None) 54 | 55 | # Apply PCA to the split datasets 56 | split_pca = pca_split(s1, s2) 57 | # Apply LDA to the split datasets 58 | split_lda = lda_split(s1, s2, y) 59 | 60 | # Create a temporary dictionary to store the original, PCA, LDA, and split data 61 | temp = { 62 | "original": _data, # Original standardized data 63 | "pca": data_pca, # PCA-reduced data 64 | "lda": data_lda, # LDA-reduced data 65 | "split pca": split_pca, # PCA-reduced split data (PCA on S1 | pn S2) 66 | "split lda": split_lda # LDA-reduced split data (LDA on S1 | pn S2) 67 | } 68 | 69 | # Add the processed data for the current type of standardization to the main dictionary 70 | x_data[name] = temp 71 | 72 | print("--- ✅finish dimensionality reduction") 73 | 74 | print("✅finish pre process ...") 75 | 76 | # Return the processed data (x_data) and target variable (y) 77 | return x_data, y 78 | -------------------------------------------------------------------------------- /_classification/pre_process/standardize.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.preprocessing import ( 3 | StandardScaler, 4 | MinMaxScaler, 5 | MaxAbsScaler, 6 | RobustScaler, 7 | Normalizer 8 | ) 9 | 10 | # ******** Standardize Function ******** 11 | def standardize(df: pd.DataFrame) -> dict: 12 | """ 13 | Applies various data scaling techniques to the input DataFrame. 14 | 15 | Parameters: 16 | df (pd.DataFrame): The input DataFrame to be scaled. 17 | 18 | Returns: 19 | dict: A dictionary containing the original and scaled DataFrames using different scalers. 20 | """ 21 | 22 | # Get the column names of the input DataFrame 23 | columns = df.columns 24 | 25 | # ******** Standard Scaling ******** 26 | # StandardScaler scales features by removing the mean and scaling to unit variance. 27 | scaler_standard = StandardScaler() # Initialize the StandardScaler 28 | X_standard_scaled = scaler_standard.fit_transform(df) # Fit and transform the data 29 | df_standard_scaled = pd.DataFrame(X_standard_scaled, columns=columns) # Create a DataFrame 30 | 31 | # ******** Min-Max Scaling ******** 32 | # MinMaxScaler scales features to a range between 0 and 1. 
--------------------------------------------------------------------------------
/_classification/pre_process/standardize.py:
--------------------------------------------------------------------------------
import pandas as pd
from sklearn.preprocessing import (
    StandardScaler,
    MinMaxScaler,
    MaxAbsScaler,
    RobustScaler,
    Normalizer
)

# ******** Standardize Function ********
def standardize(df: pd.DataFrame) -> dict:
    """
    Applies various data scaling techniques to the input DataFrame.

    Parameters:
        df (pd.DataFrame): The input DataFrame to be scaled.

    Returns:
        dict: A dictionary containing the original and scaled DataFrames using different scalers.
    """

    # Get the column names of the input DataFrame
    columns = df.columns

    # ******** Standard Scaling ********
    # StandardScaler scales features by removing the mean and scaling to unit variance.
    scaler_standard = StandardScaler()  # Initialize the StandardScaler
    X_standard_scaled = scaler_standard.fit_transform(df)  # Fit and transform the data
    df_standard_scaled = pd.DataFrame(X_standard_scaled, columns=columns)  # Create a DataFrame

    # ******** Min-Max Scaling ********
    # MinMaxScaler scales features to a range between 0 and 1.
    scaler_minmax = MinMaxScaler()  # Initialize the MinMaxScaler
    X_minmax_scaled = scaler_minmax.fit_transform(df)  # Fit and transform the data
    df_minmax_scaled = pd.DataFrame(X_minmax_scaled, columns=columns)  # Create a DataFrame

    # ******** Max-Abs Scaling ********
    # MaxAbsScaler scales each feature by its maximum absolute value, preserving sparsity.
    scaler_maxabs = MaxAbsScaler()  # Initialize the MaxAbsScaler
    X_maxabs_scaled = scaler_maxabs.fit_transform(df)  # Fit and transform the data
    df_maxabs_scaled = pd.DataFrame(X_maxabs_scaled, columns=columns)  # Create a DataFrame

    # ******** Robust Scaling ********
    # RobustScaler scales features using statistics that are robust to outliers
    # (e.g., median and interquartile range).
    scaler_robust = RobustScaler()  # Initialize the RobustScaler
    X_robust_scaled = scaler_robust.fit_transform(df)  # Fit and transform the data
    df_robust_scaled = pd.DataFrame(X_robust_scaled, columns=columns)  # Create a DataFrame

    # ******** Normalization ********
    # Normalizer scales each sample (row) to have unit norm, preserving the shape of the data.
    scaler_normalizer = Normalizer()  # Initialize the Normalizer
    X_normalized = scaler_normalizer.fit_transform(df)  # Fit and transform the data
    df_normalized = pd.DataFrame(X_normalized, columns=columns)  # Create a DataFrame

    # Return all scaled DataFrames along with the original DataFrame
    return {
        "original": df,                         # Original DataFrame
        "standard_scaled": df_standard_scaled,  # Standard-scaled DataFrame
        "minmax_scaled": df_minmax_scaled,      # MinMax-scaled DataFrame
        "maxabs_scaled": df_maxabs_scaled,      # MaxAbs-scaled DataFrame
        "robust_scaled": df_robust_scaled,      # Robust-scaled DataFrame
        "normalized": df_normalized             # Normalized DataFrame
    }
17 | """ 18 | 19 | # Get the best number of components for the first dataset 20 | best_n_components_s1 = get_best_n_components(X_s1, y) 21 | # Get the best number of components for the second dataset 22 | best_n_components_s2 = get_best_n_components(X_s2, y) 23 | 24 | # Apply LDA to the first dataset with the optimal number of components 25 | X_lda_s1 = lda(X_s1, y, best_n_components_s1) 26 | # Apply LDA to the second dataset with the optimal number of components 27 | X_lda_s2 = lda(X_s2, y, best_n_components_s2) 28 | 29 | # Combine the LDA-transformed datasets by horizontally stacking them 30 | X = np.hstack((X_lda_s1, X_lda_s2)) 31 | return X 32 | 33 | # ******** Get Best Number of Components ******** 34 | def get_best_n_components(X: np.array, y: np.array) -> int: 35 | """ 36 | Determine the optimal number of LDA components that explain at least 95% of the variance. 37 | 38 | Parameters: 39 | X (np.array): The dataset (2D array) to analyze. 40 | y (np.array): The target labels (1D array) associated with the dataset. 41 | 42 | Returns: 43 | int: The optimal number of components. 44 | """ 45 | 46 | # Initialize the LDA object 47 | lda = LinearDiscriminantAnalysis() 48 | 49 | # Fit LDA to the data and target labels 50 | X_lda = lda.fit_transform(X, y) 51 | 52 | # Compute the explained variance ratio 53 | explained_variance_ratio = lda.explained_variance_ratio_ 54 | 55 | # Compute the cumulative variance ratio 56 | cumulative_variance_ratio = np.cumsum(explained_variance_ratio) 57 | 58 | # Determine the smallest number of components that explain at least 95% of the variance 59 | n_components = np.argmax(cumulative_variance_ratio >= 0.95) + 1 60 | 61 | return n_components 62 | 63 | # ******** LDA Function ******** 64 | def lda(X: np.array, y: np.array, n_components_best: int) -> np.array: 65 | """ 66 | Apply Linear Discriminant Analysis (LDA) on the dataset with a specified number of components. 67 | 68 | Parameters: 69 | X (np.array): The dataset (2D array) to transform. 70 | y (np.array): The target labels (1D array) associated with the dataset. 71 | n_components_best (int): The number of components to retain. 72 | 73 | Returns: 74 | np.array: The LDA-transformed dataset. 
75 | """ 76 | 77 | # If the number of components is not provided, compute the optimal number of components 78 | if isinstance(n_components_best, type(None)): 79 | n_components_best = get_best_n_components(X, y) 80 | 81 | # Initialize the LDA object with the specified number of components 82 | lda = LinearDiscriminantAnalysis(n_components=n_components_best) 83 | 84 | # Fit LDA to the data and transform it 85 | X_lda = lda.fit_transform(X, y) 86 | return X_lda 87 | -------------------------------------------------------------------------------- /_classification/parameter_finder.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from sklearn.model_selection import GridSearchCV 3 | from sklearn.metrics import ( 4 | recall_score, 5 | accuracy_score, 6 | precision_score, 7 | f1_score, 8 | confusion_matrix, 9 | cohen_kappa_score, 10 | make_scorer 11 | ) 12 | import numpy as np 13 | import pandas as pd 14 | import time 15 | import os # For handling file paths 16 | import matplotlib.pyplot as plt 17 | import seaborn as sns # For a more aesthetic plot 18 | 19 | # Suppress all warnings 20 | warnings.filterwarnings("ignore") 21 | 22 | 23 | def classification_parameter_finder(model, 24 | parameters: dict, 25 | X_train: np.array, 26 | y_train: np.array, 27 | X_test: np.array, 28 | y_test: np.array, 29 | method: str, 30 | path: str): 31 | 32 | 33 | """ 34 | This function performs hyperparameter tuning for a given classification model using GridSearchCV, 35 | evaluates its performance on training and testing datasets, and visualizes the confusion matrix. 36 | 37 | Parameters: 38 | model: sklearn estimator 39 | The machine learning model to be tuned (e.g., RandomForestClassifier, SVC, etc.). 40 | 41 | parameters: dict 42 | The dictionary containing hyperparameters and their possible values for GridSearchCV. 43 | 44 | X_train: np.array 45 | Training feature data. 46 | 47 | y_train: np.array 48 | Training labels. 49 | 50 | X_test: np.array 51 | Testing feature data. 52 | 53 | y_test: np.array 54 | Testing labels. 55 | 56 | method: str 57 | The name of the method or experiment (used for labeling and saving files). 58 | 59 | path: str 60 | Directory path where output files, such as the confusion matrix image, will be saved. 61 | 62 | Returns: 63 | results: pandas.DataFrame 64 | A DataFrame summarizing the best model, its hyperparameters, evaluation metrics 65 | (accuracy, precision, recall, F1-score, kappa score), and runtime information. 66 | It also includes the file path of the saved confusion matrix image. 
67 | """ 68 | 69 | 70 | 71 | 72 | model_name = str(model).split('(')[0] 73 | 74 | start = time.time() 75 | 76 | 77 | kappa_scorer = make_scorer(cohen_kappa_score) 78 | 79 | grid = GridSearchCV(model, 80 | param_grid=parameters, 81 | refit=True, 82 | cv=5, 83 | n_jobs=-1, 84 | scoring=kappa_scorer) 85 | grid.fit(X_train, y_train) 86 | 87 | y_train_pred = grid.predict(X_train) 88 | y_test_pred = grid.predict(X_test) 89 | 90 | train_accuracy = accuracy_score(y_train, y_train_pred) 91 | test_accuracy = accuracy_score(y_test, y_test_pred) 92 | precision = precision_score(y_test, y_test_pred, average='weighted') 93 | recall = recall_score(y_test, y_test_pred, average='weighted') 94 | f1 = f1_score(y_test, y_test_pred, average='weighted') 95 | kappa = cohen_kappa_score(y_test, y_test_pred) 96 | 97 | # Confusion matrix 98 | conf_matrix = confusion_matrix(y_test, y_test_pred, normalize='true') 99 | class_labels = np.unique(y_test) 100 | 101 | # Save confusion matrix as an image 102 | conf_matrix_path = os.path.join(path, f"{model_name}_{method}_confusion_matrix.png") 103 | plt.figure(figsize=(10, 8)) 104 | sns.heatmap(conf_matrix, annot=True, fmt=".2f", cmap="Blues", xticklabels=class_labels, yticklabels=class_labels) 105 | plt.title(f"Confusion Matrix - {method}") 106 | plt.xlabel("Predicted Label") 107 | plt.ylabel("True Label") 108 | plt.tight_layout() 109 | plt.savefig(conf_matrix_path) 110 | plt.close() # Close the plot to avoid overwriting in subsequent calls 111 | 112 | 113 | end = time.time() 114 | 115 | # Store results in a DataFrame 116 | results = pd.DataFrame({ 117 | "method": [method], 118 | "model": [model_name], 119 | "best_params": [grid.best_params_], 120 | "train_accuracy": [train_accuracy], 121 | "test_accuracy": [test_accuracy], 122 | "precision": [precision], 123 | "recall": [recall], 124 | "f1_score": [f1], 125 | "kappa": [kappa], 126 | "confusion_matrix_path": [conf_matrix_path], 127 | "runtime": [end - start], 128 | "best_model": [grid.best_estimator_] 129 | }) 130 | 131 | return results 132 | -------------------------------------------------------------------------------- /train_models.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.model_selection import train_test_split 3 | 4 | from _classification.parameter_finder import classification_parameter_finder 5 | from _classification.models.models import get_details_models 6 | 7 | import warnings 8 | from sklearn.exceptions import ConvergenceWarning 9 | import os 10 | 11 | # Ignore ConvergenceWarning 12 | warnings.filterwarnings("ignore", category = ConvergenceWarning) 13 | 14 | 15 | def train_models(x_data : dict, 16 | y : pd.DataFrame, 17 | path : str, 18 | name : str): 19 | 20 | """ 21 | This function trains multiple machine learning models on various subsets of the input dataset, 22 | performs hyperparameter tuning, evaluates the models, and saves the results. 23 | 24 | Parameters: 25 | x_data: dict 26 | A dictionary containing the feature data split into sections and subsections 27 | (e.g., {"section1": {"subsection1": X_data, "subsection2": X_data}}). 28 | Each subsection represents a different feature subset for training the models. 29 | 30 | y: pd.DataFrame 31 | The target labels for the dataset. 32 | 33 | path: str 34 | The directory path where output files, such as confusion matrix images, will be saved. 35 | 36 | name: str 37 | A descriptive name for the experiment (currently unused but can be used for logging or saving results). 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Agricultural Products Classification Pipeline

## Overview
This pipeline is designed to classify agricultural products using satellite data from **SENTINEL-1** and **SENTINEL-2**. The pipeline includes the following stages:

1. **Data Standardization**: Different standardization techniques are applied to the data to make it suitable for model training.
2. **Dimensionality Reduction**: PCA and LDA are applied to reduce the dimensionality of the feature space, with separate models for each satellite's data.
3. **Model Training and Hyperparameter Optimization**: Various machine learning models are trained, and hyperparameter optimization is performed using grid search.
---

### Satellite Data Input:

The input dataset contains Earth observation data from **SENTINEL-1** and **SENTINEL-2** satellites, obtained via Google Earth Engine. The data includes various bands from both satellites, as well as additional values relevant for classification tasks.

#### Example of input data:

| **Sample** | **0_B1** | **0_B2** | **0_B3** | **0_B4** | **0_B5** | **0_B6** | **0_B7** | **0_B8** | **0_B8A** | **0_B9** | **0_B11** | **0_B12** | **0_VV** |
|------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|----------|
| **Sample 1** | 0.050643478 | 0.071909783 | 0.108879348 | 0.140969565 | 0.156472826 | 0.172709783 | 0.185292391 | 0.180054348 | 0.195056522 | 0.205251087 | 0.195241304 | 0.1603 | -1 |
| **Sample 2** | 0.051273684 | 0.07195 | 0.107911842 | 0.138413158 | 0.156592105 | 0.180571053 | 0.195072368 | 0.189626316 | 0.204071053 | 0.243975 | 0.199786842 | 0.161619737 | -1 |
| **Sample 3** | 0.064336805 | 0.097296528 | 0.140022222 | 0.176558333 | 0.187975 | 0.19215 | 0.199796528 | 0.203748611 | 0.201070833 | 0.235688194 | 0.202470833 | -15.741307 | -1 |
| **Sample 4** | 0.070949999 | 0.100846154 | 0.150261539 | 0.196115385 | 0.214473077 | 0.219430769 | 0.227103846 | 0.226692308 | 0.230776923 | 0.23485 | 0.240280769 | 0.209653846 | -1 |
| **Sample 5** | 0.071380468 | 0.101917188 | 0.151620313 | 0.198378125 | 0.213576563 | 0.215678125 | 0.222285156 | 0.224170313 | 0.224170313 | 0.235323438 | 0.235323438 | 0.208569531 | -1 |
| **Sample 6** | 0.072846154 | 0.100773077 | 0.150984615 | 0.198823077 | 0.213915385 | 0.217265385 | 0.224673077 | 0.226946154 | 0.226946154 | 0.234361538 | 0.237073077 | 0.206880769 | -1 |
| **Sample 7** | 0.067707143 | 0.103935714 | 0.152242857 | 0.200014286 | 0.209557143 | 0.213071429 | 0.221978571 | 0.229471429 | 0.223307143 | 0.232307143 | 0.232307143 | 0.205528571 | -1 |
| **Sample 8** | 0.097139552 | 0.130318657 | 0.162661194 | 0.194323881 | 0.209510448 | 0.212884328 | 0.222468657 | 0.230838806 | 0.230782836 | 0.236003731 | 0.311174627 | 0.283676866 | -1 |
| **Sample 9** | 0.070247222 | 0.097663194 | 0.129397222 | 0.159320833 | 0.171659722 | 0.17494375 | 0.183878472 | 0.192720833 | 0.193045833 | 0.276390278 | 0.256345833 | 0.249488889 | -1 |
| **Sample 10** | 0.060408333 | 0.085986806 | 0.121355556 | 0.154906944 | 0.168461111 | 0.1728375 | 0.182507639 | 0.191263889 | 0.192247222 | 0.282597917 | 0.263926389 | 0.249488889 | -1 |

- The **bands** from **SENTINEL-2** include: `B1`, `B2`, `B3`, `B4`, `B5`, `B6`, `B11`, `B12`, etc.
- The **SENTINEL-1** data includes polarization bands such as `VV` and `VH`, with additional derived features such as `VV_1` and `VH_1`.
- Each row represents a single sample; the numeric prefix on each column name (e.g., the `0_` in `0_VV`) indexes the time step of the observation.

---
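As a rough sketch of how such a CSV can be grouped by satellite (the file name is a placeholder; the substring matching mirrors `split_data` in `_classification/utils.py`):

```python
import pandas as pd

# Band groups as defined in _classification/pre_process/data/spectrums.json
S1_BANDS = ["VH", "VV", "HH", "VH_1", "VV_1"]
S2_BANDS = ["B1", "B2", "B3", "B4", "B5", "B6", "B11", "B12",
            "B13", "B14", "B15", "B16", "NDVI", "EVI", "SAVI"]

df = pd.read_csv("satellite_samples.csv")  # placeholder file name

s1_cols = [c for c in df.columns if any(b in c.upper() for b in S1_BANDS)]
s2_cols = [c for c in df.columns
           if c not in s1_cols and any(b in c.upper() for b in S2_BANDS)]
print(len(s1_cols), "SENTINEL-1 columns;", len(s2_cols), "SENTINEL-2 columns")
```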

### 2. Dimensionality Reduction
Two dimensionality reduction techniques are used (see the sketch after this section):

- **PCA (Principal Component Analysis)**: Reduces the feature space by projecting the data onto a lower-dimensional subspace.
- **LDA (Linear Discriminant Analysis)**: A supervised, classification-aware dimensionality reduction technique.

Note: Because the two satellites have different band spaces, each satellite's data is also processed separately (the `split pca` / `split lda` variants in the results). For **SENTINEL-1**, the bands are:
- `VH`, `VV`, `HH`, `VH_1`, `VV_1`

For **SENTINEL-2**, the bands are:
- `B1`, `B2`, `B3`, `B4`, `B5`, `B6`, `B11`, `B12`, `B13`, `B14`, `B15`, `B16`, `NDVI`, `EVI`, `SAVI`
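
In scikit-learn terms the two reductions look roughly like this. This is a minimal sketch with made-up shapes and labels; `X` and `y` stand in for one satellite's feature matrix and the crop classes:

```python
import numpy as np
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

X = np.random.rand(200, 15)            # placeholder feature matrix (one satellite's bands)
y = np.random.randint(0, 5, size=200)  # placeholder crop-type labels

# PCA is unsupervised: keep enough components to explain 95% of the variance.
X_pca = PCA(n_components=0.95).fit_transform(X)

# LDA is supervised and yields at most (n_classes - 1) components.
X_lda = LinearDiscriminantAnalysis().fit_transform(X, y)

print(X_pca.shape, X_lda.shape)
```

For the `split pca` / `split lda` variants, the same reduction would be fitted on each satellite's band subset separately before the pieces are recombined.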

### 3. Model Training & Hyperparameter Optimization
The following models are trained, using grid search for hyperparameter optimization:

1. **Decision Tree Classifier**
2. **K-Nearest Neighbors (KNN)**
3. **Logistic Regression**
4. **Multilayer Perceptron (MLP)**
5. **Naive Bayes**
6. **Nearest Centroid**
7. **Perceptron**
8. **Random Forest**
9. **Support Vector Machine (SVM)**

Each model is optimized over its own hyperparameter grid.

---

## Model Hyperparameters

| Model | Hyperparameters |
|------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| **Decision Tree** | `criterion`: ['gini', 'entropy'], `max_depth`: [None, 10, 20, 30, 40], `min_samples_split`: [2, 5, 10], `min_samples_leaf`: [1, 2, 4] |
| **K-Nearest Neighbors** | `n_neighbors`: [1, 3, 5, 7, 9, 11, 13], `weights`: ['uniform', 'distance'], `metric`: ['euclidean', 'manhattan', 'minkowski'] |
| **Logistic Regression** | `penalty`: ['l1', 'l2'], `C`: [0.01, 0.1, 1, 10, 100], `solver`: ['liblinear', 'saga'], `max_iter`: [100, 200, 300, 500] |
| **MLP** | `hidden_layer_sizes`: [(50,), (100,), (50, 50), (100, 100)], `activation`: ['tanh', 'relu'], `solver`: ['sgd', 'adam'], `alpha`: [0.0001, 0.001, 0.01], `max_iter`: [100, 200, 300, 400, 500] |
| **Naive Bayes** | `var_smoothing`: [1e-9, 1e-8, 1e-7, 1e-6, 1e-5] |
| **Nearest Centroid** | `metric`: ['euclidean', 'manhattan'], `shrink_threshold`: [None, 0.1, 0.2, 0.5, 0.7, 0.8] |
| **Perceptron** | `penalty`: ['l1', 'l2', 'elasticnet'], `alpha`: [0.0001, 0.001, 0.01, 0.1, 1], `max_iter`: [1000, 2000, 3000] |
| **Random Forest** | `n_estimators`: [50, 100, 200], `criterion`: ['gini', 'entropy'], `max_depth`: [None, 10, 20, 30], `min_samples_split`: [2, 5, 10], `min_samples_leaf`: [1, 2, 4] |
| **SVM** | `C`: [0.1, 1, 10, 100, 1000], `kernel`: ['rbf'], `gamma`: [0.001, 0.01, 0.1, 1] |

---

## Requirements

- Python 3.x
- pandas
- scikit-learn
- numpy
- matplotlib (for plotting and visualization)

---

## Setup

1. **Clone the repository:**

   ```bash
   git clone https://github.com/parvvaresh/Classification-of-satellite-images.git
   cd Classification-of-satellite-images
   ```

2. **Install dependencies:**

   First, create a virtual environment (optional but recommended):

   ```bash
   python -m venv venv
   source venv/bin/activate  # On Windows: venv\Scripts\activate
   ```

   Then, install the required Python libraries:

   ```bash
   pip install -r requirements.txt
   ```

## Usage

The pipeline ingests satellite data, performs preprocessing, applies dimensionality reduction, and trains various models with optimized hyperparameters.

To use the pipeline, follow the steps below:

1. Prepare your input data as a CSV file with the necessary features and target column.
2. Adjust the input data and parameters in the respective scripts to suit your specific agricultural classification problem.

---

## Example Usage

```python
import pandas as pd
from pre_process import pre_process
from train_models import train_models

def classification(df: pd.DataFrame,
                   class_column: str,
                   path: str,
                   name: str) -> None:
    """
    Preprocess the data in `df` and train models on the processed data.

    Parameters:
    - df: pandas DataFrame containing the data to be classified.
    - class_column: The column name containing the target variable.
    - path: The directory where the trained models and results will be saved.
    - name: The name used to save the model and results.
    """
    # Preprocess the data to separate features and target variable
    x_data, y = pre_process(df, class_column)

    # Train models using the processed data
    train_models(x_data, y, path, name)

# Example usage
path_csv = "/data.csv"  # Path to your dataset
df = pd.read_csv(path_csv)  # Read the CSV into a DataFrame

# Call the classification function
classification(df, "ClassColumn", "/home/reza/data_test", "data_test")
```

### Explanation:
- `pre_process(df, class_column)` processes the data, separating the features (`x_data`) from the target variable (`y`).
- `train_models(x_data, y, path, name)` trains the machine learning models and saves the results to the specified directory (`path`) under the given name (`name`).

### Output:
After training, the results are saved to a CSV file containing the following information:
- **Method**: The data standardization and dimensionality reduction method used.
- **Model name**: The name of the model.
- **Best hyperparameters**: The best hyperparameters found during grid search.
- **Train accuracy**: Accuracy on the training dataset.
- **Test accuracy**: Accuracy on the test dataset.
- **Precision**, **Recall**, **F1 Score**, **Kappa**: Metrics for model evaluation.
- **Confusion Matrix path**: Path to the confusion matrix plot.
- **Runtime**: The time taken to train the model.
- **Best model**: The best estimator with its fitted parameters.
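
To inspect these results programmatically, a small sketch like the following works; it assumes the sample results file shipped at `assets/result.csv` and the column names shown there:

```python
import pandas as pd

# Load the results file produced by the pipeline (a sample copy lives in assets/result.csv).
results = pd.read_csv("assets/result.csv", index_col=0)

# Rank method/model combinations by held-out accuracy, breaking ties with kappa.
ranked = results.sort_values(["test_accuracy", "kappa"], ascending=False)
print(ranked[["method", "model", "test_accuracy", "f1_score", "kappa"]].head(10))
```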

---

### Sample Result Entry:

| method | model | best_params | train_accuracy | test_accuracy | precision | recall | f1_score | kappa | confusion_matrix_path | runtime | best_model |
|---------------------|------------------------|-------------------------------------------------------------------|----------------|---------------|-----------|--------|----------|-------|-----------------------|---------|----------------------------------------------------------|
| original-original | KNeighborsClassifier | `{'metric': 'euclidean', 'n_neighbors': 1, 'weights': 'uniform'}` | 1.0 | 0.925 | 0.909 | 0.925 | 0.913 | 0.903 | path_to_matrix.png | 2.43 | KNeighborsClassifier(metric='euclidean', n_neighbors=1) |

---

--------------------------------------------------------------------------------
/assets/result.csv:
--------------------------------------------------------------------------------
1 | ,method,model,best_params,train_accuracy,test_accuracy,precision,recall,f1_score,kappa,confusion_matrix,runtime,best model
2 | 0,orginal-original,KNeighborsClassifier,"{'metric': 'euclidean', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,0.925,0.9099206349206348,0.925,0.9134917043740574,0.9031476997578692,"[ 8 0 0 0 0 0 0 7 1 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0
3 | 0 0 1 0 0 0 1 0 0 0 0 7]",2.436521053314209,"KNeighborsClassifier(metric='euclidean', n_neighbors=1)"
4 | 1,orginal-original,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 40, 'min_samples_leaf': 1, 'min_samples_split': 2}",1.0,0.85,0.8743006993006993,0.85,0.8462672064777328,0.8073836276083467,"[ 8 0 0 0 0 0 0 0 5 1 0 0 0 2 0 0 11 0 1 0 0 0 0 0
5 | 2 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
6 | 8]",3.389958620071411,"DecisionTreeClassifier(criterion='entropy', max_depth=40)"
7 | 2,orginal-pca,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,0.95,0.9297008547008547,0.95,0.9379019607843138,0.9356395816572808,"[ 8 0 0 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0
8 | 0 0 1 0 0 0 1 0 0 0 0 7]",0.22979021072387695,"KNeighborsClassifier(metric='manhattan', n_neighbors=1)"
9 | 3,orginal-pca,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 40, 'min_samples_leaf': 2, 'min_samples_split': 10}",0.89937106918239,0.825,0.8111111111111111,0.825,0.8144117647058824,0.7745571658615137,"[ 6 0 0 0 0 2 0 8 0 0 0 0 0 1 11 0 0 0 0 0 1 2 0 0
10 | 0 0 0 0 0 1 2 0 0 0 0 6]",0.6804001331329346,"DecisionTreeClassifier(criterion='entropy', max_depth=40, min_samples_leaf=2,
11 | min_samples_split=10)"
12 | 4,orginal-lda,KNeighborsClassifier,"{'metric': 'euclidean', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,1.0,1.0,1.0,1.0,1.0,"[ 8 0 0 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0
13 | 0 0 0 0 1 0 0 0 0 0 0 8]",0.11695146560668945,"KNeighborsClassifier(metric='euclidean', n_neighbors=1)"
14 | 5,orginal-lda,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 30, 'min_samples_leaf': 1, 'min_samples_split': 10}",1.0,1.0,1.0,1.0,1.0,1.0,"[ 8 0 0 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0
15 | 0 0 0 0 1 0 0 0 0 0 0 8]",0.188154935836792,"DecisionTreeClassifier(criterion='entropy', max_depth=30, min_samples_split=10)"
16 | 6,orginal-split pca,KNeighborsClassifier,"{'metric': 'euclidean', 'n_neighbors': 7, 'weights': 'uniform'}",0.6666666666666666,0.625,0.6489010989010989,0.625,0.6327002801120448,0.5230524642289349,"[6 1 0 0 1 0 0 0 5 2 1 0 0 0 0 0 8 0 0 1 3 0 0 1 2 0 0 0 0 0 0 0 0 0 0 1 0
17 | 0 0 0 0 0 2 0 2 0 0 0
4]",0.11127662658691406,"KNeighborsClassifier(metric='euclidean', n_neighbors=7)" 18 | 7,orginal-split pca,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 30, 'min_samples_leaf': 4, 'min_samples_split': 2}",0.7735849056603774,0.55,0.6736904761904762,0.55,0.5898268398268398,0.4533029612756265,"[6 1 0 0 1 0 0 0 5 1 2 0 0 0 0 0 7 0 0 4 1 0 0 1 2 0 0 0 0 0 0 0 0 0 0 1 0 19 | 0 0 0 0 0 1 1 1 0 0 3 2]",0.1759507656097412,"DecisionTreeClassifier(criterion='entropy', max_depth=30, min_samples_leaf=4)" 20 | 8,orginal-split lda,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,0.875,0.8535714285714284,0.875,0.861923076923077,0.8385794995964487,"[ 8 0 0 0 0 0 0 7 0 1 0 0 0 0 12 0 0 0 0 1 0 2 0 0 21 | 0 0 0 0 0 1 0 0 2 0 0 6]",0.10943460464477539,"KNeighborsClassifier(metric='manhattan', n_neighbors=1)" 22 | 9,orginal-split lda,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 10}",0.9245283018867925,0.875,0.9199999999999999,0.875,0.889345238095238,0.8427672955974843,"[ 7 0 0 0 1 0 0 0 6 0 2 0 0 0 0 0 11 0 0 0 1 0 0 0 23 | 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 24 | 7]",0.18211960792541504,"DecisionTreeClassifier(criterion='entropy', max_depth=10, min_samples_leaf=4, 25 | min_samples_split=10)" 26 | 10,standard_scaled-original,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,0.975,0.951923076923077,0.975,0.9630000000000001,0.9678197908286403,"[ 8 0 0 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0 27 | 0 0 1 0 0 0 0 0 0 0 0 8]",0.5977652072906494,"KNeighborsClassifier(metric='manhattan', n_neighbors=1)" 28 | 11,standard_scaled-original,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 20, 'min_samples_leaf': 1, 'min_samples_split': 5}",0.9937106918238994,0.85,0.840967365967366,0.85,0.8352781954887218,0.8061389337641357,"[ 8 0 0 0 0 0 0 5 1 0 0 2 0 1 11 0 0 0 0 0 0 2 0 1 29 | 0 0 1 0 0 0 0 0 0 0 0 8]",3.612668752670288,"DecisionTreeClassifier(criterion='entropy', max_depth=20, min_samples_split=5)" 30 | 12,standard_scaled-pca,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 3, 'weights': 'distance'}",1.0,0.95,0.93125,0.95,0.9392857142857143,0.9361022364217253,"[ 8 0 0 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0 31 | 0 0 0 0 0 1 0 0 0 1 0 7]",0.27402448654174805,"KNeighborsClassifier(metric='manhattan', n_neighbors=3, weights='distance')" 32 | 13,standard_scaled-pca,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 10}",0.9245283018867925,0.8,0.8541666666666666,0.8,0.8071794871794872,0.7446129289704708,"[ 7 0 0 0 1 0 0 0 5 2 1 0 0 0 0 0 12 0 0 0 0 0 0 0 33 | 2 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 2 0 0 1 34 | 5]",0.7710146903991699,"DecisionTreeClassifier(criterion='entropy', min_samples_leaf=2, 35 | min_samples_split=10)" 36 | 14,standard_scaled-lda,KNeighborsClassifier,"{'metric': 'euclidean', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,1.0,1.0,1.0,1.0,1.0,"[ 8 0 0 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0 37 | 0 0 0 0 1 0 0 0 0 0 0 8]",0.11973190307617188,"KNeighborsClassifier(metric='euclidean', n_neighbors=1)" 38 | 15,standard_scaled-lda,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 40, 'min_samples_leaf': 1, 'min_samples_split': 2}",1.0,1.0,1.0,1.0,1.0,1.0,"[ 8 0 0 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0 39 | 0 0 0 0 1 0 0 0 0 0 0 8]",0.19898748397827148,"DecisionTreeClassifier(criterion='entropy', max_depth=40)" 40 | 
16,standard_scaled-split pca,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 13, 'weights': 'uniform'}",0.7295597484276729,0.7,0.7158882783882783,0.7,0.7021288515406162,0.6175298804780877,"[ 6 0 0 0 0 2 0 6 0 2 0 0 0 0 10 0 1 1 0 0 1 2 0 0 41 | 1 0 0 0 0 0 2 0 2 0 0 4]",0.12825870513916016,"KNeighborsClassifier(metric='manhattan', n_neighbors=13)" 42 | 17,standard_scaled-split pca,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 2}",0.7987421383647799,0.575,0.6310389610389611,0.575,0.589945652173913,0.47571318427139553,"[5 1 0 0 1 0 1 0 5 1 2 0 0 0 0 0 8 0 0 3 1 0 0 0 3 0 0 0 0 0 0 0 0 0 0 1 0 43 | 0 0 0 0 0 2 1 2 0 0 1 2]",0.22144865989685059,"DecisionTreeClassifier(criterion='entropy', max_depth=10, min_samples_leaf=4)" 44 | 18,standard_scaled-split lda,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,0.875,0.8535714285714284,0.875,0.861923076923077,0.8385794995964487,"[ 8 0 0 0 0 0 0 7 0 1 0 0 0 0 12 0 0 0 0 1 0 2 0 0 45 | 0 0 0 0 0 1 0 0 2 0 0 6]",0.11701035499572754,"KNeighborsClassifier(metric='manhattan', n_neighbors=1)" 46 | 19,standard_scaled-split lda,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 20, 'min_samples_leaf': 4, 'min_samples_split': 10}",0.9245283018867925,0.875,0.9199999999999999,0.875,0.889345238095238,0.8427672955974843,"[ 7 0 0 0 1 0 0 0 6 0 2 0 0 0 0 0 11 0 0 0 1 0 0 0 47 | 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 48 | 7]",0.1955115795135498,"DecisionTreeClassifier(criterion='entropy', max_depth=20, min_samples_leaf=4, 49 | min_samples_split=10)" 50 | 20,minmax_scaled-original,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 3, 'weights': 'distance'}",1.0,0.95,0.9297008547008547,0.95,0.9379019607843138,0.9356395816572808,"[ 8 0 0 0 0 0 0 7 1 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0 51 | 0 0 0 0 0 1 0 0 0 0 0 8]",0.6892731189727783,"KNeighborsClassifier(metric='manhattan', n_neighbors=3, weights='distance')" 52 | 21,minmax_scaled-original,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 5}",0.9748427672955975,0.825,0.8366666666666667,0.825,0.8223015873015873,0.7738287560581584,"[ 8 0 0 0 0 0 0 0 5 3 0 0 0 0 0 1 11 0 0 0 0 0 0 0 53 | 2 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 54 | 7]",3.1425938606262207,"DecisionTreeClassifier(criterion='entropy', min_samples_leaf=2, 55 | min_samples_split=5)" 56 | 22,minmax_scaled-pca,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 3, 'weights': 'distance'}",1.0,0.95,0.93125,0.95,0.9392857142857143,0.9361022364217253,"[ 8 0 0 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0 57 | 0 0 0 0 0 1 0 0 0 1 0 7]",0.2655339241027832,"KNeighborsClassifier(metric='manhattan', n_neighbors=3, weights='distance')" 58 | 23,minmax_scaled-pca,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 40, 'min_samples_leaf': 2, 'min_samples_split': 5}",0.9685534591194969,0.775,0.7986263736263737,0.775,0.785,0.7140587768069897,"[ 7 0 0 0 1 0 0 0 5 1 2 0 0 0 0 1 10 0 1 0 0 0 0 2 59 | 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 60 | 8]",0.5482699871063232,"DecisionTreeClassifier(criterion='entropy', max_depth=40, min_samples_leaf=2, 61 | min_samples_split=5)" 62 | 24,minmax_scaled-lda,KNeighborsClassifier,"{'metric': 'euclidean', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,1.0,1.0,1.0,1.0,1.0,"[ 8 0 0 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0 63 | 0 0 0 0 1 0 0 0 0 0 0 
8]",0.09939718246459961,"KNeighborsClassifier(metric='euclidean', n_neighbors=1)" 64 | 25,minmax_scaled-lda,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 2}",1.0,1.0,1.0,1.0,1.0,1.0,"[ 8 0 0 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0 65 | 0 0 0 0 1 0 0 0 0 0 0 8]",0.15778470039367676,"DecisionTreeClassifier(criterion='entropy', max_depth=10, min_samples_leaf=2)" 66 | 26,minmax_scaled-split pca,KNeighborsClassifier,"{'metric': 'euclidean', 'n_neighbors': 11, 'weights': 'uniform'}",0.7735849056603774,0.7,0.7283333333333333,0.7,0.7012726244343892,0.620253164556962,"[ 6 0 0 0 0 2 0 5 0 2 0 1 0 0 10 0 1 1 0 0 0 3 0 0 67 | 1 0 0 0 0 0 2 0 2 0 0 4]",0.09162116050720215,"KNeighborsClassifier(metric='euclidean', n_neighbors=11)" 68 | 27,minmax_scaled-split pca,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 10}",0.8238993710691824,0.625,0.659047619047619,0.625,0.6393582887700535,0.5297805642633229,[6 0 0 0 0 2 0 6 1 1 0 0 0 0 8 0 2 2 0 1 0 2 0 0 1 0 0 0 0 0 2 1 1 0 1 3],0.14446592330932617,"DecisionTreeClassifier(criterion='entropy', max_depth=10, min_samples_leaf=4, 69 | min_samples_split=10)" 70 | 28,minmax_scaled-split lda,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,0.875,0.8535714285714284,0.875,0.861923076923077,0.8385794995964487,"[ 8 0 0 0 0 0 0 7 0 1 0 0 0 0 12 0 0 0 0 1 0 2 0 0 71 | 0 0 0 0 0 1 0 0 2 0 0 6]",0.09004521369934082,"KNeighborsClassifier(metric='manhattan', n_neighbors=1)" 72 | 29,minmax_scaled-split lda,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 40, 'min_samples_leaf': 4, 'min_samples_split': 2}",0.9245283018867925,0.9,0.9199999999999999,0.9,0.9026785714285716,0.8734177215189873,"[ 8 0 0 0 0 0 0 6 0 2 0 0 0 0 11 0 0 1 0 0 0 3 0 0 73 | 0 0 0 0 1 0 0 0 1 0 0 7]",0.16330218315124512,"DecisionTreeClassifier(criterion='entropy', max_depth=40, min_samples_leaf=4)" 74 | 30,maxabs_scaled-original,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,0.9,0.89625,0.9,0.8890476190476191,0.8709677419354839,"[ 8 0 0 0 0 0 0 6 1 1 0 0 0 0 12 0 0 0 0 0 0 3 0 0 75 | 0 0 1 0 0 0 0 0 1 0 0 7]",0.5694336891174316,"KNeighborsClassifier(metric='manhattan', n_neighbors=1)" 76 | 31,maxabs_scaled-original,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 20, 'min_samples_leaf': 2, 'min_samples_split': 5}",0.9748427672955975,0.8,0.8366666666666667,0.8,0.8089682539682539,0.7431781701444622,"[ 7 0 0 0 1 0 0 0 5 3 0 0 0 0 0 1 11 0 0 0 0 0 0 0 77 | 2 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 78 | 7]",3.2607643604278564,"DecisionTreeClassifier(criterion='entropy', max_depth=20, min_samples_leaf=2, 79 | min_samples_split=5)" 80 | 32,maxabs_scaled-pca,KNeighborsClassifier,"{'metric': 'euclidean', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,0.925,0.908173076923077,0.925,0.9139523809523811,0.9038461538461539,"[ 8 0 0 0 0 0 0 7 0 1 0 0 0 0 12 0 0 0 0 0 0 3 0 0 81 | 0 1 0 0 0 0 0 0 1 0 0 7]",0.2538721561431885,"KNeighborsClassifier(metric='euclidean', n_neighbors=1)" 82 | 33,maxabs_scaled-pca,DecisionTreeClassifier,"{'criterion': 'gini', 'max_depth': 20, 'min_samples_leaf': 1, 'min_samples_split': 5}",0.9685534591194969,0.675,0.7726190476190476,0.675,0.6906759906759906,0.5950155763239875,"[ 8 0 0 0 0 0 0 0 2 2 2 2 0 0 0 1 10 1 0 0 0 0 0 1 83 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 2 84 | 5]",0.5452220439910889,"DecisionTreeClassifier(max_depth=20, 
min_samples_split=5)" 85 | 34,maxabs_scaled-lda,KNeighborsClassifier,"{'metric': 'euclidean', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,1.0,1.0,1.0,1.0,1.0,"[ 8 0 0 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0 86 | 0 0 0 0 1 0 0 0 0 0 0 8]",0.09782814979553223,"KNeighborsClassifier(metric='euclidean', n_neighbors=1)" 87 | 35,maxabs_scaled-lda,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 10}",1.0,1.0,1.0,1.0,1.0,1.0,"[ 8 0 0 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0 88 | 0 0 0 0 1 0 0 0 0 0 0 8]",0.15465188026428223,"DecisionTreeClassifier(criterion='entropy', min_samples_split=10)" 89 | 36,maxabs_scaled-split pca,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 13, 'weights': 'uniform'}",0.7547169811320755,0.7,0.7091025641025641,0.7,0.6912726244343892,0.6169193934557063,"[ 6 0 0 0 0 2 0 5 1 2 0 0 0 0 10 0 0 2 0 0 0 3 0 0 90 | 1 0 0 0 0 0 2 0 2 0 0 4]",0.08480620384216309,"KNeighborsClassifier(metric='manhattan', n_neighbors=13)" 91 | 37,maxabs_scaled-split pca,DecisionTreeClassifier,"{'criterion': 'gini', 'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 10}",0.8238993710691824,0.65,0.6654761904761904,0.65,0.6533882783882784,0.5534290271132376,"[ 5 0 1 0 1 0 1 0 5 2 1 0 0 0 0 0 10 0 0 1 1 0 1 0 92 | 2 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 2 1 1 0 0 0 93 | 4]",0.15716028213500977,"DecisionTreeClassifier(max_depth=10, min_samples_leaf=4, min_samples_split=10)" 94 | 38,maxabs_scaled-split lda,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,0.875,0.8535714285714284,0.875,0.861923076923077,0.8385794995964487,"[ 8 0 0 0 0 0 0 7 0 1 0 0 0 0 12 0 0 0 0 1 0 2 0 0 95 | 0 0 0 0 0 1 0 0 2 0 0 6]",0.09535479545593262,"KNeighborsClassifier(metric='manhattan', n_neighbors=1)" 96 | 39,maxabs_scaled-split lda,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': None, 'min_samples_leaf': 4, 'min_samples_split': 2}",0.9245283018867925,0.875,0.9199999999999999,0.875,0.889345238095238,0.8427672955974843,"[ 7 0 0 0 1 0 0 0 6 0 2 0 0 0 0 0 11 0 0 0 1 0 0 0 97 | 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 98 | 7]",0.1549670696258545,"DecisionTreeClassifier(criterion='entropy', min_samples_leaf=4)" 99 | 40,robust_scaled-original,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,0.975,0.951923076923077,0.975,0.9630000000000001,0.9678197908286403,"[ 8 0 0 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0 100 | 0 0 1 0 0 0 0 0 0 0 0 8]",0.6036934852600098,"KNeighborsClassifier(metric='manhattan', n_neighbors=1)" 101 | 41,robust_scaled-original,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 30, 'min_samples_leaf': 1, 'min_samples_split': 2}",1.0,0.85,0.9,0.85,0.8576556776556776,0.8067632850241546,"[ 7 0 0 0 1 0 0 0 5 3 0 0 0 0 0 0 12 0 0 0 0 0 0 0 102 | 2 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 103 | 8]",3.1773626804351807,"DecisionTreeClassifier(criterion='entropy', max_depth=30)" 104 | 42,robust_scaled-pca,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,0.95,0.9321428571428572,0.95,0.9385897435897436,0.9354317998385795,"[ 8 0 0 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0 105 | 0 0 1 0 0 0 0 0 1 0 0 7]",0.13072633743286133,"KNeighborsClassifier(metric='manhattan', n_neighbors=1)" 106 | 43,robust_scaled-pca,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 20, 'min_samples_leaf': 2, 'min_samples_split': 
5}",0.949685534591195,0.825,0.875,0.825,0.8324175824175825,0.7752808988764045,"[ 7 0 0 0 1 0 0 0 6 1 1 0 0 0 0 0 12 0 0 0 0 0 0 1 107 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 2 0 0 0 108 | 5]",0.6361377239227295,"DecisionTreeClassifier(criterion='entropy', max_depth=20, min_samples_leaf=2, 109 | min_samples_split=5)" 110 | 44,robust_scaled-lda,KNeighborsClassifier,"{'metric': 'euclidean', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,1.0,1.0,1.0,1.0,1.0,"[ 8 0 0 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0 111 | 0 0 0 0 1 0 0 0 0 0 0 8]",0.09947061538696289,"KNeighborsClassifier(metric='euclidean', n_neighbors=1)" 112 | 45,robust_scaled-lda,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 5}",1.0,1.0,1.0,1.0,1.0,1.0,"[ 8 0 0 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0 113 | 0 0 0 0 1 0 0 0 0 0 0 8]",0.15237903594970703,"DecisionTreeClassifier(criterion='entropy', max_depth=10, min_samples_split=5)" 114 | 46,robust_scaled-split pca,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 9, 'weights': 'uniform'}",0.6981132075471698,0.7,0.7063644688644689,0.7,0.6983193277310925,0.6175298804780877,"[ 6 1 0 0 0 1 0 6 0 2 0 0 0 0 10 0 1 1 0 0 1 2 0 0 115 | 1 0 0 0 0 0 2 0 2 0 0 4]",0.0860593318939209,"KNeighborsClassifier(metric='manhattan', n_neighbors=9)" 116 | 47,robust_scaled-split pca,DecisionTreeClassifier,"{'criterion': 'gini', 'max_depth': 20, 'min_samples_leaf': 4, 'min_samples_split': 5}",0.7861635220125787,0.55,0.5792582417582418,0.55,0.5605714285714286,0.4330708661417323,"[4 1 0 1 1 0 1 0 5 2 1 0 0 0 0 0 8 0 0 2 2 0 0 1 2 0 0 0 0 0 0 0 0 0 0 1 0 117 | 0 0 0 0 0 2 1 2 0 0 0 3]",0.15172386169433594,"DecisionTreeClassifier(max_depth=20, min_samples_leaf=4, min_samples_split=5)" 118 | 48,robust_scaled-split lda,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,0.875,0.8535714285714284,0.875,0.861923076923077,0.8385794995964487,"[ 8 0 0 0 0 0 0 7 0 1 0 0 0 0 12 0 0 0 0 1 0 2 0 0 119 | 0 0 0 0 0 1 0 0 2 0 0 6]",0.09231424331665039,"KNeighborsClassifier(metric='manhattan', n_neighbors=1)" 120 | 49,robust_scaled-split lda,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': None, 'min_samples_leaf': 4, 'min_samples_split': 5}",0.9245283018867925,0.875,0.9199999999999999,0.875,0.889345238095238,0.8427672955974843,"[ 7 0 0 0 1 0 0 0 6 0 2 0 0 0 0 0 11 0 0 0 1 0 0 0 121 | 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 122 | 7]",0.16288161277770996,"DecisionTreeClassifier(criterion='entropy', min_samples_leaf=4, 123 | min_samples_split=5)" 124 | 50,normalized-original,KNeighborsClassifier,"{'metric': 'euclidean', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,0.9,0.8927777777777777,0.9,0.8879971988795518,0.8704453441295547,"[ 8 0 0 0 0 0 0 7 1 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0 125 | 0 0 1 0 0 0 1 0 1 0 0 6]",0.6826939582824707,"KNeighborsClassifier(metric='euclidean', n_neighbors=1)" 126 | 51,normalized-original,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 40, 'min_samples_leaf': 2, 'min_samples_split': 2}",0.9622641509433962,0.85,0.8616666666666667,0.85,0.848968253968254,0.8067632850241546,"[ 8 0 0 0 0 0 0 5 3 0 0 0 0 1 11 0 0 0 0 0 0 3 0 0 127 | 0 0 1 0 0 0 0 0 0 0 1 7]",2.462327241897583,"DecisionTreeClassifier(criterion='entropy', max_depth=40, min_samples_leaf=2)" 128 | 52,normalized-pca,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,0.9,0.8927777777777777,0.9,0.8879971988795518,0.8704453441295547,"[ 8 0 0 0 0 0 0 7 1 0 0 0 
0 0 12 0 0 0 0 0 0 3 0 0 129 | 0 0 1 0 0 0 1 0 1 0 0 6]",0.12588214874267578,"KNeighborsClassifier(metric='manhattan', n_neighbors=1)" 130 | 53,normalized-pca,DecisionTreeClassifier,"{'criterion': 'gini', 'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 5}",0.9182389937106918,0.8,0.8333333333333333,0.8,0.7824102368220015,0.7415185783521809,"[ 6 0 2 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0 131 | 0 0 1 0 0 0 3 0 1 0 1 3]",0.46621108055114746,"DecisionTreeClassifier(max_depth=10, min_samples_leaf=4, min_samples_split=5)" 132 | 54,normalized-lda,KNeighborsClassifier,"{'metric': 'euclidean', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,1.0,1.0,1.0,1.0,1.0,"[ 8 0 0 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0 133 | 0 0 0 0 1 0 0 0 0 0 0 8]",0.09972310066223145,"KNeighborsClassifier(metric='euclidean', n_neighbors=1)" 134 | 55,normalized-lda,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 5}",1.0,0.95,0.96,0.95,0.9492063492063492,0.9362041467304625,"[ 8 0 0 0 0 0 0 8 0 0 0 0 0 0 12 0 0 0 0 0 0 3 0 0 135 | 0 0 0 0 1 0 2 0 0 0 0 6]",0.14361000061035156,"DecisionTreeClassifier(criterion='entropy', max_depth=10, min_samples_split=5)" 136 | 56,normalized-split pca,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 11, 'weights': 'uniform'}",0.6792452830188679,0.7,0.7159722222222222,0.7,0.6861344537815126,0.6141479099678457,"[ 7 0 0 0 1 0 0 0 5 1 2 0 0 0 0 0 11 0 0 0 1 0 0 1 137 | 2 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 1 3 0 0 0 138 | 3]",0.09440398216247559,"KNeighborsClassifier(metric='manhattan', n_neighbors=11)" 139 | 57,normalized-split pca,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 10}",0.7735849056603774,0.55,0.5639682539682539,0.55,0.5500188536953242,0.4339622641509434,"[5 1 0 1 1 0 0 0 5 1 2 0 0 0 0 0 8 0 0 1 3 0 0 1 2 0 0 0 0 0 0 0 0 0 0 1 0 140 | 0 0 0 0 0 1 3 2 0 0 0 2]",0.1540205478668213,"DecisionTreeClassifier(criterion='entropy', max_depth=10, min_samples_split=10)" 141 | 58,normalized-split lda,KNeighborsClassifier,"{'metric': 'manhattan', 'n_neighbors': 1, 'weights': 'uniform'}",1.0,0.825,0.8419642857142857,0.825,0.8219780219780219,0.7763578274760383,"[ 8 0 0 0 0 0 0 0 5 1 1 1 0 0 0 0 11 0 0 0 1 0 0 0 142 | 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 2 0 0 0 143 | 6]",0.0857245922088623,"KNeighborsClassifier(metric='manhattan', n_neighbors=1)" 144 | 59,normalized-split lda,DecisionTreeClassifier,"{'criterion': 'entropy', 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 5}",0.9748427672955975,0.875,0.8904545454545454,0.875,0.8755422647527912,0.8415213946117274,[8 0 0 0 0 0 0 7 0 1 0 0 0 0 9 0 0 3 0 0 1 2 0 0 0 0 0 0 1 0 0 0 0 0 0 8],0.16163039207458496,"DecisionTreeClassifier(criterion='entropy', min_samples_split=5)" 145 | --------------------------------------------------------------------------------