├── DBSCAN
    └── sample1.py
├── DeepLearning
    ├── customer_categorical_order_prediction
    │   ├── README.md
    │   ├── best_model.keras
    │   ├── main.py
    │   ├── models
    │   │   └── saved
    │   │   │   ├── category_1_model.keras
    │   │   │   ├── category_2_model.keras
    │   │   │   ├── category_3_model.keras
    │   │   │   ├── category_4_model.keras
    │   │   │   ├── category_5_model.keras
    │   │   │   ├── category_6_model.keras
    │   │   │   ├── category_7_model.keras
    │   │   │   └── category_8_model.keras
    │   ├── reports
    │   │   ├── confusion_matrices
    │   │   │   ├── confusion_matrix_Category_1.png
    │   │   │   ├── confusion_matrix_Category_2.png
    │   │   │   ├── confusion_matrix_Category_3.png
    │   │   │   ├── confusion_matrix_Category_4.png
    │   │   │   ├── confusion_matrix_Category_5.png
    │   │   │   ├── confusion_matrix_Category_6.png
    │   │   │   ├── confusion_matrix_Category_7.png
    │   │   │   └── confusion_matrix_Category_8.png
    │   │   ├── evaluation_reports
    │   │   │   ├── evaluation_report_Category_1.txt
    │   │   │   ├── evaluation_report_Category_2.txt
    │   │   │   ├── evaluation_report_Category_3.txt
    │   │   │   ├── evaluation_report_Category_4.txt
    │   │   │   ├── evaluation_report_Category_5.txt
    │   │   │   ├── evaluation_report_Category_6.txt
    │   │   │   ├── evaluation_report_Category_7.txt
    │   │   │   └── evaluation_report_Category_8.txt
    │   │   ├── precision_recall_curves
    │   │   │   ├── precision_recall_curve_Category_1.png
    │   │   │   ├── precision_recall_curve_Category_2.png
    │   │   │   ├── precision_recall_curve_Category_3.png
    │   │   │   ├── precision_recall_curve_Category_4.png
    │   │   │   ├── precision_recall_curve_Category_5.png
    │   │   │   ├── precision_recall_curve_Category_6.png
    │   │   │   ├── precision_recall_curve_Category_7.png
    │   │   │   └── precision_recall_curve_Category_8.png
    │   │   ├── roc_curves
    │   │   │   ├── roc_curve_Category_1.png
    │   │   │   ├── roc_curve_Category_2.png
    │   │   │   ├── roc_curve_Category_3.png
    │   │   │   ├── roc_curve_Category_4.png
    │   │   │   ├── roc_curve_Category_5.png
    │   │   │   ├── roc_curve_Category_6.png
    │   │   │   ├── roc_curve_Category_7.png
    │   │   │   └── roc_curve_Category_8.png
    │   │   └── training_history
    │   │   │   ├── training_history_Category_1.png
    │   │   │   ├── training_history_Category_2.png
    │   │   │   ├── training_history_Category_3.png
    │   │   │   ├── training_history_Category_4.png
    │   │   │   ├── training_history_Category_5.png
    │   │   │   ├── training_history_Category_6.png
    │   │   │   ├── training_history_Category_7.png
    │   │   │   └── training_history_Category_8.png
    │   ├── requirements.txt
    │   ├── setup.py
    │   ├── src
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-312.pyc
    │   │   │   └── config.cpython-312.pyc
    │   │   ├── config.py
    │   │   ├── data
    │   │   │   ├── __init__.py
    │   │   │   ├── __pycache__
    │   │   │   │   ├── __init__.cpython-312.pyc
    │   │   │   │   ├── database.cpython-312.pyc
    │   │   │   │   └── feature_engineering.cpython-312.pyc
    │   │   │   ├── database.py
    │   │   │   └── feature_engineering.py
    │   │   ├── models
    │   │   │   ├── __init__.py
    │   │   │   ├── __pycache__
    │   │   │   │   ├── __init__.cpython-312.pyc
    │   │   │   │   ├── model_evaluation.cpython-312.pyc
    │   │   │   │   └── neural_network.cpython-312.pyc
    │   │   │   ├── model_evaluation.py
    │   │   │   └── neural_network.py
    │   │   └── utils
    │   │   │   ├── __init__.py
    │   │   │   ├── __pycache__
    │   │   │       ├── __init__.cpython-312.pyc
    │   │   │       └── helpers.cpython-312.pyc
    │   │   │   └── helpers.py
    │   └── tests
    │   │   ├── __init__.py
    │   │   ├── test_data.py
    │   │   └── test_models.py
    ├── customer_order_prediction
    │   ├── data
    │   │   └── query.sql
    │   ├── main.py
    │   ├── outputs
    │   │   └── model.h5
    │   └── src
    │   │   ├── __pycache__
    │   │       ├── config.cpython-312.pyc
    │   │       ├── data_loader.cpython-312.pyc
    │   │       ├── evaluate.cpython-312.pyc
    │   │       ├── model.cpython-312.pyc
    │   │       ├── preprocessing.cpython-312.pyc
    │   │       └── train.cpython-312.pyc
    │   │   ├── config.py
    │   │   ├── data_loader.py
    │   │   ├── evaluate.py
    │   │   ├── model.py
    │   │   ├── preprocessing.py
    │   │   └── train.py
    ├── customer_order_risk
    │   ├── __pycache__
    │   │   └── config.cpython-312.pyc
    │   ├── best_model.keras
    │   ├── config.py
    │   ├── main.py
    │   ├── requirements.txt
    │   └── src
    │   │   ├── __pycache__
    │   │       ├── database.cpython-312.pyc
    │   │       ├── feature_engineering.cpython-312.pyc
    │   │       └── model.cpython-312.pyc
    │   │   ├── database.py
    │   │   ├── feature_engineering.py
    │   │   └── model.py
    ├── project1.py
    ├── sample1.py
    └── sample2.py
├── DesicionTrees
    ├── __pycache__
    │   └── main.cpython-313.pyc
    ├── credit_model.pkl
    ├── main.py
    └── sample1.py
├── KMeans
    └── sample1.py
├── KNN
    ├── knn_model.pkl
    ├── sample1.py
    ├── sample2.py
    └── sample3.py
├── NaiveBayes
    └── sample1.py
├── RandomForest
    └── sample1.py
└── SVM
    ├── sample1.py
    └── sample2.py


/DBSCAN/sample1.py:
--------------------------------------------------------------------------------
 1 | #Müşterilerin alışveriş davranışlarına göre gruplanması ve aykırı verilerin keşfi
 2 | 
 3 | #order_details,customers,orders
 4 | 
 5 | import pandas as pd
 6 | import numpy as np
 7 | import matplotlib.pyplot as plt
 8 | import psycopg2
 9 | from sqlalchemy import create_engine
10 | from sklearn.preprocessing import StandardScaler
11 | from sklearn.cluster import DBSCAN
12 | from sklearn.neighbors import NearestNeighbors
13 | from kneed import KneeLocator
14 | 
15 | user = "postgres"
16 | password = "12345"
17 | host = "localhost"
18 | port = "5432"
19 | database = "northwind"
20 | 
21 | engine = create_engine(f"postgresql+psycopg2://{user}:{password}@{host}:{port}/{database}")
22 | 
23 | query = """
24 | select 
25 | c.customer_id,
26 | count(o.order_id) as total_orders,
27 | sum(od.unit_price*od.quantity) as total_spent,
28 | avg(od.unit_price*od.quantity) as avg_order_value
29 | from customers c inner join orders o
30 | on c.customer_id =o.customer_id
31 | inner join order_details od
32 | on o.order_id = od.order_id
33 | group by c.customer_id
34 | having count(o.order_id)>0
35 | """
36 | 
37 | df = pd.read_sql_query(query,engine)
38 | print(df.head())
39 | 
40 | X = df[["total_orders","total_spent","avg_order_value"]]
41 | 
42 | scaler = StandardScaler()
43 | X_scaled = scaler.fit_transform(X)
44 | 
45 | def find_optimal_eps(X_scaled,min_samples=3):
46 |     neighbors = NearestNeighbors(n_neighbors=min_samples).fit(X_scaled)
47 |     distances,_  = neighbors.kneighbors(X_scaled)
48 | 
49 |     distances = np.sort(distances[:,min_samples-1])
50 | 
51 |     kneedle = KneeLocator(range(len(distances)), distances, curve='convex', direction='increasing')
52 |     optimal_eps = distances[kneedle.elbow]
53 | 
54 |     plt.figure(figsize=(10, 6))
55 |     plt.plot(distances)
56 |     plt.axvline(x=kneedle.elbow, color='r', linestyle='--', label=f'Optimal eps: {optimal_eps:.2f}')
57 |     plt.xlabel('Points sorted by distance')
58 |     plt.ylabel(f'{min_samples}-th nearest neighbor distance')
59 |     plt.title('Elbow Method for Optimal eps')
60 |     plt.legend()
61 |     plt.grid(True)
62 |     plt.show()
63 | 
64 |     return optimal_eps
65 | 
66 | optimal_eps = find_optimal_eps(X_scaled)
67 | dbscan = DBSCAN(eps=optimal_eps,min_samples=3)
68 | 
69 | df["cluster"] = dbscan.fit_predict(X_scaled)
70 | 
71 | plt.figure(figsize=(10, 6))
72 | plt.scatter(df['total_orders'], df['total_spent'], c=df['cluster'], cmap='plasma', s=60)
73 | plt.xlabel("Toplam Sipariş Sayısı")
74 | plt.ylabel("Toplam Harcama")
75 | plt.title("Müşteri Segmentasyonu (DBSCAN)")
76 | plt.grid(True)
77 | plt.colorbar(label='Küme No')
78 | plt.show()
79 | 
80 | outliers = df[df["cluster"]==-1]
81 | print("Aykırı veri sayısı : ", len(outliers))
82 | print(outliers[["customer_id","total_orders","total_spent"]])


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/README.md:
--------------------------------------------------------------------------------
 1 | # Customer Category Purchase Prediction
 2 | 
 3 | Bu proje, müşterilerin geçmiş satın alma davranışlarına dayanarak yeni ürün kategorilerinde satın alma olasılıklarını tahmin eden bir derin öğrenme modeli içerir.
 4 | 
 5 | ## Proje Yapısı
 6 | 
 7 | ```
 8 | customer_categorical_order_prediction/
 9 | ├── src/
10 | │   ├── data/
11 | │   │   ├── __init__.py
12 | │   │   ├── database.py
13 | │   │   └── feature_engineering.py
14 | │   ├── models/
15 | │   │   ├── __init__.py
16 | │   │   ├── neural_network.py
17 | │   │   └── model_evaluation.py
18 | │   ├── utils/
19 | │   │   ├── __init__.py
20 | │   │   └── helpers.py
21 | │   └── config.py
22 | ├── tests/
23 | │   ├── __init__.py
24 | │   ├── test_data.py
25 | │   └── test_models.py
26 | ├── notebooks/
27 | │   └── exploratory_analysis.ipynb
28 | ├── .env.example
29 | ├── requirements.txt
30 | └── README.md
31 | ```
32 | 
33 | ## Kurulum
34 | 
35 | 1. Sanal ortam oluşturun:
36 | ```bash
37 | python -m venv venv
38 | source venv/bin/activate  # Linux/Mac
39 | venv\Scripts\activate     # Windows
40 | ```
41 | 
42 | 2. Gerekli paketleri yükleyin:
43 | ```bash
44 | pip install -r requirements.txt
45 | ```
46 | 
47 | 3. `.env.example` dosyasını `.env` olarak kopyalayın ve veritabanı bağlantı bilgilerinizi girin.
48 | 
49 | ## Kullanım
50 | 
51 | 1. Veri hazırlama:
52 | ```bash
53 | python src/data/feature_engineering.py
54 | ```
55 | 
56 | 2. Model eğitimi:
57 | ```bash
58 | python src/models/neural_network.py
59 | ```
60 | 
61 | ## Test
62 | 
63 | ```bash
64 | pytest tests/
65 | ```
66 | 
67 | ## Kod Kalitesi
68 | 
69 | - Black ile kod formatlaması
70 | - Flake8 ile kod analizi
71 | - MyPy ile tip kontrolü
72 | - Pytest ile birim testleri 


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/best_model.keras:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/best_model.keras


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/main.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Main script for customer category prediction.
  3 | """
  4 | import os
  5 | import logging
  6 | from src.data.database import get_customer_order_history
  7 | from src.data.feature_engineering import (
  8 |     create_customer_features,
  9 |     create_category_features,
 10 |     create_time_based_features,
 11 |     prepare_model_data
 12 | )
 13 | from src.models.neural_network import CustomerCategoryPredictor
 14 | from src.models.model_evaluation import (
 15 |     plot_training_history,
 16 |     plot_roc_curve,
 17 |     plot_confusion_matrix,
 18 |     generate_evaluation_report
 19 | )
 20 | from src.config import MODEL_CONFIG, FEATURE_CONFIG
 21 | 
 22 | def main():
 23 |     """
 24 |     Main function to run the customer category prediction model.
 25 |     """
 26 |     # Get raw data
 27 |     raw_data = get_customer_order_history()
 28 |     print("Raw data shape:", raw_data.shape)
 29 |     print("Raw data columns:", raw_data.columns.tolist())
 30 |     
 31 |     # Create features
 32 |     customer_features = create_customer_features(raw_data)
 33 |     print("\nCustomer features shape:", customer_features.shape)
 34 |     print("Customer features columns:", customer_features.columns.tolist())
 35 |     
 36 |     category_features = create_category_features(raw_data)
 37 |     print("\nCategory features shape:", category_features.shape)
 38 |     print("Category features columns:", category_features.columns.tolist())
 39 |     
 40 |     time_features = create_time_based_features(raw_data)
 41 |     print("\nTime features shape:", time_features.shape)
 42 |     print("Time features columns:", time_features.columns.tolist())
 43 |     
 44 |     # Merge features
 45 |     # Start with unique customer-category pairs
 46 |     df = raw_data[['customer_id', 'category_id']].drop_duplicates()
 47 |     
 48 |     # Merge customer features
 49 |     df = df.merge(customer_features, on='customer_id', how='left')
 50 |     
 51 |     # Merge category features
 52 |     df = df.merge(category_features, on=['customer_id', 'category_id'], how='left')
 53 |     
 54 |     # Merge time features (using only the latest time features for each customer)
 55 |     latest_time_features = time_features.sort_values('order_date').groupby('customer_id').last()
 56 |     df = df.merge(latest_time_features, on='customer_id', how='left')
 57 |     
 58 |     # Drop duplicate columns
 59 |     df = df.loc[:, ~df.columns.duplicated()]
 60 |     
 61 |     print("\nFinal features shape:", df.shape)
 62 |     print("Final features columns:", df.columns.tolist())
 63 |     
 64 |     # Prepare model data for each target category
 65 |     for target_category in FEATURE_CONFIG['target_categories']:
 66 |         print(f"\nTraining model for category {target_category}")
 67 |         
 68 |         # Prepare data
 69 |         X_train, X_test, y_train, y_test = prepare_model_data(df, target_category)
 70 |         
 71 |         # Initialize and train model
 72 |         model = CustomerCategoryPredictor(
 73 |             input_dim=X_train.shape[1],
 74 |             hidden_layers=MODEL_CONFIG['hidden_layers'],
 75 |             dropout_rate=MODEL_CONFIG['dropout_rate'],
 76 |             learning_rate=MODEL_CONFIG['learning_rate']
 77 |         )
 78 |         
 79 |         # Train model
 80 |         history = model.train(
 81 |             X_train, y_train,
 82 |             batch_size=MODEL_CONFIG['batch_size'],
 83 |             epochs=MODEL_CONFIG['epochs'],
 84 |             validation_split=MODEL_CONFIG['validation_split']
 85 |         )
 86 |         
 87 |         # Plot training history
 88 |         plot_training_history(history, f"Category_{target_category}")
 89 |         
 90 |         # Make predictions on test set
 91 |         y_pred = model.predict(X_test)
 92 |         
 93 |         # Plot ROC curve
 94 |         plot_roc_curve(y_test, y_pred, f"Category_{target_category}")
 95 |         
 96 |         # Plot confusion matrix
 97 |         plot_confusion_matrix(y_test, y_pred, f"Category_{target_category}")
 98 |         
 99 |         # Generate evaluation report
100 |         generate_evaluation_report(
101 |             y_true=y_test,
102 |             y_pred=y_pred,
103 |             y_pred_proba=y_pred,
104 |             category_name=f"Category_{target_category}",
105 |             threshold=0.5
106 |         )
107 |         
108 |         # Save model
109 |         model_path = os.path.join('models', 'saved', f'category_{target_category}_model.keras')
110 |         model.save(model_path)
111 |         print(f"Model saved to {model_path}")
112 | 
113 | if __name__ == "__main__":
114 |     main() 


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/models/saved/category_1_model.keras:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/models/saved/category_1_model.keras


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/models/saved/category_2_model.keras:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/models/saved/category_2_model.keras


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/models/saved/category_3_model.keras:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/models/saved/category_3_model.keras


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/models/saved/category_4_model.keras:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/models/saved/category_4_model.keras


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/models/saved/category_5_model.keras:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/models/saved/category_5_model.keras


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/models/saved/category_6_model.keras:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/models/saved/category_6_model.keras


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/models/saved/category_7_model.keras:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/models/saved/category_7_model.keras


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/models/saved/category_8_model.keras:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/models/saved/category_8_model.keras


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/confusion_matrices/confusion_matrix_Category_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/confusion_matrices/confusion_matrix_Category_1.png


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/confusion_matrices/confusion_matrix_Category_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/confusion_matrices/confusion_matrix_Category_2.png


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/confusion_matrices/confusion_matrix_Category_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/confusion_matrices/confusion_matrix_Category_3.png


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/confusion_matrices/confusion_matrix_Category_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/confusion_matrices/confusion_matrix_Category_4.png


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/confusion_matrices/confusion_matrix_Category_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/confusion_matrices/confusion_matrix_Category_5.png


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/confusion_matrices/confusion_matrix_Category_6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/confusion_matrices/confusion_matrix_Category_6.png


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/confusion_matrices/confusion_matrix_Category_7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/confusion_matrices/confusion_matrix_Category_7.png


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/confusion_matrices/confusion_matrix_Category_8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/confusion_matrices/confusion_matrix_Category_8.png


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/evaluation_reports/evaluation_report_Category_1.txt:
--------------------------------------------------------------------------------
 1 | Evaluation Report for Category_1
 2 | ==================================================
 3 | 
 4 | Classification threshold: 0.5
 5 | 
 6 |               precision    recall  f1-score   support
 7 | 
 8 |            0       0.86      1.00      0.92       103
 9 |            1       0.00      0.00      0.00        17
10 | 
11 |     accuracy                           0.86       120
12 |    macro avg       0.43      0.50      0.46       120
13 | weighted avg       0.74      0.86      0.79       120
14 | 
15 | Additional Metrics:
16 | --------------------
17 | Number of samples: 120
18 | Number of positive samples: 17
19 | Number of predicted positive samples: [0]
20 | Positive class ratio: 14.17%
21 | 


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/evaluation_reports/evaluation_report_Category_2.txt:
--------------------------------------------------------------------------------
 1 | Evaluation Report for Category_2
 2 | ==================================================
 3 | 
 4 | Classification threshold: 0.5
 5 | 
 6 |               precision    recall  f1-score   support
 7 | 
 8 |            0       0.88      1.00      0.94       106
 9 |            1       0.00      0.00      0.00        14
10 | 
11 |     accuracy                           0.88       120
12 |    macro avg       0.44      0.50      0.47       120
13 | weighted avg       0.78      0.88      0.83       120
14 | 
15 | Additional Metrics:
16 | --------------------
17 | Number of samples: 120
18 | Number of positive samples: 14
19 | Number of predicted positive samples: [0]
20 | Positive class ratio: 11.67%
21 | 


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/evaluation_reports/evaluation_report_Category_3.txt:
--------------------------------------------------------------------------------
 1 | Evaluation Report for Category_3
 2 | ==================================================
 3 | 
 4 | Classification threshold: 0.5
 5 | 
 6 |               precision    recall  f1-score   support
 7 | 
 8 |            0       0.87      1.00      0.93       104
 9 |            1       0.00      0.00      0.00        16
10 | 
11 |     accuracy                           0.87       120
12 |    macro avg       0.43      0.50      0.46       120
13 | weighted avg       0.75      0.87      0.80       120
14 | 
15 | Additional Metrics:
16 | --------------------
17 | Number of samples: 120
18 | Number of positive samples: 16
19 | Number of predicted positive samples: [0]
20 | Positive class ratio: 13.33%
21 | 


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/evaluation_reports/evaluation_report_Category_4.txt:
--------------------------------------------------------------------------------
 1 | Evaluation Report for Category_4
 2 | ==================================================
 3 | 
 4 | Classification threshold: 0.5
 5 | 
 6 |               precision    recall  f1-score   support
 7 | 
 8 |            0       0.87      1.00      0.93       104
 9 |            1       0.00      0.00      0.00        16
10 | 
11 |     accuracy                           0.87       120
12 |    macro avg       0.43      0.50      0.46       120
13 | weighted avg       0.75      0.87      0.80       120
14 | 
15 | Additional Metrics:
16 | --------------------
17 | Number of samples: 120
18 | Number of positive samples: 16
19 | Number of predicted positive samples: [0]
20 | Positive class ratio: 13.33%
21 | 


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/evaluation_reports/evaluation_report_Category_5.txt:
--------------------------------------------------------------------------------
 1 | Evaluation Report for Category_5
 2 | ==================================================
 3 | 
 4 | Classification threshold: 0.5
 5 | 
 6 |               precision    recall  f1-score   support
 7 | 
 8 |            0       0.88      1.00      0.94       106
 9 |            1       0.00      0.00      0.00        14
10 | 
11 |     accuracy                           0.88       120
12 |    macro avg       0.44      0.50      0.47       120
13 | weighted avg       0.78      0.88      0.83       120
14 | 
15 | Additional Metrics:
16 | --------------------
17 | Number of samples: 120
18 | Number of positive samples: 14
19 | Number of predicted positive samples: [0]
20 | Positive class ratio: 11.67%
21 | 


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/evaluation_reports/evaluation_report_Category_6.txt:
--------------------------------------------------------------------------------
 1 | Evaluation Report for Category_6
 2 | ==================================================
 3 | 
 4 | Classification threshold: 0.5
 5 | 
 6 |               precision    recall  f1-score   support
 7 | 
 8 |            0       0.88      1.00      0.94       106
 9 |            1       0.00      0.00      0.00        14
10 | 
11 |     accuracy                           0.88       120
12 |    macro avg       0.44      0.50      0.47       120
13 | weighted avg       0.78      0.88      0.83       120
14 | 
15 | Additional Metrics:
16 | --------------------
17 | Number of samples: 120
18 | Number of positive samples: 14
19 | Number of predicted positive samples: [0]
20 | Positive class ratio: 11.67%
21 | 


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/evaluation_reports/evaluation_report_Category_7.txt:
--------------------------------------------------------------------------------
 1 | Evaluation Report for Category_7
 2 | ==================================================
 3 | 
 4 | Classification threshold: 0.5
 5 | 
 6 |               precision    recall  f1-score   support
 7 | 
 8 |            0       0.89      1.00      0.94       107
 9 |            1       0.00      0.00      0.00        13
10 | 
11 |     accuracy                           0.89       120
12 |    macro avg       0.45      0.50      0.47       120
13 | weighted avg       0.80      0.89      0.84       120
14 | 
15 | Additional Metrics:
16 | --------------------
17 | Number of samples: 120
18 | Number of positive samples: 13
19 | Number of predicted positive samples: [0]
20 | Positive class ratio: 10.83%
21 | 


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/evaluation_reports/evaluation_report_Category_8.txt:
--------------------------------------------------------------------------------
 1 | Evaluation Report for Category_8
 2 | ==================================================
 3 | 
 4 | Classification threshold: 0.5
 5 | 
 6 |               precision    recall  f1-score   support
 7 | 
 8 |            0       0.86      1.00      0.92       103
 9 |            1       0.00      0.00      0.00        17
10 | 
11 |     accuracy                           0.86       120
12 |    macro avg       0.43      0.50      0.46       120
13 | weighted avg       0.74      0.86      0.79       120
14 | 
15 | Additional Metrics:
16 | --------------------
17 | Number of samples: 120
18 | Number of positive samples: 17
19 | Number of predicted positive samples: [0]
20 | Positive class ratio: 14.17%
21 | 


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/precision_recall_curves/precision_recall_curve_Category_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/precision_recall_curves/precision_recall_curve_Category_1.png


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/precision_recall_curves/precision_recall_curve_Category_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/precision_recall_curves/precision_recall_curve_Category_2.png


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/precision_recall_curves/precision_recall_curve_Category_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/precision_recall_curves/precision_recall_curve_Category_3.png


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/precision_recall_curves/precision_recall_curve_Category_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/precision_recall_curves/precision_recall_curve_Category_4.png


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/precision_recall_curves/precision_recall_curve_Category_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/precision_recall_curves/precision_recall_curve_Category_5.png


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/precision_recall_curves/precision_recall_curve_Category_6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/precision_recall_curves/precision_recall_curve_Category_6.png


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/precision_recall_curves/precision_recall_curve_Category_7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/precision_recall_curves/precision_recall_curve_Category_7.png


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/precision_recall_curves/precision_recall_curve_Category_8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/precision_recall_curves/precision_recall_curve_Category_8.png


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/roc_curves/roc_curve_Category_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/roc_curves/roc_curve_Category_1.png


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/roc_curves/roc_curve_Category_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/roc_curves/roc_curve_Category_2.png


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/roc_curves/roc_curve_Category_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/roc_curves/roc_curve_Category_3.png


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/roc_curves/roc_curve_Category_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/roc_curves/roc_curve_Category_4.png


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/roc_curves/roc_curve_Category_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/roc_curves/roc_curve_Category_5.png


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/roc_curves/roc_curve_Category_6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/roc_curves/roc_curve_Category_6.png


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/roc_curves/roc_curve_Category_7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/roc_curves/roc_curve_Category_7.png


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/roc_curves/roc_curve_Category_8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/roc_curves/roc_curve_Category_8.png


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/training_history/training_history_Category_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/training_history/training_history_Category_1.png


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/training_history/training_history_Category_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/training_history/training_history_Category_2.png


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/training_history/training_history_Category_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/training_history/training_history_Category_3.png


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/training_history/training_history_Category_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/training_history/training_history_Category_4.png


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/training_history/training_history_Category_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/training_history/training_history_Category_5.png


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/training_history/training_history_Category_6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/training_history/training_history_Category_6.png


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/training_history/training_history_Category_7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/training_history/training_history_Category_7.png


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/reports/training_history/training_history_Category_8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/training_history/training_history_Category_8.png


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/requirements.txt:
--------------------------------------------------------------------------------
 1 | numpy>=1.26.0
 2 | pandas>=2.1.0
 3 | scikit-learn>=1.3.2
 4 | tensorflow>=2.15.0
 5 | psycopg2-binary>=2.9.9
 6 | python-dotenv>=1.0.0
 7 | pytest>=7.4.3
 8 | black>=23.11.0
 9 | flake8>=6.1.0
10 | mypy>=1.7.0
11 | matplotlib>=3.8.0
12 | seaborn>=0.13.0 


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, find_packages
 2 | 
 3 | setup(
 4 |     name="customer_category_prediction",
 5 |     version="0.1.0",
 6 |     packages=find_packages(),
 7 |     install_requires=[
 8 |         "numpy>=1.26.0",
 9 |         "pandas>=2.1.0",
10 |         "scikit-learn>=1.3.2",
11 |         "tensorflow>=2.15.0",
12 |         "psycopg2-binary>=2.9.9",
13 |         "python-dotenv>=1.0.0",
14 |         "pytest>=7.4.3",
15 |         "black>=23.11.0",
16 |         "flake8>=6.1.0",
17 |         "mypy>=1.7.0",
18 |         "matplotlib>=3.8.0",
19 |         "seaborn>=0.13.0",
20 |         "sqlalchemy>=2.0.0"
21 |     ],
22 |     python_requires=">=3.12.0",
23 | ) 


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/src/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Customer Category Prediction package.
3 | """ 


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/src/__pycache__/__init__.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/src/__pycache__/__init__.cpython-312.pyc


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/src/__pycache__/config.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/src/__pycache__/config.cpython-312.pyc


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/src/config.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Configuration settings for the project.
 3 | """
 4 | import os
 5 | from typing import Dict, Any
 6 | from dotenv import load_dotenv
 7 | 
 8 | # Load environment variables
 9 | load_dotenv()
10 | 
11 | # Database configuration
12 | DB_CONFIG = {
13 |     'host': os.getenv('DB_HOST', 'localhost'),
14 |     'port': os.getenv('DB_PORT', '5432'),
15 |     'database': os.getenv('DB_NAME', 'northwind'),
16 |     'user': os.getenv('DB_USER', 'postgres'),
17 |     'password': os.getenv('DB_PASSWORD', '12345')  # Empty default password
18 | }
19 | 
20 | # Model configuration
21 | MODEL_CONFIG = {
22 |     'input_dim': 20,  # Number of input features
23 |     'hidden_layers': [64, 32, 16],  # Hidden layer sizes
24 |     'dropout_rate': 0.3,
25 |     'learning_rate': 0.001,
26 |     'batch_size': 32,
27 |     'epochs': 100,
28 |     'early_stopping_patience': 10,
29 |     'validation_split': 0.2
30 | }
31 | 
32 | # Feature engineering configuration
33 | FEATURE_CONFIG = {
34 |     'time_windows': [7, 30, 90, 180],  # Days for rolling features
35 |     'lag_periods': [1, 3, 7, 14],  # Days for lag features
36 |     'percentiles': [0.25, 0.5, 0.75],  # Percentiles for feature calculation
37 |     'min_purchase_count': 3,  # Minimum purchases for customer analysis
38 |     'target_categories': [1, 2, 3, 4, 5, 6, 7, 8]  # Categories to predict
39 | }
40 | 
41 | # Data processing configuration
42 | DATA_CONFIG = {
43 |     'train_test_split': 0.2,
44 |     'random_state': 42,
45 |     'missing_value_strategy': 'mean',  # Options: 'mean', 'median', 'mode', 'drop'
46 |     'feature_scaling': 'standard',  # Options: 'standard', 'minmax', 'robust'
47 |     'categorical_encoding': 'onehot'  # Options: 'onehot', 'label', 'target'
48 | }
49 | 
50 | # Evaluation configuration
51 | EVAL_CONFIG = {
52 |     'metrics': ['accuracy', 'precision', 'recall', 'f1', 'auc'],
53 |     'threshold': 0.5,
54 |     'cv_folds': 5,
55 |     'confidence_threshold': 0.8
56 | }
57 | 
58 | # Path configuration
59 | PATH_CONFIG = {
60 |     'data_dir': 'data',
61 |     'raw_data_dir': 'data/raw',
62 |     'processed_data_dir': 'data/processed',
63 |     'model_dir': 'models',
64 |     'report_dir': 'reports',
65 |     'log_dir': 'logs'
66 | }
67 | 
68 | # Logging configuration
69 | LOG_CONFIG = {
70 |     'level': 'INFO',
71 |     'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
72 |     'date_format': '%Y-%m-%d %H:%M:%S'
73 | }
74 | 
75 | def get_config() -> Dict[str, Any]:
76 |     """
77 |     Get all configuration settings.
78 |     
79 |     Returns:
80 |         Dict[str, Any]: Configuration dictionary
81 |     """
82 |     return {
83 |         'db': DB_CONFIG,
84 |         'model': MODEL_CONFIG,
85 |         'feature': FEATURE_CONFIG,
86 |         'data': DATA_CONFIG,
87 |         'eval': EVAL_CONFIG,
88 |         'path': PATH_CONFIG,
89 |         'log': LOG_CONFIG
90 |     } 


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/src/data/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Data processing and feature engineering package.
3 | """ 


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/src/data/__pycache__/__init__.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/src/data/__pycache__/__init__.cpython-312.pyc


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/src/data/__pycache__/database.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/src/data/__pycache__/database.cpython-312.pyc


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/src/data/__pycache__/feature_engineering.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/src/data/__pycache__/feature_engineering.cpython-312.pyc


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/src/data/database.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Database connection and query module.
  3 | """
  4 | import os
  5 | from typing import List, Dict, Any, Optional
  6 | import pandas as pd
  7 | from sqlalchemy import create_engine, text
  8 | from sqlalchemy.engine import Engine
  9 | from dotenv import load_dotenv
 10 | from src.config import DB_CONFIG
 11 | 
 12 | # Load environment variables
 13 | load_dotenv()
 14 | 
 15 | def get_database_connection() -> Engine:
 16 |     """
 17 |     Create database connection using environment variables.
 18 |     
 19 |     Returns:
 20 |         Engine: SQLAlchemy database engine
 21 |     """
 22 |     # Get database credentials from config
 23 |     db_host = DB_CONFIG['host']
 24 |     db_port = DB_CONFIG['port']
 25 |     db_name = DB_CONFIG['database']
 26 |     db_user = DB_CONFIG['user']
 27 |     db_password = DB_CONFIG['password']
 28 |     
 29 |     # Create connection string
 30 |     connection_string = f"postgresql://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}"
 31 |     
 32 |     # Create engine
 33 |     engine = create_engine(connection_string)
 34 |     
 35 |     return engine
 36 | 
 37 | def execute_query(query: str, params: Optional[Dict[str, Any]] = None) -> pd.DataFrame:
 38 |     """
 39 |     Execute SQL query and return results as DataFrame.
 40 |     
 41 |     Args:
 42 |         query (str): SQL query to execute
 43 |         params (Dict[str, Any], optional): Query parameters
 44 |         
 45 |     Returns:
 46 |         pd.DataFrame: Query results
 47 |     """
 48 |     engine = get_database_connection()
 49 |     
 50 |     try:
 51 |         with engine.connect() as connection:
 52 |             result = connection.execute(text(query), params or {})
 53 |             return pd.DataFrame(result.fetchall(), columns=result.keys())
 54 |     except Exception as e:
 55 |         raise Exception(f"Error executing query: {str(e)}")
 56 |     finally:
 57 |         engine.dispose()
 58 | 
 59 | def get_customer_category_data() -> pd.DataFrame:
 60 |     """
 61 |     Get customer category purchase data.
 62 |     
 63 |     Returns:
 64 |         pd.DataFrame: Customer category data
 65 |     """
 66 |     query = """
 67 |     WITH customer_category_stats AS (
 68 |         SELECT 
 69 |             c.customer_id,
 70 |             p.category_id,
 71 |             COUNT(DISTINCT o.order_id) as order_count,
 72 |             SUM(od.unit_price * od.quantity * (1 - od.discount)) as total_amount,
 73 |             MAX(o.order_date) as last_order_date
 74 |         FROM customers c
 75 |         JOIN orders o ON c.customer_id = o.customer_id
 76 |         JOIN order_details od ON o.order_id = od.order_id
 77 |         JOIN products p ON od.product_id = p.product_id
 78 |         GROUP BY c.customer_id, p.category_id
 79 |     )
 80 |     SELECT 
 81 |         ccs.customer_id,
 82 |         c.company_name,
 83 |         cat.category_name,
 84 |         ccs.order_count,
 85 |         ccs.total_amount,
 86 |         ccs.last_order_date
 87 |     FROM customer_category_stats ccs
 88 |     JOIN customers c ON ccs.customer_id = c.customer_id
 89 |     JOIN categories cat ON ccs.category_id = cat.category_id
 90 |     ORDER BY ccs.customer_id, ccs.total_amount DESC;
 91 |     """
 92 |     
 93 |     return execute_query(query)
 94 | 
 95 | def get_customer_order_history() -> pd.DataFrame:
 96 |     """
 97 |     Get detailed customer order history.
 98 |     
 99 |     Returns:
100 |         pd.DataFrame: Customer order history
101 |     """
102 |     query = """
103 |     SELECT 
104 |         c.customer_id,
105 |         c.company_name,
106 |         o.order_id,
107 |         o.order_date,
108 |         p.category_id,
109 |         cat.category_name,
110 |         od.unit_price * od.quantity * (1 - od.discount) as total_amount
111 |     FROM customers c
112 |     JOIN orders o ON c.customer_id = o.customer_id
113 |     JOIN order_details od ON o.order_id = od.order_id
114 |     JOIN products p ON od.product_id = p.product_id
115 |     JOIN categories cat ON p.category_id = cat.category_id
116 |     ORDER BY c.customer_id, o.order_date;
117 |     """
118 |     
119 |     df = execute_query(query)
120 |     print("Veritabanından gelen sütunlar:", df.columns.tolist())
121 |     print("\nİlk 5 satır:")
122 |     print(df.head())
123 |     return df
124 | 
125 | def get_category_metrics() -> pd.DataFrame:
126 |     """
127 |     Get category-level metrics.
128 |     
129 |     Returns:
130 |         pd.DataFrame: Category metrics
131 |     """
132 |     query = """
133 |     WITH category_stats AS (
134 |         SELECT 
135 |             p.category_id,
136 |             COUNT(DISTINCT o.customer_id) as unique_customers,
137 |             COUNT(DISTINCT o.order_id) as total_orders,
138 |             SUM(od.unit_price * od.quantity * (1 - od.discount)) as total_revenue,
139 |             AVG(od.unit_price * od.quantity * (1 - od.discount)) as avg_order_value
140 |         FROM orders o
141 |         JOIN order_details od ON o.order_id = od.order_id
142 |         JOIN products p ON od.product_id = p.product_id
143 |         GROUP BY p.category_id
144 |     )
145 |     SELECT 
146 |         cat.category_id,
147 |         cat.category_name,
148 |         cs.unique_customers,
149 |         cs.total_orders,
150 |         cs.total_revenue,
151 |         cs.avg_order_value
152 |     FROM categories cat
153 |     JOIN category_stats cs ON cat.category_id = cs.category_id
154 |     ORDER BY cs.total_revenue DESC;
155 |     """
156 |     
157 |     return execute_query(query) 


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/src/data/feature_engineering.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Feature engineering module for customer category prediction.
  3 | """
  4 | import pandas as pd
  5 | import numpy as np
  6 | from typing import List, Dict, Any, Tuple
  7 | from datetime import datetime, timedelta
  8 | from sklearn.preprocessing import StandardScaler
  9 | from sklearn.model_selection import train_test_split
 10 | from .database import get_customer_category_data
 11 | 
 12 | def create_customer_features(df: pd.DataFrame) -> pd.DataFrame:
 13 |     """
 14 |     Create customer-level features.
 15 |     
 16 |     Args:
 17 |         df (pd.DataFrame): Input DataFrame with customer data
 18 |         
 19 |     Returns:
 20 |         pd.DataFrame: DataFrame with customer features
 21 |     """
 22 |     # Convert order_date to datetime if it's not already
 23 |     if not pd.api.types.is_datetime64_any_dtype(df['order_date']):
 24 |         df['order_date'] = pd.to_datetime(df['order_date'])
 25 |     
 26 |     # Group by customer and calculate metrics
 27 |     customer_features = df.groupby('customer_id').agg({
 28 |         'order_id': 'count',
 29 |         'total_amount': ['sum', 'mean', 'std'],
 30 |         'order_date': ['min', 'max'],
 31 |         'category_id': 'nunique'  # Number of unique categories purchased
 32 |     }).reset_index()
 33 |     
 34 |     # Flatten column names
 35 |     customer_features.columns = ['customer_id', 'total_orders',
 36 |                                'total_spent', 'avg_order_value',
 37 |                                'std_order_value', 'first_order_date',
 38 |                                'last_order_date', 'unique_categories']
 39 |     
 40 |     # Calculate time-based features
 41 |     customer_features['customer_lifetime'] = (
 42 |         customer_features['last_order_date'] - customer_features['first_order_date']
 43 |     ).dt.days
 44 |     
 45 |     customer_features['avg_days_between_orders'] = (
 46 |         customer_features['customer_lifetime'] / customer_features['total_orders']
 47 |     )
 48 |     
 49 |     # Calculate category diversity
 50 |     customer_features['category_diversity'] = (
 51 |         customer_features['unique_categories'] / customer_features['total_orders']
 52 |     )
 53 |     
 54 |     return customer_features
 55 | 
 56 | def create_category_features(df: pd.DataFrame) -> pd.DataFrame:
 57 |     """
 58 |     Create category-level features.
 59 |     
 60 |     Args:
 61 |         df (pd.DataFrame): Input DataFrame with category data
 62 |         
 63 |     Returns:
 64 |         pd.DataFrame: DataFrame with category features
 65 |     """
 66 |     # Convert order_date to datetime if it's not already
 67 |     if not pd.api.types.is_datetime64_any_dtype(df['order_date']):
 68 |         df['order_date'] = pd.to_datetime(df['order_date'])
 69 |     
 70 |     # Group by customer and category
 71 |     category_features = df.groupby(['customer_id', 'category_id']).agg({
 72 |         'order_id': 'count',
 73 |         'total_amount': ['sum', 'mean'],
 74 |         'order_date': ['min', 'max']
 75 |     }).reset_index()
 76 |     
 77 |     # Flatten column names
 78 |     category_features.columns = ['customer_id', 'category_id',
 79 |                                'category_orders', 'category_spent',
 80 |                                'avg_category_order', 'first_category_order',
 81 |                                'last_category_order']
 82 |     
 83 |     # Calculate category-specific metrics
 84 |     category_features['category_lifetime'] = (
 85 |         category_features['last_category_order'] - category_features['first_category_order']
 86 |     ).dt.days
 87 |     
 88 |     category_features['category_order_frequency'] = (
 89 |         category_features['category_lifetime'] / category_features['category_orders']
 90 |     )
 91 |     
 92 |     return category_features
 93 | 
 94 | def create_time_based_features(df: pd.DataFrame) -> pd.DataFrame:
 95 |     """
 96 |     Create time-based features.
 97 |     
 98 |     Args:
 99 |         df (pd.DataFrame): Input DataFrame with temporal data
100 |         
101 |     Returns:
102 |         pd.DataFrame: DataFrame with time-based features
103 |     """
104 |     # Convert order_date to datetime if it's not already
105 |     if not pd.api.types.is_datetime64_any_dtype(df['order_date']):
106 |         df['order_date'] = pd.to_datetime(df['order_date'])
107 |     
108 |     # Extract time components
109 |     df['order_year'] = df['order_date'].dt.year
110 |     df['order_month'] = df['order_date'].dt.month
111 |     df['order_day'] = df['order_date'].dt.day
112 |     df['order_dayofweek'] = df['order_date'].dt.dayofweek
113 |     df['order_quarter'] = df['order_date'].dt.quarter
114 |     
115 |     # Calculate time since last order
116 |     df['days_since_last_order'] = df.groupby('customer_id')['order_date'].diff().dt.days
117 |     
118 |     return df
119 | 
120 | def calculate_customer_metrics(df: pd.DataFrame) -> pd.DataFrame:
121 |     """
122 |     Calculate customer-level metrics.
123 |     
124 |     Args:
125 |         df (pd.DataFrame): Input DataFrame with customer data
126 |         
127 |     Returns:
128 |         pd.DataFrame: DataFrame with customer metrics
129 |     """
130 |     # Convert order_date to datetime if it's not already
131 |     if not pd.api.types.is_datetime64_any_dtype(df['order_date']):
132 |         df['order_date'] = pd.to_datetime(df['order_date'])
133 |     
134 |     # Calculate RFM metrics
135 |     current_date = df['order_date'].max()
136 |     
137 |     rfm = df.groupby('customer_id').agg({
138 |         'order_date': lambda x: (current_date - x.max()).days,  # Recency
139 |         'order_id': 'count',  # Frequency
140 |         'total_amount': 'sum'  # Monetary
141 |     }).reset_index()
142 |     
143 |     rfm.columns = ['customer_id', 'recency', 'frequency', 'monetary']
144 |     
145 |     # Calculate additional metrics
146 |     rfm['avg_order_value'] = rfm['monetary'] / rfm['frequency']
147 |     rfm['purchase_rate'] = rfm['frequency'] / (
148 |         (current_date - df.groupby('customer_id')['order_date'].min()).dt.days
149 |     )
150 |     
151 |     return rfm
152 | 
153 | def prepare_model_data(df: pd.DataFrame,
154 |                       target_category: int,
155 |                       test_size: float = 0.2,
156 |                       random_state: int = 42) -> Tuple[np.ndarray, np.ndarray,
157 |                                                      np.ndarray, np.ndarray]:
158 |     """
159 |     Prepare data for model training.
160 |     
161 |     Args:
162 |         df (pd.DataFrame): Input DataFrame
163 |         target_category (int): Target category ID
164 |         test_size (float): Test set size
165 |         random_state (int): Random state for reproducibility
166 |         
167 |     Returns:
168 |         Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: X_train, X_test, y_train, y_test
169 |     """
170 |     # Use category_id_x as the main category_id column
171 |     df['target'] = (df['category_id_x'] == target_category).astype(int)
172 |     
173 |     # Select features
174 |     feature_cols = [col for col in df.columns if col not in
175 |                    ['customer_id', 'category_id_x', 'category_id_y', 'order_id', 'order_date',
176 |                     'target', 'first_order_date', 'last_order_date',
177 |                     'first_category_order', 'last_category_order',
178 |                     'company_name', 'category_name']]
179 |     
180 |     X = df[feature_cols]
181 |     y = df['target']
182 |     
183 |     # Split data
184 |     X_train, X_test, y_train, y_test = train_test_split(
185 |         X, y, test_size=test_size, random_state=random_state,
186 |         stratify=y
187 |     )
188 |     
189 |     # Scale features
190 |     scaler = StandardScaler()
191 |     X_train = scaler.fit_transform(X_train)
192 |     X_test = scaler.transform(X_test)
193 |     
194 |     return X_train, X_test, y_train, y_test
195 | 
196 | def handle_missing_values(df: pd.DataFrame,
197 |                          strategy: str = 'mean') -> pd.DataFrame:
198 |     """
199 |     Handle missing values in the dataset.
200 |     
201 |     Args:
202 |         df (pd.DataFrame): Input DataFrame
203 |         strategy (str): Strategy for handling missing values
204 |         
205 |     Returns:
206 |         pd.DataFrame: DataFrame with handled missing values
207 |     """
208 |     if strategy == 'mean':
209 |         return df.fillna(df.mean())
210 |     elif strategy == 'median':
211 |         return df.fillna(df.median())
212 |     elif strategy == 'mode':
213 |         return df.fillna(df.mode().iloc[0])
214 |     elif strategy == 'drop':
215 |         return df.dropna()
216 |     else:
217 |         raise ValueError(f"Unknown strategy: {strategy}")
218 | 
219 | def get_train_test_split(df: pd.DataFrame,
220 |                         target_col: str,
221 |                         test_size: float = 0.2,
222 |                         random_state: int = 42) -> Tuple[pd.DataFrame,
223 |                                                        pd.DataFrame,
224 |                                                        pd.Series,
225 |                                                        pd.Series]:
226 |     """
227 |     Split data into training and test sets.
228 |     
229 |     Args:
230 |         df (pd.DataFrame): Input DataFrame
231 |         target_col (str): Target column name
232 |         test_size (float): Test set size
233 |         random_state (int): Random state for reproducibility
234 |         
235 |     Returns:
236 |         Tuple[pd.DataFrame, pd.DataFrame, pd.Series, pd.Series]: X_train, X_test, y_train, y_test
237 |     """
238 |     X = df.drop(columns=[target_col])
239 |     y = df[target_col]
240 |     
241 |     return train_test_split(
242 |         X, y, test_size=test_size, random_state=random_state,
243 |         stratify=y
244 |     ) 


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/src/models/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Neural network models and evaluation package.
3 | """ 


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/src/models/__pycache__/__init__.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/src/models/__pycache__/__init__.cpython-312.pyc


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/src/models/__pycache__/model_evaluation.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/src/models/__pycache__/model_evaluation.cpython-312.pyc


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/src/models/__pycache__/neural_network.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/src/models/__pycache__/neural_network.cpython-312.pyc


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/src/models/model_evaluation.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Model evaluation and visualization module.
  3 | """
  4 | import os
  5 | import numpy as np
  6 | import pandas as pd
  7 | import matplotlib.pyplot as plt
  8 | import seaborn as sns
  9 | from sklearn.metrics import (
 10 |     confusion_matrix,
 11 |     classification_report,
 12 |     roc_curve,
 13 |     auc,
 14 |     precision_recall_curve
 15 | )
 16 | from typing import Dict, Any, Tuple
 17 | import json
 18 | 
 19 | def plot_training_history(history: Dict[str, Any], category_name: str):
 20 |     """
 21 |     Plot training history metrics.
 22 |     
 23 |     Args:
 24 |         history (Dict[str, Any]): Training history dictionary
 25 |         category_name (str): Name of the category being predicted
 26 |     """
 27 |     # Create reports directory if it doesn't exist
 28 |     reports_dir = os.path.join('reports', 'training_history')
 29 |     os.makedirs(reports_dir, exist_ok=True)
 30 |     
 31 |     # Plot metrics
 32 |     plt.figure(figsize=(12, 4))
 33 |     
 34 |     # Plot loss
 35 |     plt.subplot(1, 2, 1)
 36 |     plt.plot(history['loss'], label='Training Loss')
 37 |     plt.plot(history['val_loss'], label='Validation Loss')
 38 |     plt.title(f'Loss - {category_name}')
 39 |     plt.xlabel('Epoch')
 40 |     plt.ylabel('Loss')
 41 |     plt.legend()
 42 |     
 43 |     # Plot accuracy
 44 |     plt.subplot(1, 2, 2)
 45 |     plt.plot(history['accuracy'], label='Training Accuracy')
 46 |     plt.plot(history['val_accuracy'], label='Validation Accuracy')
 47 |     plt.title(f'Accuracy - {category_name}')
 48 |     plt.xlabel('Epoch')
 49 |     plt.ylabel('Accuracy')
 50 |     plt.legend()
 51 |     
 52 |     plt.tight_layout()
 53 |     
 54 |     # Save plot
 55 |     save_path = os.path.join(reports_dir, f'training_history_{category_name}.png')
 56 |     plt.savefig(save_path)
 57 |     plt.close()
 58 | 
 59 | def plot_confusion_matrix(y_true: np.ndarray,
 60 |                          y_pred: np.ndarray,
 61 |                          category_name: str,
 62 |                          threshold: float = 0.5):
 63 |     """
 64 |     Plot confusion matrix.
 65 |     
 66 |     Args:
 67 |         y_true (np.ndarray): True labels
 68 |         y_pred (np.ndarray): Predicted probabilities
 69 |         category_name (str): Name of the category being predicted
 70 |         threshold (float): Classification threshold for converting probabilities to binary predictions
 71 |     """
 72 |     # Create reports directory if it doesn't exist
 73 |     reports_dir = os.path.join('reports', 'confusion_matrices')
 74 |     os.makedirs(reports_dir, exist_ok=True)
 75 |     
 76 |     # Convert probabilities to binary predictions
 77 |     y_pred_binary = (y_pred >= threshold).astype(int)
 78 |     
 79 |     # Calculate confusion matrix
 80 |     cm = confusion_matrix(y_true, y_pred_binary)
 81 |     
 82 |     # Plot confusion matrix
 83 |     plt.figure(figsize=(8, 6))
 84 |     sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
 85 |     plt.title(f'Confusion Matrix - {category_name} (threshold={threshold})')
 86 |     plt.xlabel('Predicted')
 87 |     plt.ylabel('True')
 88 |     
 89 |     # Save plot
 90 |     save_path = os.path.join(reports_dir, f'confusion_matrix_{category_name}.png')
 91 |     plt.savefig(save_path)
 92 |     plt.close()
 93 | 
 94 | def plot_roc_curve(y_true: np.ndarray,
 95 |                    y_pred_proba: np.ndarray,
 96 |                    category_name: str):
 97 |     """
 98 |     Plot ROC curve.
 99 |     
100 |     Args:
101 |         y_true (np.ndarray): True labels
102 |         y_pred_proba (np.ndarray): Predicted probabilities
103 |         category_name (str): Name of the category being predicted
104 |     """
105 |     # Create reports directory if it doesn't exist
106 |     reports_dir = os.path.join('reports', 'roc_curves')
107 |     os.makedirs(reports_dir, exist_ok=True)
108 |     
109 |     # Calculate ROC curve
110 |     fpr, tpr, _ = roc_curve(y_true, y_pred_proba)
111 |     roc_auc = auc(fpr, tpr)
112 |     
113 |     # Plot ROC curve
114 |     plt.figure(figsize=(8, 6))
115 |     plt.plot(fpr, tpr, color='darkorange', lw=2,
116 |              label=f'ROC curve (AUC = {roc_auc:.2f})')
117 |     plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
118 |     plt.xlim([0.0, 1.0])
119 |     plt.ylim([0.0, 1.05])
120 |     plt.xlabel('False Positive Rate')
121 |     plt.ylabel('True Positive Rate')
122 |     plt.title(f'ROC Curve - {category_name}')
123 |     plt.legend(loc="lower right")
124 |     
125 |     # Save plot
126 |     save_path = os.path.join(reports_dir, f'roc_curve_{category_name}.png')
127 |     plt.savefig(save_path)
128 |     plt.close()
129 | 
130 | def plot_precision_recall_curve(y_true: np.ndarray,
131 |                               y_pred_proba: np.ndarray,
132 |                               category_name: str):
133 |     """
134 |     Plot precision-recall curve.
135 |     
136 |     Args:
137 |         y_true (np.ndarray): True labels
138 |         y_pred_proba (np.ndarray): Predicted probabilities
139 |         category_name (str): Name of the category being predicted
140 |     """
141 |     # Create reports directory if it doesn't exist
142 |     reports_dir = os.path.join('reports', 'precision_recall_curves')
143 |     os.makedirs(reports_dir, exist_ok=True)
144 |     
145 |     # Calculate precision-recall curve
146 |     precision, recall, _ = precision_recall_curve(y_true, y_pred_proba)
147 |     
148 |     # Plot precision-recall curve
149 |     plt.figure(figsize=(8, 6))
150 |     plt.plot(recall, precision, color='blue', lw=2)
151 |     plt.xlabel('Recall')
152 |     plt.ylabel('Precision')
153 |     plt.title(f'Precision-Recall Curve - {category_name}')
154 |     plt.grid(True)
155 |     
156 |     # Save plot
157 |     save_path = os.path.join(reports_dir, f'precision_recall_curve_{category_name}.png')
158 |     plt.savefig(save_path)
159 |     plt.close()
160 | 
161 | def generate_evaluation_report(y_true: np.ndarray,
162 |                              y_pred: np.ndarray,
163 |                              y_pred_proba: np.ndarray,
164 |                              category_name: str,
165 |                              threshold: float = 0.5) -> Dict[str, float]:
166 |     """
167 |     Generate comprehensive evaluation report.
168 |     
169 |     Args:
170 |         y_true (np.ndarray): True labels
171 |         y_pred (np.ndarray): Predicted probabilities
172 |         y_pred_proba (np.ndarray): Predicted probabilities (same as y_pred)
173 |         category_name (str): Name of the category being predicted
174 |         threshold (float): Classification threshold for converting probabilities to binary predictions
175 |         
176 |     Returns:
177 |         Dict[str, float]: Dictionary of evaluation metrics
178 |     """
179 |     # Create reports directory if it doesn't exist
180 |     reports_dir = os.path.join('reports', 'evaluation_reports')
181 |     os.makedirs(reports_dir, exist_ok=True)
182 |     
183 |     # Convert probabilities to binary predictions
184 |     y_pred_binary = (y_pred >= threshold).astype(int)
185 |     
186 |     # Generate plots
187 |     plot_confusion_matrix(y_true, y_pred, category_name, threshold)
188 |     plot_roc_curve(y_true, y_pred_proba, category_name)
189 |     plot_precision_recall_curve(y_true, y_pred_proba, category_name)
190 |     
191 |     # Calculate metrics with zero_division=0
192 |     report = classification_report(y_true, y_pred_binary, output_dict=True, zero_division=0)
193 |     
194 |     # Save report
195 |     save_path = os.path.join(reports_dir, f'evaluation_report_{category_name}.txt')
196 |     with open(save_path, 'w') as f:
197 |         f.write(f"Evaluation Report for {category_name}\n")
198 |         f.write("=" * 50 + "\n\n")
199 |         f.write(f"Classification threshold: {threshold}\n\n")
200 |         f.write(classification_report(y_true, y_pred_binary, zero_division=0))
201 |         
202 |         # Add additional metrics
203 |         f.write("\nAdditional Metrics:\n")
204 |         f.write("-" * 20 + "\n")
205 |         f.write(f"Number of samples: {len(y_true)}\n")
206 |         f.write(f"Number of positive samples: {sum(y_true)}\n")
207 |         f.write(f"Number of predicted positive samples: {sum(y_pred_binary)}\n")
208 |         f.write(f"Positive class ratio: {sum(y_true)/len(y_true):.2%}\n")
209 |     
210 |     return report
211 | 
212 | def analyze_feature_importance(model, feature_names: list,
213 |                              save_path: str = None) -> pd.DataFrame:
214 |     """
215 |     Analyze feature importance using model weights.
216 |     
217 |     Args:
218 |         model: Trained neural network model
219 |         feature_names (list): List of feature names
220 |         save_path (str, optional): Path to save the plot
221 |         
222 |     Returns:
223 |         pd.DataFrame: Feature importance scores
224 |     """
225 |     # Get weights from first layer
226 |     weights = np.abs(model.layers[0].get_weights()[0])
227 |     
228 |     # Calculate feature importance
229 |     importance = np.mean(weights, axis=1)
230 |     
231 |     # Create DataFrame
232 |     importance_df = pd.DataFrame({
233 |         'feature': feature_names,
234 |         'importance': importance
235 |     })
236 |     importance_df = importance_df.sort_values('importance', ascending=False)
237 |     
238 |     # Plot feature importance
239 |     plt.figure(figsize=(10, 6))
240 |     sns.barplot(x='importance', y='feature', data=importance_df)
241 |     plt.title('Feature Importance')
242 |     plt.tight_layout()
243 |     
244 |     if save_path:
245 |         os.makedirs(os.path.dirname(save_path), exist_ok=True)
246 |         plt.savefig(save_path, bbox_inches='tight', dpi=300)
247 |     plt.close()
248 |     
249 |     return importance_df 


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/src/models/neural_network.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Neural network model for customer category prediction.
  3 | """
  4 | import os
  5 | import numpy as np
  6 | import tensorflow as tf
  7 | from tensorflow.keras.models import Sequential, load_model
  8 | from tensorflow.keras.layers import Dense, Dropout
  9 | from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
 10 | from tensorflow.keras.metrics import AUC
 11 | from typing import Tuple, Dict, Any
 12 | import json
 13 | 
 14 | from src.data.feature_engineering import (
 15 |     create_customer_features,
 16 |     prepare_model_data,
 17 |     get_train_test_split
 18 | )
 19 | 
 20 | class CustomerCategoryPredictor:
 21 |     """
 22 |     Neural network model for predicting customer category purchases.
 23 |     """
 24 |     def __init__(self,
 25 |                  input_dim: int,
 26 |                  hidden_layers: list = [64, 32, 16],
 27 |                  dropout_rate: float = 0.3,
 28 |                  learning_rate: float = 0.001):
 29 |         """
 30 |         Initialize the model.
 31 |         
 32 |         Args:
 33 |             input_dim (int): Number of input features
 34 |             hidden_layers (list): List of hidden layer sizes
 35 |             dropout_rate (float): Dropout rate for regularization
 36 |             learning_rate (float): Learning rate for optimizer
 37 |         """
 38 |         self.input_dim = input_dim
 39 |         self.hidden_layers = hidden_layers
 40 |         self.dropout_rate = dropout_rate
 41 |         self.learning_rate = learning_rate
 42 |         self.model = self._build_model()
 43 |         
 44 |     def _build_model(self) -> Sequential:
 45 |         """
 46 |         Build the neural network model.
 47 |         
 48 |         Returns:
 49 |             Sequential: Compiled Keras model
 50 |         """
 51 |         model = Sequential()
 52 |         
 53 |         # Input layer
 54 |         model.add(Dense(self.hidden_layers[0], activation='relu', input_dim=self.input_dim))
 55 |         model.add(Dropout(self.dropout_rate))
 56 |         
 57 |         # Hidden layers
 58 |         for units in self.hidden_layers[1:]:
 59 |             model.add(Dense(units, activation='relu'))
 60 |             model.add(Dropout(self.dropout_rate))
 61 |         
 62 |         # Output layer
 63 |         model.add(Dense(1, activation='sigmoid'))
 64 |         
 65 |         # Compile model
 66 |         model.compile(
 67 |             optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate),
 68 |             loss='binary_crossentropy',
 69 |             metrics=['accuracy', AUC(name='auc')]
 70 |         )
 71 |         
 72 |         return model
 73 |     
 74 |     def train(self,
 75 |               X_train: np.ndarray,
 76 |               y_train: np.ndarray,
 77 |               batch_size: int = 32,
 78 |               epochs: int = 100,
 79 |               validation_split: float = 0.2) -> dict:
 80 |         """
 81 |         Train the model.
 82 |         
 83 |         Args:
 84 |             X_train (np.ndarray): Training features
 85 |             y_train (np.ndarray): Training labels
 86 |             batch_size (int): Batch size for training
 87 |             epochs (int): Number of training epochs
 88 |             validation_split (float): Validation split ratio
 89 |             
 90 |         Returns:
 91 |             dict: Training history
 92 |         """
 93 |         # Create callbacks
 94 |         callbacks = [
 95 |             EarlyStopping(
 96 |                 monitor='val_loss',
 97 |                 patience=10,
 98 |                 restore_best_weights=True,
 99 |                 mode='min'
100 |             ),
101 |             ModelCheckpoint(
102 |                 'best_model.keras',
103 |                 monitor='val_loss',
104 |                 save_best_only=True,
105 |                 mode='min'
106 |             )
107 |         ]
108 |         
109 |         # Train model
110 |         history = self.model.fit(
111 |             X_train, y_train,
112 |             batch_size=batch_size,
113 |             epochs=epochs,
114 |             validation_split=validation_split,
115 |             callbacks=callbacks,
116 |             verbose=1
117 |         )
118 |         
119 |         return history.history
120 |     
121 |     def predict(self, X: np.ndarray) -> np.ndarray:
122 |         """
123 |         Make predictions.
124 |         
125 |         Args:
126 |             X (np.ndarray): Input features
127 |             
128 |         Returns:
129 |             np.ndarray: Predicted probabilities
130 |         """
131 |         return self.model.predict(X, verbose=0)
132 |     
133 |     def evaluate(self, X: np.ndarray, y: np.ndarray) -> dict:
134 |         """
135 |         Evaluate the model.
136 |         
137 |         Args:
138 |             X (np.ndarray): Test features
139 |             y (np.ndarray): Test labels
140 |             
141 |         Returns:
142 |             dict: Evaluation metrics
143 |         """
144 |         return dict(zip(self.model.metrics_names,
145 |                        self.model.evaluate(X, y, verbose=0)))
146 |     
147 |     def save(self, filepath: str):
148 |         """
149 |         Save the model.
150 |         
151 |         Args:
152 |             filepath (str): Path to save the model
153 |         """
154 |         # Ensure filepath ends with .keras
155 |         if not filepath.endswith('.keras'):
156 |             filepath = f"{filepath}.keras"
157 |             
158 |         # Create directory if it doesn't exist
159 |         os.makedirs(os.path.dirname(filepath), exist_ok=True)
160 |         
161 |         # Save model without specifying save_format
162 |         self.model.save(filepath)
163 |     
164 |     @classmethod
165 |     def load(cls, filepath: str) -> 'CustomerCategoryPredictor':
166 |         """
167 |         Load a saved model.
168 |         
169 |         Args:
170 |             filepath (str): Path to the saved model
171 |             
172 |         Returns:
173 |             CustomerCategoryPredictor: Loaded model
174 |         """
175 |         # Ensure filepath ends with .keras
176 |         if not filepath.endswith('.keras'):
177 |             filepath = f"{filepath}.keras"
178 |             
179 |         model = load_model(filepath, compile=True)
180 |         instance = cls(input_dim=model.input_shape[1])
181 |         instance.model = model
182 |         return instance
183 | 
184 | def main():
185 |     """Main training script."""
186 |     # Prepare data
187 |     df = create_customer_features()
188 |     X, y = prepare_model_data(df)
189 |     X_train, X_test, y_train, y_test = get_train_test_split(X, y)
190 |     
191 |     # Split training data into train and validation
192 |     X_train, X_val, y_train, y_val = get_train_test_split(X_train, y_train, test_size=0.2)
193 |     
194 |     # Initialize and train model
195 |     model = CustomerCategoryPredictor(input_dim=X.shape[1])
196 |     history = model.train(X_train, y_train, X_val, y_val)
197 |     
198 |     # Evaluate model
199 |     metrics = model.evaluate(X_test, y_test)
200 |     print("\nTest Metrics:")
201 |     for metric_name, value in metrics.items():
202 |         print(f"{metric_name}: {value:.4f}")
203 |     
204 |     # Save model
205 |     model.save('models/saved/customer_category_predictor.h5')
206 | 
207 | if __name__ == '__main__':
208 |     main() 


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/src/utils/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Utility functions package.
3 | """ 


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/src/utils/__pycache__/__init__.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/src/utils/__pycache__/__init__.cpython-312.pyc


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/src/utils/__pycache__/helpers.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/src/utils/__pycache__/helpers.cpython-312.pyc


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/src/utils/helpers.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Helper functions for the project.
  3 | """
  4 | import os
  5 | import json
  6 | import logging
  7 | from typing import Dict, Any, List, Optional
  8 | import numpy as np
  9 | import pandas as pd
 10 | from datetime import datetime
 11 | 
 12 | # Configure logging
 13 | logging.basicConfig(
 14 |     level=logging.INFO,
 15 |     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
 16 | )
 17 | logger = logging.getLogger(__name__)
 18 | 
 19 | def setup_directories() -> None:
 20 |     """
 21 |     Create necessary directories for the project.
 22 |     """
 23 |     directories = [
 24 |         'data/raw',
 25 |         'data/processed',
 26 |         'models',
 27 |         'reports',
 28 |         'logs'
 29 |     ]
 30 |     
 31 |     for directory in directories:
 32 |         os.makedirs(directory, exist_ok=True)
 33 |         logger.info(f"Created directory: {directory}")
 34 | 
 35 | def save_json(data: Dict[str, Any], filepath: str) -> None:
 36 |     """
 37 |     Save data to a JSON file.
 38 |     
 39 |     Args:
 40 |         data (Dict[str, Any]): Data to save
 41 |         filepath (str): Path to save the file
 42 |     """
 43 |     try:
 44 |         with open(filepath, 'w', encoding='utf-8') as f:
 45 |             json.dump(data, f, indent=4, ensure_ascii=False)
 46 |         logger.info(f"Saved data to {filepath}")
 47 |     except Exception as e:
 48 |         logger.error(f"Error saving data to {filepath}: {str(e)}")
 49 |         raise
 50 | 
 51 | def load_json(filepath: str) -> Dict[str, Any]:
 52 |     """
 53 |     Load data from a JSON file.
 54 |     
 55 |     Args:
 56 |         filepath (str): Path to the file
 57 |         
 58 |     Returns:
 59 |         Dict[str, Any]: Loaded data
 60 |     """
 61 |     try:
 62 |         with open(filepath, 'r', encoding='utf-8') as f:
 63 |             data = json.load(f)
 64 |         logger.info(f"Loaded data from {filepath}")
 65 |         return data
 66 |     except Exception as e:
 67 |         logger.error(f"Error loading data from {filepath}: {str(e)}")
 68 |         raise
 69 | 
 70 | def save_dataframe(df: pd.DataFrame, filepath: str) -> None:
 71 |     """
 72 |     Save DataFrame to a file.
 73 |     
 74 |     Args:
 75 |         df (pd.DataFrame): DataFrame to save
 76 |         filepath (str): Path to save the file
 77 |     """
 78 |     try:
 79 |         df.to_csv(filepath, index=False)
 80 |         logger.info(f"Saved DataFrame to {filepath}")
 81 |     except Exception as e:
 82 |         logger.error(f"Error saving DataFrame to {filepath}: {str(e)}")
 83 |         raise
 84 | 
 85 | def load_dataframe(filepath: str) -> pd.DataFrame:
 86 |     """
 87 |     Load DataFrame from a file.
 88 |     
 89 |     Args:
 90 |         filepath (str): Path to the file
 91 |         
 92 |     Returns:
 93 |         pd.DataFrame: Loaded DataFrame
 94 |     """
 95 |     try:
 96 |         df = pd.read_csv(filepath)
 97 |         logger.info(f"Loaded DataFrame from {filepath}")
 98 |         return df
 99 |     except Exception as e:
100 |         logger.error(f"Error loading DataFrame from {filepath}: {str(e)}")
101 |         raise
102 | 
103 | def calculate_time_features(df: pd.DataFrame,
104 |                           date_column: str) -> pd.DataFrame:
105 |     """
106 |     Calculate time-based features from a date column.
107 |     
108 |     Args:
109 |         df (pd.DataFrame): Input DataFrame
110 |         date_column (str): Name of the date column
111 |         
112 |     Returns:
113 |         pd.DataFrame: DataFrame with additional time features
114 |     """
115 |     df = df.copy()
116 |     
117 |     # Convert to datetime if not already
118 |     if not pd.api.types.is_datetime64_any_dtype(df[date_column]):
119 |         df[date_column] = pd.to_datetime(df[date_column])
120 |     
121 |     # Extract time components
122 |     df[f'{date_column}_year'] = df[date_column].dt.year
123 |     df[f'{date_column}_month'] = df[date_column].dt.month
124 |     df[f'{date_column}_day'] = df[date_column].dt.day
125 |     df[f'{date_column}_dayofweek'] = df[date_column].dt.dayofweek
126 |     df[f'{date_column}_quarter'] = df[date_column].dt.quarter
127 |     
128 |     return df
129 | 
130 | def calculate_rolling_features(df: pd.DataFrame,
131 |                              group_col: str,
132 |                              value_col: str,
133 |                              windows: List[int]) -> pd.DataFrame:
134 |     """
135 |     Calculate rolling window features.
136 |     
137 |     Args:
138 |         df (pd.DataFrame): Input DataFrame
139 |         group_col (str): Column to group by
140 |         value_col (str): Column to calculate rolling features for
141 |         windows (List[int]): List of window sizes
142 |         
143 |     Returns:
144 |         pd.DataFrame: DataFrame with rolling features
145 |     """
146 |     df = df.copy()
147 |     
148 |     for window in windows:
149 |         # Calculate rolling mean
150 |         df[f'{value_col}_rolling_mean_{window}'] = (
151 |             df.groupby(group_col)[value_col]
152 |             .transform(lambda x: x.rolling(window, min_periods=1).mean())
153 |         )
154 |         
155 |         # Calculate rolling std
156 |         df[f'{value_col}_rolling_std_{window}'] = (
157 |             df.groupby(group_col)[value_col]
158 |             .transform(lambda x: x.rolling(window, min_periods=1).std())
159 |         )
160 |     
161 |     return df
162 | 
163 | def calculate_lag_features(df: pd.DataFrame,
164 |                           group_col: str,
165 |                           value_col: str,
166 |                           lags: List[int]) -> pd.DataFrame:
167 |     """
168 |     Calculate lag features.
169 |     
170 |     Args:
171 |         df (pd.DataFrame): Input DataFrame
172 |         group_col (str): Column to group by
173 |         value_col (str): Column to calculate lag features for
174 |         lags (List[int]): List of lag periods
175 |         
176 |     Returns:
177 |         pd.DataFrame: DataFrame with lag features
178 |     """
179 |     df = df.copy()
180 |     
181 |     for lag in lags:
182 |         df[f'{value_col}_lag_{lag}'] = (
183 |             df.groupby(group_col)[value_col]
184 |             .transform(lambda x: x.shift(lag))
185 |         )
186 |     
187 |     return df
188 | 
189 | def calculate_ratio_features(df: pd.DataFrame,
190 |                            numerator_col: str,
191 |                            denominator_col: str,
192 |                            prefix: str = '') -> pd.DataFrame:
193 |     """
194 |     Calculate ratio features.
195 |     
196 |     Args:
197 |         df (pd.DataFrame): Input DataFrame
198 |         numerator_col (str): Numerator column
199 |         denominator_col (str): Denominator column
200 |         prefix (str): Prefix for the new column name
201 |         
202 |     Returns:
203 |         pd.DataFrame: DataFrame with ratio features
204 |     """
205 |     df = df.copy()
206 |     
207 |     # Calculate ratio
208 |     ratio_col = f'{prefix}ratio' if prefix else 'ratio'
209 |     df[ratio_col] = df[numerator_col] / df[denominator_col]
210 |     
211 |     # Handle division by zero
212 |     df[ratio_col] = df[ratio_col].replace([np.inf, -np.inf], np.nan)
213 |     
214 |     return df
215 | 
216 | def calculate_percentile_features(df: pd.DataFrame,
217 |                                 group_col: str,
218 |                                 value_col: str,
219 |                                 percentiles: List[float]) -> pd.DataFrame:
220 |     """
221 |     Calculate percentile features.
222 |     
223 |     Args:
224 |         df (pd.DataFrame): Input DataFrame
225 |         group_col (str): Column to group by
226 |         value_col (str): Column to calculate percentiles for
227 |         percentiles (List[float]): List of percentiles to calculate
228 |         
229 |     Returns:
230 |         pd.DataFrame: DataFrame with percentile features
231 |     """
232 |     df = df.copy()
233 |     
234 |     for percentile in percentiles:
235 |         df[f'{value_col}_percentile_{int(percentile*100)}'] = (
236 |             df.groupby(group_col)[value_col]
237 |             .transform(lambda x: x.quantile(percentile))
238 |         )
239 |     
240 |     return df 


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/tests/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Test package.
3 | """ 


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/tests/test_data.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Tests for data processing functions.
 3 | """
 4 | import pytest
 5 | import pandas as pd
 6 | import numpy as np
 7 | from datetime import datetime, timedelta
 8 | from src.data.feature_engineering import (
 9 |     create_customer_features,
10 |     prepare_model_data,
11 |     get_train_test_split
12 | )
13 | from src.utils.helpers import (
14 |     handle_missing_values,
15 |     calculate_customer_metrics,
16 |     create_time_based_features
17 | )
18 | 
19 | @pytest.fixture
20 | def sample_data():
21 |     """Create sample data for testing."""
22 |     data = {
23 |         'customer_id': ['C1', 'C1', 'C2', 'C2', 'C3'],
24 |         'category_name': ['A', 'B', 'A', 'C', 'B'],
25 |         'purchase_count': [2, 1, 3, 1, 2],
26 |         'total_spent': [100, 50, 150, 75, 80],
27 |         'last_purchase_date': [
28 |             datetime.now() - timedelta(days=x)
29 |             for x in [1, 2, 3, 4, 5]
30 |         ]
31 |     }
32 |     return pd.DataFrame(data)
33 | 
34 | def test_create_customer_features(sample_data):
35 |     """Test customer feature creation."""
36 |     df = create_customer_features()
37 |     assert isinstance(df, pd.DataFrame)
38 |     assert not df.empty
39 |     assert 'category_spend_ratio' in df.columns
40 |     assert 'category_purchase_ratio' in df.columns
41 | 
42 | def test_prepare_model_data(sample_data):
43 |     """Test model data preparation."""
44 |     X, y = prepare_model_data(sample_data)
45 |     assert isinstance(X, np.ndarray)
46 |     assert isinstance(y, np.ndarray)
47 |     assert len(X) == len(y)
48 |     assert X.shape[1] > 0
49 | 
50 | def test_get_train_test_split(sample_data):
51 |     """Test train-test split function."""
52 |     X, y = prepare_model_data(sample_data)
53 |     X_train, X_test, y_train, y_test = get_train_test_split(X, y, test_size=0.2)
54 |     
55 |     assert len(X_train) + len(X_test) == len(X)
56 |     assert len(y_train) + len(y_test) == len(y)
57 |     assert X_train.shape[1] == X_test.shape[1]
58 | 
59 | def test_handle_missing_values(sample_data):
60 |     """Test missing value handling."""
61 |     # Add some missing values
62 |     sample_data.loc[0, 'total_spent'] = np.nan
63 |     sample_data.loc[1, 'category_name'] = None
64 |     
65 |     # Test different strategies
66 |     df_mean = handle_missing_values(sample_data, strategy='mean')
67 |     df_median = handle_missing_values(sample_data, strategy='median')
68 |     df_zero = handle_missing_values(sample_data, strategy='zero')
69 |     
70 |     assert not df_mean.isnull().any().any()
71 |     assert not df_median.isnull().any().any()
72 |     assert not df_zero.isnull().any().any()
73 | 
74 | def test_calculate_customer_metrics(sample_data):
75 |     """Test customer metrics calculation."""
76 |     metrics = calculate_customer_metrics(
77 |         sample_data,
78 |         customer_id_col='customer_id',
79 |         date_col='last_purchase_date',
80 |         value_col='total_spent'
81 |     )
82 |     
83 |     assert isinstance(metrics, pd.DataFrame)
84 |     assert 'days_since_first_purchase' in metrics.columns
85 |     assert 'days_since_last_purchase' in metrics.columns
86 |     assert 'purchase_frequency' in metrics.columns
87 | 
88 | def test_create_time_based_features(sample_data):
89 |     """Test time-based feature creation."""
90 |     df = create_time_based_features(sample_data, 'last_purchase_date')
91 |     
92 |     assert 'year' in df.columns
93 |     assert 'month' in df.columns
94 |     assert 'day' in df.columns
95 |     assert 'dayofweek' in df.columns
96 |     assert 'month_sin' in df.columns
97 |     assert 'month_cos' in df.columns 


--------------------------------------------------------------------------------
/DeepLearning/customer_categorical_order_prediction/tests/test_models.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Tests for model functions.
  3 | """
  4 | import pytest
  5 | import numpy as np
  6 | import tensorflow as tf
  7 | from src.models.neural_network import CustomerCategoryPredictor
  8 | from src.models.model_evaluation import (
  9 |     plot_training_history,
 10 |     plot_roc_curve,
 11 |     plot_precision_recall_curve,
 12 |     plot_confusion_matrix,
 13 |     generate_evaluation_report,
 14 |     analyze_feature_importance
 15 | )
 16 | 
 17 | @pytest.fixture
 18 | def sample_data():
 19 |     """Create sample data for testing."""
 20 |     np.random.seed(42)
 21 |     X = np.random.randn(100, 10)
 22 |     y = np.random.randint(0, 2, 100)
 23 |     return X, y
 24 | 
 25 | @pytest.fixture
 26 | def sample_model(sample_data):
 27 |     """Create a sample model for testing."""
 28 |     X, _ = sample_data
 29 |     model = CustomerCategoryPredictor(input_dim=X.shape[1])
 30 |     return model
 31 | 
 32 | def test_model_initialization(sample_data):
 33 |     """Test model initialization."""
 34 |     X, _ = sample_data
 35 |     model = CustomerCategoryPredictor(input_dim=X.shape[1])
 36 |     
 37 |     assert isinstance(model.model, tf.keras.Sequential)
 38 |     assert model.history is None
 39 | 
 40 | def test_model_training(sample_model, sample_data):
 41 |     """Test model training."""
 42 |     X, y = sample_data
 43 |     X_train, X_val = X[:80], X[80:]
 44 |     y_train, y_val = y[:80], y[80:]
 45 |     
 46 |     history = sample_model.train(
 47 |         X_train, y_train,
 48 |         X_val, y_val,
 49 |         batch_size=32,
 50 |         epochs=2
 51 |     )
 52 |     
 53 |     assert isinstance(history, dict)
 54 |     assert 'loss' in history
 55 |     assert 'accuracy' in history
 56 | 
 57 | def test_model_evaluation(sample_model, sample_data):
 58 |     """Test model evaluation."""
 59 |     X, y = sample_data
 60 |     X_train, X_test = X[:80], X[80:]
 61 |     y_train, y_test = y[:80], y[80:]
 62 |     
 63 |     # Train model
 64 |     sample_model.train(X_train, y_train, X_test, y_test, epochs=2)
 65 |     
 66 |     # Evaluate model
 67 |     metrics = sample_model.evaluate(X_test, y_test)
 68 |     
 69 |     assert isinstance(metrics, dict)
 70 |     assert 'loss' in metrics
 71 |     assert 'accuracy' in metrics
 72 | 
 73 | def test_model_prediction(sample_model, sample_data):
 74 |     """Test model prediction."""
 75 |     X, _ = sample_data
 76 |     predictions = sample_model.predict(X)
 77 |     
 78 |     assert isinstance(predictions, np.ndarray)
 79 |     assert predictions.shape[0] == X.shape[0]
 80 |     assert predictions.shape[1] == 1
 81 | 
 82 | def test_model_save_load(sample_model, sample_data, tmp_path):
 83 |     """Test model saving and loading."""
 84 |     X, y = sample_data
 85 |     X_train, X_val = X[:80], X[80:]
 86 |     y_train, y_val = y[:80], y[80:]
 87 |     
 88 |     # Train model
 89 |     sample_model.train(X_train, y_train, X_val, y_val, epochs=2)
 90 |     
 91 |     # Save model
 92 |     save_path = tmp_path / "test_model.h5"
 93 |     sample_model.save_model(str(save_path))
 94 |     
 95 |     # Load model
 96 |     loaded_model = CustomerCategoryPredictor.load_model(str(save_path))
 97 |     
 98 |     # Compare predictions
 99 |     original_preds = sample_model.predict(X)
100 |     loaded_preds = loaded_model.predict(X)
101 |     
102 |     np.testing.assert_array_almost_equal(original_preds, loaded_preds)
103 | 
104 | def test_plot_training_history(sample_model, sample_data, tmp_path):
105 |     """Test training history plotting."""
106 |     X, y = sample_data
107 |     X_train, X_val = X[:80], X[80:]
108 |     y_train, y_val = y[:80], y[80:]
109 |     
110 |     # Train model
111 |     history = sample_model.train(X_train, y_train, X_val, y_val, epochs=2)
112 |     
113 |     # Plot history
114 |     save_path = tmp_path / "history.png"
115 |     plot_training_history(history, str(save_path))
116 |     
117 |     assert save_path.exists()
118 | 
119 | def test_plot_roc_curve(sample_model, sample_data, tmp_path):
120 |     """Test ROC curve plotting."""
121 |     X, y = sample_data
122 |     X_train, X_test = X[:80], X[80:]
123 |     y_train, y_test = y[:80], y[80:]
124 |     
125 |     # Train model
126 |     sample_model.train(X_train, y_train, X_test, y_test, epochs=2)
127 |     
128 |     # Get predictions
129 |     y_pred = sample_model.predict(X_test)
130 |     
131 |     # Plot ROC curve
132 |     save_path = tmp_path / "roc.png"
133 |     plot_roc_curve(y_test, y_pred, str(save_path))
134 |     
135 |     assert save_path.exists()
136 | 
137 | def test_plot_confusion_matrix(sample_model, sample_data, tmp_path):
138 |     """Test confusion matrix plotting."""
139 |     X, y = sample_data
140 |     X_train, X_test = X[:80], X[80:]
141 |     y_train, y_test = y[:80], y[80:]
142 |     
143 |     # Train model
144 |     sample_model.train(X_train, y_train, X_test, y_test, epochs=2)
145 |     
146 |     # Get predictions
147 |     y_pred = sample_model.predict(X_test)
148 |     
149 |     # Plot confusion matrix
150 |     save_path = tmp_path / "confusion.png"
151 |     plot_confusion_matrix(y_test, y_pred, threshold=0.5, save_path=str(save_path))
152 |     
153 |     assert save_path.exists()
154 | 
155 | def test_generate_evaluation_report(sample_model, sample_data, tmp_path):
156 |     """Test evaluation report generation."""
157 |     X, y = sample_data
158 |     X_train, X_test = X[:80], X[80:]
159 |     y_train, y_test = y[:80], y[80:]
160 |     
161 |     # Train model
162 |     sample_model.train(X_train, y_train, X_test, y_test, epochs=2)
163 |     
164 |     # Get predictions
165 |     y_pred = sample_model.predict(X_test)
166 |     
167 |     # Generate report
168 |     save_dir = tmp_path / "reports"
169 |     report = generate_evaluation_report(
170 |         y_test, y_pred,
171 |         threshold=0.5,
172 |         save_dir=str(save_dir)
173 |     )
174 |     
175 |     assert isinstance(report, dict)
176 |     assert save_dir.exists()
177 |     assert (save_dir / "metrics.json").exists()
178 | 
179 | def test_analyze_feature_importance(sample_model, sample_data, tmp_path):
180 |     """Test feature importance analysis."""
181 |     X, y = sample_data
182 |     feature_names = [f"feature_{i}" for i in range(X.shape[1])]
183 |     
184 |     # Train model
185 |     sample_model.train(X[:80], y[:80], X[80:], y[80:], epochs=2)
186 |     
187 |     # Analyze feature importance
188 |     save_path = tmp_path / "importance.png"
189 |     importance_df = analyze_feature_importance(
190 |         sample_model.model,
191 |         feature_names,
192 |         save_path=str(save_path)
193 |     )
194 |     
195 |     assert isinstance(importance_df, pd.DataFrame)
196 |     assert 'feature' in importance_df.columns
197 |     assert 'importance' in importance_df.columns
198 |     assert save_path.exists() 


--------------------------------------------------------------------------------
/DeepLearning/customer_order_prediction/data/query.sql:
--------------------------------------------------------------------------------
 1 | with last_order_date as
 2 | (
 3 |   select max(order_date) as max_date from orders
 4 | ),
 5 | customer_order_stats as (
 6 | select 
 7 | c.customer_id,
 8 | count(o.order_id) as total_orders,
 9 | sum(od.unit_price*od.quantity) as total_spent,
10 | avg(od.unit_price*od.quantity) as avg_order_value
11 | from orders o
12 | inner join customers c
13 | on o.customer_id = c.customer_id
14 | inner join order_details od
15 | on od.order_id = o.order_id
16 | group by c.customer_id),
17 | label_data as(
18 |  select c.customer_id,
19 | 	case when exists(
20 | 	 select 1 from orders o2,last_order_date lod
21 | 		where o2.customer_id = c.customer_id
22 | 		and o2.order_date>(lod.max_date-Interval '6 months')
23 | 	)
24 | 	then 1 else 0
25 | 	end as will_order_again
26 | 	from customers c
27 | )
28 | select 
29 | s.customer_id,
30 | s.total_orders,
31 | s.total_spent,
32 | s.avg_order_value,
33 | l.will_order_again
34 | from customer_order_stats s join label_data l
35 | on s.customer_id = l.customer_id


--------------------------------------------------------------------------------
/DeepLearning/customer_order_prediction/main.py:
--------------------------------------------------------------------------------
 1 | from src.data_loader import load_data
 2 | from src.preprocessing import preprocess_data
 3 | from src.model import build_model
 4 | from src.train import train_model
 5 | from src.evaluate import evaluate_model
 6 | 
 7 | 
 8 | def main():
 9 |   print("Loading data")
10 |   df = load_data("data/query.sql")
11 | 
12 |   print("Preprocessing data")
13 |   X_train,X_test,y_train,y_test =  preprocess_data(df,class_imbalance=2)
14 | 
15 |   print("Building model")
16 |   model = build_model(input_shape=X_train.shape[1])
17 | 
18 |   print("Training model")
19 |   model = train_model(model,X_train,y_train, X_test,y_test)
20 | 
21 |   print("Evaluating model")
22 |   evaluate_model(model,X_test,y_test)
23 | 
24 | 
25 | 
26 | if __name__ =="__main__":
27 |   main()


--------------------------------------------------------------------------------
/DeepLearning/customer_order_prediction/outputs/model.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_order_prediction/outputs/model.h5


--------------------------------------------------------------------------------
/DeepLearning/customer_order_prediction/src/__pycache__/config.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_order_prediction/src/__pycache__/config.cpython-312.pyc


--------------------------------------------------------------------------------
/DeepLearning/customer_order_prediction/src/__pycache__/data_loader.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_order_prediction/src/__pycache__/data_loader.cpython-312.pyc


--------------------------------------------------------------------------------
/DeepLearning/customer_order_prediction/src/__pycache__/evaluate.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_order_prediction/src/__pycache__/evaluate.cpython-312.pyc


--------------------------------------------------------------------------------
/DeepLearning/customer_order_prediction/src/__pycache__/model.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_order_prediction/src/__pycache__/model.cpython-312.pyc


--------------------------------------------------------------------------------
/DeepLearning/customer_order_prediction/src/__pycache__/preprocessing.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_order_prediction/src/__pycache__/preprocessing.cpython-312.pyc


--------------------------------------------------------------------------------
/DeepLearning/customer_order_prediction/src/__pycache__/train.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_order_prediction/src/__pycache__/train.cpython-312.pyc


--------------------------------------------------------------------------------
/DeepLearning/customer_order_prediction/src/config.py:
--------------------------------------------------------------------------------
1 | DB_CONFIG = {
2 |     "host":"localhost",
3 |     "dbname":"northwind",
4 |     "user":"postgres",
5 |     "password":"12345",
6 |     "port":5432
7 | }


--------------------------------------------------------------------------------
/DeepLearning/customer_order_prediction/src/data_loader.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import psycopg2
 3 | from src.config import DB_CONFIG
 4 | 
 5 | 
 6 | def load_data(sql_path):
 7 |     
 8 |     with open(sql_path,"r") as file:
 9 |         query = file.read()
10 | 
11 |     connection = psycopg2.connect(**DB_CONFIG)
12 |     df = pd.read_sql(query,connection)
13 |     connection.close()
14 |     return df


--------------------------------------------------------------------------------
/DeepLearning/customer_order_prediction/src/evaluate.py:
--------------------------------------------------------------------------------
1 | from sklearn.metrics import classification_report
2 | 
3 | def evaluate_model(model,X_test,y_test):
4 |     y_pred = model.predict(X_test)
5 |     y_pred_labels = (y_pred>0.5).astype("int32")
6 | 
7 |     print(classification_report(y_test,y_pred_labels))


--------------------------------------------------------------------------------
/DeepLearning/customer_order_prediction/src/model.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | def build_model(input_shape):
 4 | 
 5 |   model = tf.keras.Sequential(
 6 |       [
 7 |           tf.keras.layers.Dense(8,activation="relu",input_shape=(input_shape,)),
 8 |           tf.keras.layers.Dense(4,activation="relu"),
 9 |           tf.keras.layers.Dense(1,activation="sigmoid")
10 |       ]
11 |   )
12 | 
13 |   model.compile(optimizer="adam",loss="mean_squared_error",metrics=["accuracy"])
14 |   return model


--------------------------------------------------------------------------------
/DeepLearning/customer_order_prediction/src/preprocessing.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | from sklearn.model_selection import train_test_split
 3 | from sklearn.preprocessing import StandardScaler
 4 | from imblearn.over_sampling import SMOTE
 5 | from imblearn.over_sampling import RandomOverSampler
 6 | from sklearn.utils.class_weight import compute_class_weight
 7 | import numpy as np
 8 | 
 9 | def preprocess_data(df,test_size=0.2,class_imbalance=0):
10 |     X = df[["total_orders","total_spent","avg_order_value"]]
11 |     y = df["will_order_again"]
12 | 
13 |     scaler = StandardScaler()
14 |     X_scaled = scaler.fit_transform(X)
15 | 
16 |     X_train,X_test,y_train,y_test = train_test_split(X_scaled,y,test_size=test_size,random_state=42)
17 | 
18 |     if class_imbalance==0:
19 |         return X_train,X_test,y_train,y_test
20 |     
21 |     #smote
22 |     if class_imbalance == 1:
23 |         print("Random Oversampling uygulanıyor...")
24 |         ros = RandomOverSampler(random_state=42)
25 |         X_resampled, y_resampled = ros.fit_resample(X_train, y_train)
26 |         return X_resampled, X_test, y_resampled, y_test
27 |     
28 |     if class_imbalance == 2:
29 |         print("Class Weight uygulanıyor...")
30 |         class_weights = compute_class_weight(
31 |             class_weight='balanced',
32 |             classes=np.unique(y_train),
33 |             y=y_train
34 |         )
35 | 
36 |         class_weight_dict = dict(zip(np.unique(y_train), class_weights))
37 |         print(f"Sınıf ağırlıkları: {class_weight_dict}")
38 | 
39 |         global class_weights_dict
40 |         class_weights_dict = class_weight_dict
41 |         return X_train, X_test, y_train, y_test


--------------------------------------------------------------------------------
/DeepLearning/customer_order_prediction/src/train.py:
--------------------------------------------------------------------------------
1 | def train_model(model,X_train,y_train,X_test,y_test):
2 |   model.fit(X_train,y_train,epochs=50,validation_data=(X_test,y_test),verbose=1)
3 |   model.save("outputs/model.h5")
4 |   return model


--------------------------------------------------------------------------------
/DeepLearning/customer_order_risk/__pycache__/config.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_order_risk/__pycache__/config.cpython-312.pyc


--------------------------------------------------------------------------------
/DeepLearning/customer_order_risk/best_model.keras:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_order_risk/best_model.keras


--------------------------------------------------------------------------------
/DeepLearning/customer_order_risk/config.py:
--------------------------------------------------------------------------------
 1 | from dotenv import load_dotenv
 2 | import os
 3 | 
 4 | load_dotenv()
 5 | 
 6 | DB_CONFIG = {
 7 |    "dbname": os.getenv("DB_NAME","northwind"),
 8 |    "user":os.getenv("DB_USER","postgres"),
 9 |    "password":os.getenv("DB_PASSWORD","12345"),
10 |    "host":os.getenv("DB_HOST","localhost"),
11 |    "port":os.getenv("DB_PORT",5432)
12 | }
13 | 
14 | MODEL_CONFIG = {
15 |     "test_size":0.2,
16 |     "random_state":42,
17 |     "epochs":50
18 | }
19 | 
20 | FEATURE_CONFIG = {
21 |     "high_discount_threshold":0.75,    #75th percentile means high discount begins
22 |     "low_amount_threshold": 0.25
23 | }
24 | 


--------------------------------------------------------------------------------
/DeepLearning/customer_order_risk/main.py:
--------------------------------------------------------------------------------
 1 | from src.database import DatabaseManager
 2 | from src.feature_engineering import FeatureEngineer
 3 | from src.model import ReturnRiskModel
 4 | 
 5 | from sklearn.model_selection import train_test_split
 6 | from config import MODEL_CONFIG
 7 | import pandas as pd
 8 | import numpy as np
 9 | 
10 | 
11 | def main():
12 |     db_manager = None
13 | 
14 |     try:
15 |         db_manager = DatabaseManager()
16 |         feature_engineer = FeatureEngineer()
17 |         model = ReturnRiskModel()
18 | 
19 |         print("Fetching order data")
20 |         df = db_manager.get_order_data()
21 | 
22 |         print("Creating features")
23 |         df_processed = feature_engineer.create_feautures(df)
24 | 
25 |         X, y = feature_engineer.prepare_model_data(df_processed)
26 |         feature_names = [
27 |             "unit_price", "quantity", "discount", "total_amount", "discount_amount",
28 |             "avg_order_amount", "std_order_amount", "total_spent", "avg_discount",
29 |             "max_discount", "avg_quantity", "total_quantity"
30 |         ]
31 | 
32 |         X_train, X_test, y_train, y_test = model.split_data(X, y)
33 |         
34 |         model.build_model(input_dim=X_train.shape[1])
35 |         model.train(X_train, y_train, X_test, y_test)
36 |         loss, accuracy = model.evaluate(X_test, y_test)
37 | 
38 |         print(f"Test accuracy: {accuracy}")
39 | 
40 |         # Riskli bulunan siparişleri belirle
41 |         predictions = model.predict(X_test)
42 |         risky_orders = X_test[predictions.flatten() > 0.5]  # 0.5'ten büyük tahminleri riskli kabul et
43 | 
44 |         if len(risky_orders) > 0:
45 |             print("\nRiskli bulunan siparişlerin açıklaması:")
46 |             shap_df, feature_importance = model.explain_prediction(risky_orders, feature_names)
47 |             
48 |             print("\nEn önemli özellikler (SHAP değerlerine göre):")
49 |             print(feature_importance.head())
50 |             
51 |             print("\nİlk riskli sipariş için özellik katkıları:")
52 |             first_risky = shap_df.iloc[0]
53 |             for feature, value in first_risky.items():
54 |                 if abs(value) > 0.01:  # Sadece önemli katkıları göster
55 |                     direction = "arttırdı" if value > 0 else "azalttı"
56 |                     print(f"{feature}: {value:.4f} ({direction})")
57 | 
58 |     except Exception as e:
59 |         print(e)
60 |     finally:
61 |         if db_manager is not None:
62 |             db_manager.disconnect()
63 | 
64 | if __name__ == "__main__":
65 |     main()


--------------------------------------------------------------------------------
/DeepLearning/customer_order_risk/requirements.txt:
--------------------------------------------------------------------------------
1 | pandas>=2.1.0
2 | psycopg2-binary>=2.9.9
3 | tensorflow>=2.15.0
4 | scikit-learn>=1.3.0
5 | lime>=0.2.0.1
6 | numpy>=1.24.0 


--------------------------------------------------------------------------------
/DeepLearning/customer_order_risk/src/__pycache__/database.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_order_risk/src/__pycache__/database.cpython-312.pyc


--------------------------------------------------------------------------------
/DeepLearning/customer_order_risk/src/__pycache__/feature_engineering.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_order_risk/src/__pycache__/feature_engineering.cpython-312.pyc


--------------------------------------------------------------------------------
/DeepLearning/customer_order_risk/src/__pycache__/model.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_order_risk/src/__pycache__/model.cpython-312.pyc


--------------------------------------------------------------------------------
/DeepLearning/customer_order_risk/src/database.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import psycopg2
 3 | from config import DB_CONFIG
 4 | 
 5 | class DatabaseManager:
 6 |     def __init__(self):
 7 |         self.conn = None
 8 |         self.connect()
 9 | 
10 |     def connect(self):
11 |         try:
12 |             self.conn = psycopg2.connect(**DB_CONFIG)
13 |             print("Database connection established")
14 |         except Exception as e:
15 |             print(f"Error connecting to the database : {e}")
16 |             raise
17 |     
18 |     def disconnect(self):
19 |         if self.conn:
20 |             self.conn.close()
21 |             print("Connection closed")
22 |     
23 |     def get_order_data(self):
24 |         query = """
25 |         select 
26 |         od.order_id,
27 |         od.product_id,
28 |         od.unit_price,
29 |         od.quantity,
30 |         od.discount,
31 |         o.customer_id,
32 |         o.order_date,
33 |         p.category_id,
34 |         c.company_name
35 |         from
36 |         orders o inner join order_details od
37 |         on o.order_id=od.order_id
38 |         inner join products p
39 |         on p.product_id=od.product_id
40 |         inner join customers c
41 |         on c.customer_id=o.customer_id
42 |         """
43 | 
44 |         try:
45 |           df = pd.read_sql_query(query,self.conn)
46 |           return df
47 |         except Exception as e:
48 |           print(f"Error {e}")
49 |           raise
50 | 
51 |     


--------------------------------------------------------------------------------
/DeepLearning/customer_order_risk/src/feature_engineering.py:
--------------------------------------------------------------------------------
 1 | from sklearn.preprocessing import StandardScaler
 2 | import pandas as pd
 3 | import numpy as np
 4 | from config import MODEL_CONFIG
 5 | from config import FEATURE_CONFIG
 6 | 
 7 | class FeatureEngineer:
 8 |     
 9 |     def __init__(self):
10 |         self.scaler = StandardScaler()
11 |         self.customer_features = None
12 | 
13 |     def create_feautures(self,df):
14 |         df["total_amount"] = df["unit_price"]*df["quantity"]*(1-df["discount"])
15 |         df["discount_amount"] = df["unit_price"]*df["quantity"]*df["discount"]
16 | 
17 |         self.customer_features = df.groupby("customer_id").agg({
18 |             "total_amount":["mean","std","sum"],
19 |             "discount":["mean","max"],
20 |             "quantity":["mean","sum"]
21 |         }).reset_index()
22 | 
23 |         self.customer_features.columns = ["customer_id","avg_order_amount","std_order_amount","total_spent","avg_discount","max_discount","avg_quantity","total_quantity"]
24 | 
25 |         df = df.merge(self.customer_features, on ="customer_id",how="left")
26 | 
27 |         high_discount = df["discount"]>df["discount"].quantile(FEATURE_CONFIG["high_discount_threshold"])
28 |         low_amount = df["total_amount"]<df["total_amount"].quantile(FEATURE_CONFIG["low_amount_threshold"])
29 |         df["return_risk"] = (high_discount & low_amount).astype(int)
30 | 
31 |         return df 
32 | 
33 |     def prepare_model_data(self,df):
34 |         feature_columns = [
35 |             "unit_price","quantity","discount","total_amount","discount_amount",
36 |             "avg_order_amount","std_order_amount","total_spent","avg_discount",
37 |             "max_discount","avg_quantity","total_quantity"
38 |         ]
39 | 
40 |         X = df[feature_columns]
41 |         y = df["return_risk"]
42 | 
43 |         X_scaled = self.scaler.fit_transform(X)
44 |         return X_scaled,y
45 |     
46 |     


--------------------------------------------------------------------------------
/DeepLearning/customer_order_risk/src/model.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | from tensorflow.keras.models  import Sequential,load_model
  3 | from tensorflow.keras.layers import Dense,Dropout
  4 | from tensorflow.keras.optimizers import Adam
  5 | from tensorflow.keras.callbacks import EarlyStopping,ModelCheckpoint
  6 | import shap
  7 | import numpy as np
  8 | import pandas as pd
  9 | 
 10 | from sklearn.model_selection import train_test_split
 11 | from config import MODEL_CONFIG
 12 | 
 13 | class ReturnRiskModel:
 14 |     def __init__(self):
 15 |         self.model = None
 16 |         self.history = None
 17 | 
 18 |     def build_model(self,input_dim):
 19 |         model = Sequential([
 20 |             Dense(64,activation="relu",input_shape=(input_dim,)),
 21 |             Dropout(0.3),
 22 |             Dense(32,activation="relu"),
 23 |             Dropout(0.2),
 24 |             Dense(16,activation="relu"),
 25 |             Dense(1,activation="sigmoid")
 26 |         ])
 27 | 
 28 |         model.compile(
 29 |             optimizer =Adam(),
 30 |             loss = "binary_crossentropy",
 31 |             metrics = ["accuracy"]
 32 |         )
 33 | 
 34 |         self.model = model
 35 | 
 36 |     def split_data(self,X,y):
 37 |         X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=MODEL_CONFIG["test_size"],random_state=MODEL_CONFIG["random_state"])
 38 |         return X_train,X_test,y_train,y_test
 39 | 
 40 |     def train(self,X_train,y_train,X_test,y_test):
 41 |         
 42 | 
 43 |         callbacks = [
 44 |            EarlyStopping(monitor="val_loss",patience=5,restore_best_weights=True),
 45 |            ModelCheckpoint("best_model.keras",monitor="val_loss",save_best_only=True)
 46 |         ]
 47 | 
 48 |         self.history = self.model.fit(
 49 |             X_train,y_train,
 50 |             epochs = MODEL_CONFIG["epochs"],
 51 |             validation_data = (X_test,y_test),
 52 |             callbacks = callbacks,
 53 |             verbose=1
 54 |         )
 55 | 
 56 |         return X_train,X_test,y_train,y_test
 57 | 
 58 |     def evaluate(self,X_test,y_test):
 59 |         return self.model.evaluate(X_test,y_test)
 60 |     
 61 |     def predict(self,X):
 62 |         return self.model.predict(X)
 63 |     
 64 |     def save_model(self,filepath):
 65 |         if not filepath:
 66 |             print("Wrong file path")
 67 |             return
 68 |         self.model.save(filepath)
 69 |         print(f"Model saved to {filepath}")
 70 | 
 71 |     def load_model(self,filepath):
 72 |         if not filepath:
 73 |             print("Wrong file path")
 74 |             return
 75 |         self.model = load_model(filepath)
 76 |         print(f"Model loaded from {filepath}")
 77 | 
 78 |     def explain_prediction(self, X, feature_names):
 79 |         """
 80 |         SHAP değerlerini kullanarak model tahminlerini açıklar.
 81 |         
 82 |         Args:
 83 |             X: Açıklanacak örnekler (numpy array)
 84 |             feature_names: Özellik isimleri listesi
 85 |             
 86 |         Returns:
 87 |             DataFrame: Her özelliğin SHAP değerlerini içeren DataFrame
 88 |         """
 89 |         # SHAP değerlerini hesapla
 90 |         explainer = shap.DeepExplainer(self.model, X[:100])  # İlk 100 örneği background olarak kullan
 91 |         shap_values = explainer.shap_values(X)
 92 |         
 93 |         # SHAP değerlerini DataFrame'e dönüştür
 94 |         if isinstance(shap_values, list):
 95 |             shap_values = shap_values[0]  # Binary classification için ilk değerleri al
 96 |             
 97 |         shap_df = pd.DataFrame(shap_values, columns=feature_names)
 98 |         
 99 |         # Her özelliğin mutlak etkisini hesapla
100 |         feature_importance = pd.DataFrame({
101 |             'feature': feature_names,
102 |             'importance': np.abs(shap_values).mean(axis=0)
103 |         })
104 |         feature_importance = feature_importance.sort_values('importance', ascending=False)
105 |         
106 |         return shap_df, feature_importance


--------------------------------------------------------------------------------
/DeepLearning/project1.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | import psycopg2
 4 | import tensorflow as tf
 5 | from sklearn.model_selection import train_test_split
 6 | from sklearn.preprocessing import StandardScaler
 7 | 
 8 | #1 connect to the database
 9 | connection = psycopg2.connect(host="localhost", dbname="northwind", user="postgres", password="12345", port=5432)
10 | 
11 | query = """
12 | with last_order_date as
13 | (
14 |   select max(order_date) as max_date from orders
15 | ),
16 | customer_order_stats as (
17 | select 
18 | c.customer_id,
19 | count(o.order_id) as total_orders,
20 | sum(od.unit_price*od.quantity) as total_spent,
21 | avg(od.unit_price*od.quantity) as avg_order_value
22 | from orders o
23 | inner join customers c
24 | on o.customer_id = c.customer_id
25 | inner join order_details od
26 | on od.order_id = o.order_id
27 | group by c.customer_id),
28 | label_data as(
29 |  select c.customer_id,
30 | 	case when exists(
31 | 	 select 1 from orders o2,last_order_date lod
32 | 		where o2.customer_id = c.customer_id
33 | 		and o2.order_date>(lod.max_date-Interval '6 months')
34 | 	)
35 | 	then 1 else 0
36 | 	end as will_order_again
37 | 	from customers c
38 | )
39 | select 
40 | s.customer_id,
41 | s.total_orders,
42 | s.total_spent,
43 | s.avg_order_value,
44 | l.will_order_again
45 | from customer_order_stats s join label_data l
46 | on s.customer_id = l.customer_id
47 | """
48 | 
49 | df = pd.read_sql(query,connection)
50 | connection.close()
51 | 
52 | X = df[["total_orders","total_spent","avg_order_value"]]
53 | y = df["will_order_again"]
54 | 
55 | scaler = StandardScaler()
56 | X_scaled = scaler.fit_transform(X)
57 | 
58 | X_train,X_test,y_train,y_test = train_test_split(X_scaled,y,test_size=0.2,random_state=42)
59 | 
60 | model = tf.keras.Sequential(
61 |     [
62 |         tf.keras.layers.Dense(8,activation="relu",input_shape=(X_train.shape[1],)),
63 |         tf.keras.layers.Dense(4,activation="relu"),
64 |         tf.keras.layers.Dense(1,activation="sigmoid")
65 |     ]
66 | )
67 | 
68 | model.compile(optimizer="adam",loss="mean_squared_error",metrics=["accuracy"])
69 | 
70 | model.fit(X_train,y_train,epochs=50,validation_data=(X_test,y_test),verbose=1)
71 | 
72 | loss,acc = model.evaluate(X_test,y_test)
73 | print(acc)


--------------------------------------------------------------------------------
/DeepLearning/sample1.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | #inputs
 4 | temperature = 5
 5 | humidity = 60
 6 | 
 7 | X = np.array([temperature,humidity])
 8 | 
 9 | #nöron  #weights
10 | weights = np.array([0.4,0.6])
11 | 
12 | #eşik değer
13 | bias = -20
14 | 
15 | #noron çıktısı(output)
16 | 
17 | output = np.dot(X,weights) + bias
18 | 
19 | print("Nöronun ham çıktısı", output)
20 | 
21 | def sigmoid(x):
22 |     return 1/(1+np.exp(-x))
23 | 
24 | activated_output = sigmoid(output)
25 | 
26 | print("Nöronun aktivasyon sonrası çıktısı : ", activated_output)


--------------------------------------------------------------------------------
/DeepLearning/sample2.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import numpy as np
 3 | import matplotlib.pyplot as plt
 4 | from sklearn.preprocessing import StandardScaler
 5 | 
 6 | 
 7 | #ages
 8 | X = np.array([5,6,7,8,9,10],dtype=float).reshape(-1,1)
 9 | 
10 | #heights
11 | Y = np.array([110,116,123,130,136,142],dtype=float).reshape(-1,1)
12 | 
13 | x_scaler = StandardScaler()
14 | y_scaler = StandardScaler()
15 | 
16 | X_scaled = x_scaler.fit_transform(X)
17 | Y_scaled = y_scaler.fit_transform(Y)
18 | 
19 | model = tf.keras.Sequential([
20 |     tf.keras.layers.Dense(units=10, activation="relu",input_shape=[1]),
21 |     tf.keras.layers.Dense(units=1)
22 | ])
23 | 
24 | model.compile(optimizer="adam",loss="mean_squared_error")
25 | 
26 | model.fit(X_scaled,Y_scaled,epochs=500, verbose=0)
27 | 
28 | 
29 | test_age = np.array([[7.5]])
30 | test_age_scaled = x_scaler.transform(test_age)
31 | 
32 | 
33 | 
34 | predicted_height_scaled = model.predict(np.array([test_age_scaled]))
35 | 
36 | predicted_height = y_scaler.inverse_transform(predicted_height_scaled)
37 | 
38 | print(f"{test_age} için boy tahmini = {predicted_height}")


--------------------------------------------------------------------------------
/DesicionTrees/__pycache__/main.cpython-313.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DesicionTrees/__pycache__/main.cpython-313.pyc


--------------------------------------------------------------------------------
/DesicionTrees/credit_model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DesicionTrees/credit_model.pkl


--------------------------------------------------------------------------------
/DesicionTrees/main.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | from sklearn.tree import DecisionTreeClassifier
 4 | from sklearn.model_selection import train_test_split
 5 | from sklearn.metrics import accuracy_score
 6 | from sklearn import tree
 7 | import random
 8 | import joblib
 9 | from fastapi import FastAPI
10 | from pydantic import BaseModel
11 | 
12 | def generateData(m=1000):
13 |     data=[]
14 |     for _ in range(1000):
15 |         age = random.randint(20,65)
16 |         income = round(random.uniform(2.5,15.0),2)
17 |         credit_score = random.randint(300,800)
18 |         has_default = random.choice([0,1])
19 |         approved = 1 if credit_score>650 and income>5 and not has_default else 0
20 |         data.append([age,income,credit_score,has_default,approved])
21 |     return pd.DataFrame(data,columns=["age","income","credit_score","has_default","approved"])
22 | 
23 | df = generateData()
24 | 
25 | X = df[["age","income","credit_score","has_default"]]
26 | y = df["approved"]
27 | 
28 | model = DecisionTreeClassifier(random_state=42)
29 | model.fit(X,y)
30 | 
31 | joblib.dump(model,"credit_model.pkl")
32 | 
33 | app = FastAPI(title="Credit Approval API",description="Credit Approval API using Desicion Trees")
34 | 
35 | 
36 | class Applicant(BaseModel):
37 |     age:int
38 |     income:float
39 |     credit_score:int
40 |     has_default:int
41 | 
42 | @app.post("/predict",tags=["prediction"])
43 | def predict_approval(applicant:Applicant):
44 |   data_model = joblib.load("credit_model.pkl")
45 |   input_data = [[applicant.age,applicant.income,applicant.credit_score,applicant.has_default]]
46 |   prediction = data_model.predict(input_data)[0]
47 |   result = "Approved" if prediction==1 else "Rejected"
48 | 
49 |   return {
50 |       "prediction": result,
51 |       "details":{
52 |           "age" : applicant.age,
53 |           "income": applicant.income,
54 |           "credit_score":applicant.credit_score,
55 |           "has_default":applicant.has_default
56 |       }
57 |   }
58 | 
59 | #Ödev 1 - ARGE : DesicionTrees'de gini yerine alternatif ne kullanılabilir? Farkı nedir?
60 | #Ödev 2 - ARGE : Pydantic ile başka neler yapılabilir?
61 | #Ödev 3 - ARGE : Faker kütüphanesi ne işe yarar? Detaylı araştırınız.


--------------------------------------------------------------------------------
/DesicionTrees/sample1.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | from sklearn.tree import DecisionTreeClassifier
 4 | from sklearn.model_selection import train_test_split
 5 | from sklearn.metrics import accuracy_score
 6 | from sklearn import tree
 7 | import matplotlib.pyplot as plt
 8 | import random
 9 | 
10 | def generateData(m=1000):
11 |     data=[]
12 |     for _ in range(1000):
13 |         age = random.randint(20,65)
14 |         income = round(random.uniform(2.5,15.0),2)
15 |         credit_score = random.randint(300,800)
16 |         has_default = random.choice([0,1])
17 |         approved = 1 if credit_score>650 and income>5 and not has_default else 0
18 |         data.append([age,income,credit_score,has_default,approved])
19 |     return pd.DataFrame(data,columns=["age","income","credit_score","has_default","approved"])
20 | 
21 | df = generateData()
22 | 
23 | X = df[["age","income","credit_score","has_default"]]
24 | y = df["approved"]
25 | 
26 | X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)
27 | 
28 | model = DecisionTreeClassifier(random_state=42)
29 | model.fit(X_train,y_train)
30 | 
31 | y_prediction = model.predict(X_test)
32 | print("Accuracy : ", accuracy_score(y_test,y_prediction))
33 | 
34 | plt.figure(figsize=(12, 6))
35 | tree.plot_tree(model, feature_names=X.columns, class_names=["Rejected", "Approved"], filled=True)
36 | plt.title("Karar Ağacı Görselleştirmesi")
37 | plt.show()


--------------------------------------------------------------------------------
/KMeans/sample1.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | from sklearn.cluster import KMeans
 4 | 
 5 | #müşteri gelir ve harcama listesi
 6 | X = np.array([
 7 |     [15,39],[16,50],[25,5],[85,59],[89,60],[75,39],[10,8],[150,29],[130,19],[24,79],[88,62],[85,49],[85,45],
 8 | ])
 9 | 
10 | kmeans = KMeans(n_clusters=4, random_state=42)
11 | 
12 | kmeans.fit(X)
13 | labels = kmeans.labels_
14 | 
15 | plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='rainbow')
16 | plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s=200, marker='X', c='black')
17 | plt.xlabel("Gelir")
18 | plt.ylabel("Harcama")
19 | plt.title("K-means ile Müşteri Segmentasyonu")
20 | plt.show()


--------------------------------------------------------------------------------
/KNN/knn_model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/KNN/knn_model.pkl


--------------------------------------------------------------------------------
/KNN/sample1.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | from sklearn.neighbors import KNeighborsClassifier
 4 | from sklearn.model_selection import train_test_split
 5 | from sklearn.metrics import accuracy_score
 6 | 
 7 | #midterm,final
 8 | data = [
 9 |     [30,40],
10 |     [60,70],
11 |     [90,80],
12 |     [20,45],
13 |     [30,49],
14 |     [60,54],
15 |     [90,64],
16 |     [100,78],
17 |     [10,40],
18 |     [20,100],
19 |     [80,60],
20 |     [70,100],
21 |     [70,90],
22 |     [50,80],
23 |     [50,77],
24 | ]
25 | 
26 | def calculate(mid,final):
27 |     average = mid*0.4+final*0.6
28 |     return 1 if average>=50 else 0
29 | 
30 | labels = [calculate(x[0],x[1]) for x in data]
31 | 
32 | df = pd.DataFrame(data, columns=["mid","final"])
33 | df["status"] = labels
34 | 
35 | X = df[["mid","final"]]
36 | y = df["status"]
37 | 
38 | X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)
39 | 
40 | model = KNeighborsClassifier(n_neighbors=3)
41 | model.fit(X_train,y_train)
42 | 
43 | y_prediction = model.predict(X_test)
44 | 
45 | print(accuracy_score(y_test,y_prediction))
46 | 
47 | student = np.array([[0,70]])
48 | prediction = model.predict(student)
49 | 
50 | print("Geçti" if prediction[0]==1 else "Kaldı")


--------------------------------------------------------------------------------
/KNN/sample2.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | from sklearn.neighbors import KNeighborsClassifier
 4 | from sklearn.model_selection import train_test_split
 5 | from sklearn.metrics import accuracy_score
 6 | 
 7 | #1 eğitim seviyesi 0=lise , 1 lisans, 2 YL
 8 | #2 tecrübe yılı
 9 | #3 hired?
10 | data = [
11 |     [0, 0, 0], [0, 1, 0], [0, 2, 0], [0, 3, 0],
12 |     [1, 0, 0], [1, 2, 0], [1, 2, 1], [1, 2, 0],
13 |     [1, 4, 1], [1, 5, 1], [2, 0, 0], [2, 1, 1],
14 |     [2, 2, 1], [2, 3, 1], [2, 4, 1], [2, 5, 1],
15 |     [2, 6, 1], [2, 7, 1], [2, 8, 1], [2, 9, 1]
16 | ]
17 | 
18 | df = pd.DataFrame(data, columns=["school","year","hired"])
19 | 
20 | X = df[["school","year"]] #features
21 | y = df["hired"] #target
22 | 
23 | X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)
24 | 
25 | k_values = range(1,16)
26 | scores =[]
27 | 
28 | for k in k_values:
29 |   model = KNeighborsClassifier(n_neighbors=k)
30 |   model.fit(X_train,y_train)
31 |   y_prediction = model.predict(X_test)
32 |   accuracy = accuracy_score(y_test,y_prediction)
33 |   scores.append(accuracy)
34 | 
35 | print(scores)
36 | 


--------------------------------------------------------------------------------
/KNN/sample3.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | from sklearn.neighbors import KNeighborsClassifier
 4 | from sklearn.model_selection import train_test_split
 5 | from sklearn.metrics import accuracy_score
 6 | import joblib
 7 | from flask import Flask,request,jsonify
 8 | 
 9 | #1 eğitim seviyesi 0=lise , 1 lisans, 2 YL
10 | #2 tecrübe yılı
11 | #3 hired?
12 | data = [
13 |     [0, 0, 0], [0, 1, 0], [0, 2, 0], [0, 3, 0],
14 |     [1, 0, 0], [1, 2, 0], [1, 2, 1], [1, 2, 0],
15 |     [1, 4, 1], [1, 5, 1], [2, 0, 0], [2, 1, 1],
16 |     [2, 2, 1], [2, 3, 1], [2, 4, 1], [2, 5, 1],
17 |     [2, 6, 1], [2, 7, 1], [2, 8, 1], [2, 9, 1]
18 | ]
19 | 
20 | df = pd.DataFrame(data, columns=["school","year","hired"])
21 | 
22 | X = df[["school","year"]] #features
23 | y = df["hired"] #target
24 | 
25 | model = KNeighborsClassifier(n_neighbors=5)
26 | model.fit(X,y)
27 | 
28 | joblib.dump(model,"knn_model.pkl")
29 | 
30 | 
31 | app = Flask(__name__) #json,restful
32 | 
33 | 
34 | model = joblib.load("knn_model.pkl")
35 | 
36 | @app.route("/")
37 | def home():
38 |     return "KNN API hazır 🚀"    #http://localhost:5000/
39 | 
40 | @app.route("/prediction", methods=["POST"]) #http://localhost:5000/prediction  POST
41 | def predict():
42 |     data = request.get_json()
43 |     try:
44 |         school = int(data["school"])
45 |         year = int(data["year"])
46 | 
47 |         testData = np.array([[school, year]])
48 |         result = model.predict(testData)[0]
49 | 
50 |         return jsonify({
51 |             "school": school,
52 |             "year": year,
53 |             "hired": "Alındı" if result == 1 else "Alınmadı"
54 |         })
55 | 
56 |     except Exception as e:
57 |         return jsonify({"hata": str(e)}), 400
58 | 
59 | if __name__ == "__main__":
60 |     app.run(debug=True)


--------------------------------------------------------------------------------
/NaiveBayes/sample1.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | from sklearn.feature_extraction.text import CountVectorizer
 3 | from sklearn.naive_bayes import MultinomialNB
 4 | from sklearn.model_selection import train_test_split
 5 | from sklearn.metrics import accuracy_score
 6 | 
 7 | data = {
 8 |     'text': [
 9 |         'Kredi kartı borcunuzu hemen ödeyin',
10 |         'Tebrikler! Kazandınız. Hemen tıklayın!', 
11 |         'Yarın toplantıyı unutma',
12 |         'Bedava hediye seni bekliyor',
13 |         'Önemli bir fatura bildirimi var',
14 |         'Bu hafta sonu kahve içelim mi?',
15 |         'Ücretsiz tatil kazandınız!',
16 |         'Bu ay çok çalıştın, tebrikler'
17 |     ],
18 |     'label': [1, 1, 0, 1, 0, 0, 1, 0]  # 1: spam, 0: normal
19 | }
20 | 
21 | df =pd.DataFrame(data)
22 | 
23 | vectorizer = CountVectorizer()
24 | X = vectorizer.fit_transform(df["text"])
25 | 
26 | y = df["label"]
27 | 
28 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
29 | 
30 | model = MultinomialNB()
31 | model.fit(X_train,y_train)
32 | 
33 | y_pred = model.predict(X_test)
34 | 
35 | print("Accuracy : ", accuracy_score(y_test,y_pred))


--------------------------------------------------------------------------------
/RandomForest/sample1.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | from sklearn.ensemble import RandomForestRegressor
 4 | from sklearn.model_selection import train_test_split
 5 | from sklearn.metrics import mean_squared_error,r2_score
 6 | import matplotlib.pyplot as plt
 7 | 
 8 | np.random.seed(42)
 9 | 
10 | n_samples = 200000
11 | area = np.random.randint(50,250,n_samples)
12 | rooms = np.random.randint(1,6,n_samples)
13 | age = np.random.randint(0,50,n_samples)
14 | location_score = np.random.uniform(0,10,n_samples)
15 | 
16 | noise = np.random.normal(0,20000,n_samples)
17 | price = (area * 3000) + (rooms*50000) - (age*1000) + (location_score*10000) + noise
18 | 
19 | df = pd.DataFrame({
20 |     "area":area,
21 |     "rooms":rooms,
22 |     "age":age,
23 |     "location_score":location_score,
24 |     "price":price
25 | })
26 | 
27 | X = df[["area","rooms","age","location_score"]]
28 | y = df["price"]
29 | 
30 | X_train ,X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42)
31 | 
32 | model = RandomForestRegressor(n_estimators=50,random_state=42)
33 | model.fit(X_train,y_train)
34 | y_prediction = model.predict(X_test)
35 | 
36 | rmse = np.sqrt(mean_squared_error(y_test,y_prediction))
37 | print("RMSE - Root Mean Squared Error : ", rmse)
38 | 
39 | plt.figure(figsize=(8,6))
40 | plt.scatter(y_test, y_prediction, alpha=0.5)
41 | plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--')
42 | plt.xlabel('Gerçek Fiyat')
43 | plt.ylabel('Tahmin Edilen Fiyat')
44 | plt.title('Gerçek vs Tahmin Edilen Ev Fiyatları')
45 | plt.grid(True)
46 | plt.show()
47 | 
48 | 
49 | def plot_svm_decision_boundary(model, X, y):
50 |     plt.scatter(X[:, 0], X[:, 1], c=y, cmap='bwr', s=60)
51 | 
52 |     ax = plt.gca()
53 |     xlim = ax.get_xlim()
54 |     ylim = ax.get_ylim()
55 | 
56 |     xx = np.linspace(xlim[0], xlim[1], 30)
57 |     yy = np.linspace(ylim[0], ylim[1], 30)
58 |     YY, XX = np.meshgrid(yy, xx)
59 |     xy = np.vstack([XX.ravel(), YY.ravel()]).T
60 |     Z = model.decision_function(xy).reshape(XX.shape)
61 | 
62 | 
63 |     ax.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], alpha=0.7,
64 |                linestyles=['--', '-', '--'])
65 | 
66 |     ax.scatter(model.support_vectors_[:, 0], model.support_vectors_[:, 1],
67 |                s=100, linewidth=1, facecolors='none', edgecolors='k')
68 |     plt.title("SVM Sınıflandırması ve Destek Vektörleri")
69 |     plt.show()
70 | 
71 | 
72 | plot_svm_decision_boundary(model, X, y)


--------------------------------------------------------------------------------
/SVM/sample1.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | from sklearn import datasets
 4 | from sklearn.svm import SVC
 5 | 
 6 | X,y = datasets.make_blobs(n_samples=50, centers=2, random_state=5)
 7 | 
 8 | model = SVC(kernel="linear")
 9 | model.fit(X,y)
10 | 
11 | def plot_svm_decision_boundary(model, X, y):
12 |     plt.scatter(X[:, 0], X[:, 1], c=y, cmap='bwr', s=60)
13 | 
14 |     ax = plt.gca()
15 |     xlim = ax.get_xlim()
16 |     ylim = ax.get_ylim()
17 | 
18 |     xx = np.linspace(xlim[0], xlim[1], 30)
19 |     yy = np.linspace(ylim[0], ylim[1], 30)
20 |     YY, XX = np.meshgrid(yy, xx)
21 |     xy = np.vstack([XX.ravel(), YY.ravel()]).T
22 |     Z = model.decision_function(xy).reshape(XX.shape)
23 | 
24 | 
25 |     ax.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], alpha=0.7,
26 |                linestyles=['--', '-', '--'])
27 | 
28 |     ax.scatter(model.support_vectors_[:, 0], model.support_vectors_[:, 1],
29 |                s=100, linewidth=1, facecolors='none', edgecolors='k')
30 |     plt.title("SVM Sınıflandırması ve Destek Vektörleri")
31 |     plt.show()
32 | 
33 | 
34 | plot_svm_decision_boundary(model, X, y)
35 | 


--------------------------------------------------------------------------------
/SVM/sample2.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | from sklearn.svm import SVC
 4 | from sklearn.preprocessing import StandardScaler
 5 | from sklearn.model_selection import train_test_split
 6 | from sklearn.metrics import accuracy_score, classification_report
 7 | 
 8 | 
 9 | np.random.seed(42)
10 | 
11 | n_samples = 300
12 | incomes = np.random.uniform(2,12,n_samples)
13 | debpts = np.random.uniform(10,90,n_samples)
14 | 
15 | labels = []
16 | 
17 | for income,debpt in zip(incomes,debpts):
18 |     if income<6 and debpt>70:
19 |         labels.append(1)#Riskli
20 |     else:
21 |         labels.append(0)#Güvenli
22 | 
23 | X = np.column_stack((incomes,debpts))
24 | y = np.array(labels)
25 | 
26 | scaler = StandardScaler()
27 | X_scaled = scaler.fit_transform(X)
28 | 
29 | X_train,X_test,y_train,y_test = train_test_split(X_scaled,y,test_size=0.2,random_state=42)
30 | 
31 | model = SVC(kernel="sigmoid")
32 | model.fit(X_train,y_train)
33 | 
34 | accuracy = model.score(X_test,y_test)
35 | print("accuracy : ", accuracy)
36 | 
37 | def plot_decision_boundary(model, X, y):
38 |     plt.figure(figsize=(10, 6))
39 |     plt.scatter(X[:, 0], X[:, 1], c=y, cmap='bwr', s=60, edgecolors='k', alpha=0.7)
40 |     ax = plt.gca()
41 |     xlim = ax.get_xlim()
42 |     ylim = ax.get_ylim()
43 | 
44 |     xx = np.linspace(xlim[0], xlim[1], 30)
45 |     yy = np.linspace(ylim[0], ylim[1], 30)
46 |     YY, XX = np.meshgrid(yy, xx)
47 |     xy = np.vstack([XX.ravel(), YY.ravel()]).T
48 |     Z = model.decision_function(xy).reshape(XX.shape)
49 | 
50 |     ax.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1],
51 |                linestyles=['--', '-', '--'])
52 | 
53 |     ax.scatter(model.support_vectors_[:, 0], model.support_vectors_[:, 1],
54 |                s=150, linewidth=1.5, facecolors='none', edgecolors='k')
55 | 
56 |     plt.title("Faker Verisiyle SVM: Kredi Riski Tahmini")
57 |     plt.xlabel("Gelir (standardize)")
58 |     plt.ylabel("Borç Oranı (standardize)")
59 |     plt.grid(True)
60 |     plt.show()
61 | 
62 | plot_decision_boundary(model, X_scaled, y)


--------------------------------------------------------------------------------