├── DBSCAN └── sample1.py ├── DeepLearning ├── customer_categorical_order_prediction │ ├── README.md │ ├── best_model.keras │ ├── main.py │ ├── models │ │ └── saved │ │ │ ├── category_1_model.keras │ │ │ ├── category_2_model.keras │ │ │ ├── category_3_model.keras │ │ │ ├── category_4_model.keras │ │ │ ├── category_5_model.keras │ │ │ ├── category_6_model.keras │ │ │ ├── category_7_model.keras │ │ │ └── category_8_model.keras │ ├── reports │ │ ├── confusion_matrices │ │ │ ├── confusion_matrix_Category_1.png │ │ │ ├── confusion_matrix_Category_2.png │ │ │ ├── confusion_matrix_Category_3.png │ │ │ ├── confusion_matrix_Category_4.png │ │ │ ├── confusion_matrix_Category_5.png │ │ │ ├── confusion_matrix_Category_6.png │ │ │ ├── confusion_matrix_Category_7.png │ │ │ └── confusion_matrix_Category_8.png │ │ ├── evaluation_reports │ │ │ ├── evaluation_report_Category_1.txt │ │ │ ├── evaluation_report_Category_2.txt │ │ │ ├── evaluation_report_Category_3.txt │ │ │ ├── evaluation_report_Category_4.txt │ │ │ ├── evaluation_report_Category_5.txt │ │ │ ├── evaluation_report_Category_6.txt │ │ │ ├── evaluation_report_Category_7.txt │ │ │ └── evaluation_report_Category_8.txt │ │ ├── precision_recall_curves │ │ │ ├── precision_recall_curve_Category_1.png │ │ │ ├── precision_recall_curve_Category_2.png │ │ │ ├── precision_recall_curve_Category_3.png │ │ │ ├── precision_recall_curve_Category_4.png │ │ │ ├── precision_recall_curve_Category_5.png │ │ │ ├── precision_recall_curve_Category_6.png │ │ │ ├── precision_recall_curve_Category_7.png │ │ │ └── precision_recall_curve_Category_8.png │ │ ├── roc_curves │ │ │ ├── roc_curve_Category_1.png │ │ │ ├── roc_curve_Category_2.png │ │ │ ├── roc_curve_Category_3.png │ │ │ ├── roc_curve_Category_4.png │ │ │ ├── roc_curve_Category_5.png │ │ │ ├── roc_curve_Category_6.png │ │ │ ├── roc_curve_Category_7.png │ │ │ └── roc_curve_Category_8.png │ │ └── training_history │ │ │ ├── training_history_Category_1.png │ │ │ ├── training_history_Category_2.png │ │ │ ├── training_history_Category_3.png │ │ │ ├── training_history_Category_4.png │ │ │ ├── training_history_Category_5.png │ │ │ ├── training_history_Category_6.png │ │ │ ├── training_history_Category_7.png │ │ │ └── training_history_Category_8.png │ ├── requirements.txt │ ├── setup.py │ ├── src │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-312.pyc │ │ │ └── config.cpython-312.pyc │ │ ├── config.py │ │ ├── data │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-312.pyc │ │ │ │ ├── database.cpython-312.pyc │ │ │ │ └── feature_engineering.cpython-312.pyc │ │ │ ├── database.py │ │ │ └── feature_engineering.py │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-312.pyc │ │ │ │ ├── model_evaluation.cpython-312.pyc │ │ │ │ └── neural_network.cpython-312.pyc │ │ │ ├── model_evaluation.py │ │ │ └── neural_network.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-312.pyc │ │ │ └── helpers.cpython-312.pyc │ │ │ └── helpers.py │ └── tests │ │ ├── __init__.py │ │ ├── test_data.py │ │ └── test_models.py ├── customer_order_prediction │ ├── data │ │ └── query.sql │ ├── main.py │ ├── outputs │ │ └── model.h5 │ └── src │ │ ├── __pycache__ │ │ ├── config.cpython-312.pyc │ │ ├── data_loader.cpython-312.pyc │ │ ├── evaluate.cpython-312.pyc │ │ ├── model.cpython-312.pyc │ │ ├── preprocessing.cpython-312.pyc │ │ └── train.cpython-312.pyc │ │ ├── config.py │ │ ├── data_loader.py │ │ ├── evaluate.py │ │ ├── model.py │ │ ├── preprocessing.py │ │ └── train.py ├── customer_order_risk │ ├── __pycache__ │ │ └── config.cpython-312.pyc │ ├── best_model.keras │ ├── config.py │ ├── main.py │ ├── requirements.txt │ └── src │ │ ├── __pycache__ │ │ ├── database.cpython-312.pyc │ │ ├── feature_engineering.cpython-312.pyc │ │ └── model.cpython-312.pyc │ │ ├── database.py │ │ ├── feature_engineering.py │ │ └── model.py ├── project1.py ├── sample1.py └── sample2.py ├── DesicionTrees ├── __pycache__ │ └── main.cpython-313.pyc ├── credit_model.pkl ├── main.py └── sample1.py ├── KMeans └── sample1.py ├── KNN ├── knn_model.pkl ├── sample1.py ├── sample2.py └── sample3.py ├── NaiveBayes └── sample1.py ├── RandomForest └── sample1.py └── SVM ├── sample1.py └── sample2.py /DBSCAN/sample1.py: -------------------------------------------------------------------------------- 1 | #Müşterilerin alışveriş davranışlarına göre gruplanması ve aykırı verilerin keşfi 2 | 3 | #order_details,customers,orders 4 | 5 | import pandas as pd 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | import psycopg2 9 | from sqlalchemy import create_engine 10 | from sklearn.preprocessing import StandardScaler 11 | from sklearn.cluster import DBSCAN 12 | from sklearn.neighbors import NearestNeighbors 13 | from kneed import KneeLocator 14 | 15 | user = "postgres" 16 | password = "12345" 17 | host = "localhost" 18 | port = "5432" 19 | database = "northwind" 20 | 21 | engine = create_engine(f"postgresql+psycopg2://{user}:{password}@{host}:{port}/{database}") 22 | 23 | query = """ 24 | select 25 | c.customer_id, 26 | count(o.order_id) as total_orders, 27 | sum(od.unit_price*od.quantity) as total_spent, 28 | avg(od.unit_price*od.quantity) as avg_order_value 29 | from customers c inner join orders o 30 | on c.customer_id =o.customer_id 31 | inner join order_details od 32 | on o.order_id = od.order_id 33 | group by c.customer_id 34 | having count(o.order_id)>0 35 | """ 36 | 37 | df = pd.read_sql_query(query,engine) 38 | print(df.head()) 39 | 40 | X = df[["total_orders","total_spent","avg_order_value"]] 41 | 42 | scaler = StandardScaler() 43 | X_scaled = scaler.fit_transform(X) 44 | 45 | def find_optimal_eps(X_scaled,min_samples=3): 46 | neighbors = NearestNeighbors(n_neighbors=min_samples).fit(X_scaled) 47 | distances,_ = neighbors.kneighbors(X_scaled) 48 | 49 | distances = np.sort(distances[:,min_samples-1]) 50 | 51 | kneedle = KneeLocator(range(len(distances)), distances, curve='convex', direction='increasing') 52 | optimal_eps = distances[kneedle.elbow] 53 | 54 | plt.figure(figsize=(10, 6)) 55 | plt.plot(distances) 56 | plt.axvline(x=kneedle.elbow, color='r', linestyle='--', label=f'Optimal eps: {optimal_eps:.2f}') 57 | plt.xlabel('Points sorted by distance') 58 | plt.ylabel(f'{min_samples}-th nearest neighbor distance') 59 | plt.title('Elbow Method for Optimal eps') 60 | plt.legend() 61 | plt.grid(True) 62 | plt.show() 63 | 64 | return optimal_eps 65 | 66 | optimal_eps = find_optimal_eps(X_scaled) 67 | dbscan = DBSCAN(eps=optimal_eps,min_samples=3) 68 | 69 | df["cluster"] = dbscan.fit_predict(X_scaled) 70 | 71 | plt.figure(figsize=(10, 6)) 72 | plt.scatter(df['total_orders'], df['total_spent'], c=df['cluster'], cmap='plasma', s=60) 73 | plt.xlabel("Toplam Sipariş Sayısı") 74 | plt.ylabel("Toplam Harcama") 75 | plt.title("Müşteri Segmentasyonu (DBSCAN)") 76 | plt.grid(True) 77 | plt.colorbar(label='Küme No') 78 | plt.show() 79 | 80 | outliers = df[df["cluster"]==-1] 81 | print("Aykırı veri sayısı : ", len(outliers)) 82 | print(outliers[["customer_id","total_orders","total_spent"]]) -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/README.md: -------------------------------------------------------------------------------- 1 | # Customer Category Purchase Prediction 2 | 3 | Bu proje, müşterilerin geçmiş satın alma davranışlarına dayanarak yeni ürün kategorilerinde satın alma olasılıklarını tahmin eden bir derin öğrenme modeli içerir. 4 | 5 | ## Proje Yapısı 6 | 7 | ``` 8 | customer_categorical_order_prediction/ 9 | ├── src/ 10 | │ ├── data/ 11 | │ │ ├── __init__.py 12 | │ │ ├── database.py 13 | │ │ └── feature_engineering.py 14 | │ ├── models/ 15 | │ │ ├── __init__.py 16 | │ │ ├── neural_network.py 17 | │ │ └── model_evaluation.py 18 | │ ├── utils/ 19 | │ │ ├── __init__.py 20 | │ │ └── helpers.py 21 | │ └── config.py 22 | ├── tests/ 23 | │ ├── __init__.py 24 | │ ├── test_data.py 25 | │ └── test_models.py 26 | ├── notebooks/ 27 | │ └── exploratory_analysis.ipynb 28 | ├── .env.example 29 | ├── requirements.txt 30 | └── README.md 31 | ``` 32 | 33 | ## Kurulum 34 | 35 | 1. Sanal ortam oluşturun: 36 | ```bash 37 | python -m venv venv 38 | source venv/bin/activate # Linux/Mac 39 | venv\Scripts\activate # Windows 40 | ``` 41 | 42 | 2. Gerekli paketleri yükleyin: 43 | ```bash 44 | pip install -r requirements.txt 45 | ``` 46 | 47 | 3. `.env.example` dosyasını `.env` olarak kopyalayın ve veritabanı bağlantı bilgilerinizi girin. 48 | 49 | ## Kullanım 50 | 51 | 1. Veri hazırlama: 52 | ```bash 53 | python src/data/feature_engineering.py 54 | ``` 55 | 56 | 2. Model eğitimi: 57 | ```bash 58 | python src/models/neural_network.py 59 | ``` 60 | 61 | ## Test 62 | 63 | ```bash 64 | pytest tests/ 65 | ``` 66 | 67 | ## Kod Kalitesi 68 | 69 | - Black ile kod formatlaması 70 | - Flake8 ile kod analizi 71 | - MyPy ile tip kontrolü 72 | - Pytest ile birim testleri -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/best_model.keras: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/best_model.keras -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/main.py: -------------------------------------------------------------------------------- 1 | """ 2 | Main script for customer category prediction. 3 | """ 4 | import os 5 | import logging 6 | from src.data.database import get_customer_order_history 7 | from src.data.feature_engineering import ( 8 | create_customer_features, 9 | create_category_features, 10 | create_time_based_features, 11 | prepare_model_data 12 | ) 13 | from src.models.neural_network import CustomerCategoryPredictor 14 | from src.models.model_evaluation import ( 15 | plot_training_history, 16 | plot_roc_curve, 17 | plot_confusion_matrix, 18 | generate_evaluation_report 19 | ) 20 | from src.config import MODEL_CONFIG, FEATURE_CONFIG 21 | 22 | def main(): 23 | """ 24 | Main function to run the customer category prediction model. 25 | """ 26 | # Get raw data 27 | raw_data = get_customer_order_history() 28 | print("Raw data shape:", raw_data.shape) 29 | print("Raw data columns:", raw_data.columns.tolist()) 30 | 31 | # Create features 32 | customer_features = create_customer_features(raw_data) 33 | print("\nCustomer features shape:", customer_features.shape) 34 | print("Customer features columns:", customer_features.columns.tolist()) 35 | 36 | category_features = create_category_features(raw_data) 37 | print("\nCategory features shape:", category_features.shape) 38 | print("Category features columns:", category_features.columns.tolist()) 39 | 40 | time_features = create_time_based_features(raw_data) 41 | print("\nTime features shape:", time_features.shape) 42 | print("Time features columns:", time_features.columns.tolist()) 43 | 44 | # Merge features 45 | # Start with unique customer-category pairs 46 | df = raw_data[['customer_id', 'category_id']].drop_duplicates() 47 | 48 | # Merge customer features 49 | df = df.merge(customer_features, on='customer_id', how='left') 50 | 51 | # Merge category features 52 | df = df.merge(category_features, on=['customer_id', 'category_id'], how='left') 53 | 54 | # Merge time features (using only the latest time features for each customer) 55 | latest_time_features = time_features.sort_values('order_date').groupby('customer_id').last() 56 | df = df.merge(latest_time_features, on='customer_id', how='left') 57 | 58 | # Drop duplicate columns 59 | df = df.loc[:, ~df.columns.duplicated()] 60 | 61 | print("\nFinal features shape:", df.shape) 62 | print("Final features columns:", df.columns.tolist()) 63 | 64 | # Prepare model data for each target category 65 | for target_category in FEATURE_CONFIG['target_categories']: 66 | print(f"\nTraining model for category {target_category}") 67 | 68 | # Prepare data 69 | X_train, X_test, y_train, y_test = prepare_model_data(df, target_category) 70 | 71 | # Initialize and train model 72 | model = CustomerCategoryPredictor( 73 | input_dim=X_train.shape[1], 74 | hidden_layers=MODEL_CONFIG['hidden_layers'], 75 | dropout_rate=MODEL_CONFIG['dropout_rate'], 76 | learning_rate=MODEL_CONFIG['learning_rate'] 77 | ) 78 | 79 | # Train model 80 | history = model.train( 81 | X_train, y_train, 82 | batch_size=MODEL_CONFIG['batch_size'], 83 | epochs=MODEL_CONFIG['epochs'], 84 | validation_split=MODEL_CONFIG['validation_split'] 85 | ) 86 | 87 | # Plot training history 88 | plot_training_history(history, f"Category_{target_category}") 89 | 90 | # Make predictions on test set 91 | y_pred = model.predict(X_test) 92 | 93 | # Plot ROC curve 94 | plot_roc_curve(y_test, y_pred, f"Category_{target_category}") 95 | 96 | # Plot confusion matrix 97 | plot_confusion_matrix(y_test, y_pred, f"Category_{target_category}") 98 | 99 | # Generate evaluation report 100 | generate_evaluation_report( 101 | y_true=y_test, 102 | y_pred=y_pred, 103 | y_pred_proba=y_pred, 104 | category_name=f"Category_{target_category}", 105 | threshold=0.5 106 | ) 107 | 108 | # Save model 109 | model_path = os.path.join('models', 'saved', f'category_{target_category}_model.keras') 110 | model.save(model_path) 111 | print(f"Model saved to {model_path}") 112 | 113 | if __name__ == "__main__": 114 | main() -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/models/saved/category_1_model.keras: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/models/saved/category_1_model.keras -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/models/saved/category_2_model.keras: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/models/saved/category_2_model.keras -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/models/saved/category_3_model.keras: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/models/saved/category_3_model.keras -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/models/saved/category_4_model.keras: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/models/saved/category_4_model.keras -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/models/saved/category_5_model.keras: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/models/saved/category_5_model.keras -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/models/saved/category_6_model.keras: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/models/saved/category_6_model.keras -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/models/saved/category_7_model.keras: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/models/saved/category_7_model.keras -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/models/saved/category_8_model.keras: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/models/saved/category_8_model.keras -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/confusion_matrices/confusion_matrix_Category_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/confusion_matrices/confusion_matrix_Category_1.png -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/confusion_matrices/confusion_matrix_Category_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/confusion_matrices/confusion_matrix_Category_2.png -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/confusion_matrices/confusion_matrix_Category_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/confusion_matrices/confusion_matrix_Category_3.png -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/confusion_matrices/confusion_matrix_Category_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/confusion_matrices/confusion_matrix_Category_4.png -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/confusion_matrices/confusion_matrix_Category_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/confusion_matrices/confusion_matrix_Category_5.png -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/confusion_matrices/confusion_matrix_Category_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/confusion_matrices/confusion_matrix_Category_6.png -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/confusion_matrices/confusion_matrix_Category_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/confusion_matrices/confusion_matrix_Category_7.png -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/confusion_matrices/confusion_matrix_Category_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/confusion_matrices/confusion_matrix_Category_8.png -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/evaluation_reports/evaluation_report_Category_1.txt: -------------------------------------------------------------------------------- 1 | Evaluation Report for Category_1 2 | ================================================== 3 | 4 | Classification threshold: 0.5 5 | 6 | precision recall f1-score support 7 | 8 | 0 0.86 1.00 0.92 103 9 | 1 0.00 0.00 0.00 17 10 | 11 | accuracy 0.86 120 12 | macro avg 0.43 0.50 0.46 120 13 | weighted avg 0.74 0.86 0.79 120 14 | 15 | Additional Metrics: 16 | -------------------- 17 | Number of samples: 120 18 | Number of positive samples: 17 19 | Number of predicted positive samples: [0] 20 | Positive class ratio: 14.17% 21 | -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/evaluation_reports/evaluation_report_Category_2.txt: -------------------------------------------------------------------------------- 1 | Evaluation Report for Category_2 2 | ================================================== 3 | 4 | Classification threshold: 0.5 5 | 6 | precision recall f1-score support 7 | 8 | 0 0.88 1.00 0.94 106 9 | 1 0.00 0.00 0.00 14 10 | 11 | accuracy 0.88 120 12 | macro avg 0.44 0.50 0.47 120 13 | weighted avg 0.78 0.88 0.83 120 14 | 15 | Additional Metrics: 16 | -------------------- 17 | Number of samples: 120 18 | Number of positive samples: 14 19 | Number of predicted positive samples: [0] 20 | Positive class ratio: 11.67% 21 | -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/evaluation_reports/evaluation_report_Category_3.txt: -------------------------------------------------------------------------------- 1 | Evaluation Report for Category_3 2 | ================================================== 3 | 4 | Classification threshold: 0.5 5 | 6 | precision recall f1-score support 7 | 8 | 0 0.87 1.00 0.93 104 9 | 1 0.00 0.00 0.00 16 10 | 11 | accuracy 0.87 120 12 | macro avg 0.43 0.50 0.46 120 13 | weighted avg 0.75 0.87 0.80 120 14 | 15 | Additional Metrics: 16 | -------------------- 17 | Number of samples: 120 18 | Number of positive samples: 16 19 | Number of predicted positive samples: [0] 20 | Positive class ratio: 13.33% 21 | -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/evaluation_reports/evaluation_report_Category_4.txt: -------------------------------------------------------------------------------- 1 | Evaluation Report for Category_4 2 | ================================================== 3 | 4 | Classification threshold: 0.5 5 | 6 | precision recall f1-score support 7 | 8 | 0 0.87 1.00 0.93 104 9 | 1 0.00 0.00 0.00 16 10 | 11 | accuracy 0.87 120 12 | macro avg 0.43 0.50 0.46 120 13 | weighted avg 0.75 0.87 0.80 120 14 | 15 | Additional Metrics: 16 | -------------------- 17 | Number of samples: 120 18 | Number of positive samples: 16 19 | Number of predicted positive samples: [0] 20 | Positive class ratio: 13.33% 21 | -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/evaluation_reports/evaluation_report_Category_5.txt: -------------------------------------------------------------------------------- 1 | Evaluation Report for Category_5 2 | ================================================== 3 | 4 | Classification threshold: 0.5 5 | 6 | precision recall f1-score support 7 | 8 | 0 0.88 1.00 0.94 106 9 | 1 0.00 0.00 0.00 14 10 | 11 | accuracy 0.88 120 12 | macro avg 0.44 0.50 0.47 120 13 | weighted avg 0.78 0.88 0.83 120 14 | 15 | Additional Metrics: 16 | -------------------- 17 | Number of samples: 120 18 | Number of positive samples: 14 19 | Number of predicted positive samples: [0] 20 | Positive class ratio: 11.67% 21 | -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/evaluation_reports/evaluation_report_Category_6.txt: -------------------------------------------------------------------------------- 1 | Evaluation Report for Category_6 2 | ================================================== 3 | 4 | Classification threshold: 0.5 5 | 6 | precision recall f1-score support 7 | 8 | 0 0.88 1.00 0.94 106 9 | 1 0.00 0.00 0.00 14 10 | 11 | accuracy 0.88 120 12 | macro avg 0.44 0.50 0.47 120 13 | weighted avg 0.78 0.88 0.83 120 14 | 15 | Additional Metrics: 16 | -------------------- 17 | Number of samples: 120 18 | Number of positive samples: 14 19 | Number of predicted positive samples: [0] 20 | Positive class ratio: 11.67% 21 | -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/evaluation_reports/evaluation_report_Category_7.txt: -------------------------------------------------------------------------------- 1 | Evaluation Report for Category_7 2 | ================================================== 3 | 4 | Classification threshold: 0.5 5 | 6 | precision recall f1-score support 7 | 8 | 0 0.89 1.00 0.94 107 9 | 1 0.00 0.00 0.00 13 10 | 11 | accuracy 0.89 120 12 | macro avg 0.45 0.50 0.47 120 13 | weighted avg 0.80 0.89 0.84 120 14 | 15 | Additional Metrics: 16 | -------------------- 17 | Number of samples: 120 18 | Number of positive samples: 13 19 | Number of predicted positive samples: [0] 20 | Positive class ratio: 10.83% 21 | -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/evaluation_reports/evaluation_report_Category_8.txt: -------------------------------------------------------------------------------- 1 | Evaluation Report for Category_8 2 | ================================================== 3 | 4 | Classification threshold: 0.5 5 | 6 | precision recall f1-score support 7 | 8 | 0 0.86 1.00 0.92 103 9 | 1 0.00 0.00 0.00 17 10 | 11 | accuracy 0.86 120 12 | macro avg 0.43 0.50 0.46 120 13 | weighted avg 0.74 0.86 0.79 120 14 | 15 | Additional Metrics: 16 | -------------------- 17 | Number of samples: 120 18 | Number of positive samples: 17 19 | Number of predicted positive samples: [0] 20 | Positive class ratio: 14.17% 21 | -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/precision_recall_curves/precision_recall_curve_Category_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/precision_recall_curves/precision_recall_curve_Category_1.png -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/precision_recall_curves/precision_recall_curve_Category_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/precision_recall_curves/precision_recall_curve_Category_2.png -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/precision_recall_curves/precision_recall_curve_Category_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/precision_recall_curves/precision_recall_curve_Category_3.png -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/precision_recall_curves/precision_recall_curve_Category_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/precision_recall_curves/precision_recall_curve_Category_4.png -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/precision_recall_curves/precision_recall_curve_Category_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/precision_recall_curves/precision_recall_curve_Category_5.png -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/precision_recall_curves/precision_recall_curve_Category_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/precision_recall_curves/precision_recall_curve_Category_6.png -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/precision_recall_curves/precision_recall_curve_Category_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/precision_recall_curves/precision_recall_curve_Category_7.png -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/precision_recall_curves/precision_recall_curve_Category_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/precision_recall_curves/precision_recall_curve_Category_8.png -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/roc_curves/roc_curve_Category_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/roc_curves/roc_curve_Category_1.png -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/roc_curves/roc_curve_Category_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/roc_curves/roc_curve_Category_2.png -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/roc_curves/roc_curve_Category_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/roc_curves/roc_curve_Category_3.png -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/roc_curves/roc_curve_Category_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/roc_curves/roc_curve_Category_4.png -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/roc_curves/roc_curve_Category_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/roc_curves/roc_curve_Category_5.png -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/roc_curves/roc_curve_Category_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/roc_curves/roc_curve_Category_6.png -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/roc_curves/roc_curve_Category_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/roc_curves/roc_curve_Category_7.png -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/roc_curves/roc_curve_Category_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/roc_curves/roc_curve_Category_8.png -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/training_history/training_history_Category_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/training_history/training_history_Category_1.png -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/training_history/training_history_Category_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/training_history/training_history_Category_2.png -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/training_history/training_history_Category_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/training_history/training_history_Category_3.png -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/training_history/training_history_Category_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/training_history/training_history_Category_4.png -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/training_history/training_history_Category_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/training_history/training_history_Category_5.png -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/training_history/training_history_Category_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/training_history/training_history_Category_6.png -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/training_history/training_history_Category_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/training_history/training_history_Category_7.png -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/reports/training_history/training_history_Category_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/reports/training_history/training_history_Category_8.png -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.26.0 2 | pandas>=2.1.0 3 | scikit-learn>=1.3.2 4 | tensorflow>=2.15.0 5 | psycopg2-binary>=2.9.9 6 | python-dotenv>=1.0.0 7 | pytest>=7.4.3 8 | black>=23.11.0 9 | flake8>=6.1.0 10 | mypy>=1.7.0 11 | matplotlib>=3.8.0 12 | seaborn>=0.13.0 -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name="customer_category_prediction", 5 | version="0.1.0", 6 | packages=find_packages(), 7 | install_requires=[ 8 | "numpy>=1.26.0", 9 | "pandas>=2.1.0", 10 | "scikit-learn>=1.3.2", 11 | "tensorflow>=2.15.0", 12 | "psycopg2-binary>=2.9.9", 13 | "python-dotenv>=1.0.0", 14 | "pytest>=7.4.3", 15 | "black>=23.11.0", 16 | "flake8>=6.1.0", 17 | "mypy>=1.7.0", 18 | "matplotlib>=3.8.0", 19 | "seaborn>=0.13.0", 20 | "sqlalchemy>=2.0.0" 21 | ], 22 | python_requires=">=3.12.0", 23 | ) -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/src/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Customer Category Prediction package. 3 | """ -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/src/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/src/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/src/__pycache__/config.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/src/__pycache__/config.cpython-312.pyc -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/src/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Configuration settings for the project. 3 | """ 4 | import os 5 | from typing import Dict, Any 6 | from dotenv import load_dotenv 7 | 8 | # Load environment variables 9 | load_dotenv() 10 | 11 | # Database configuration 12 | DB_CONFIG = { 13 | 'host': os.getenv('DB_HOST', 'localhost'), 14 | 'port': os.getenv('DB_PORT', '5432'), 15 | 'database': os.getenv('DB_NAME', 'northwind'), 16 | 'user': os.getenv('DB_USER', 'postgres'), 17 | 'password': os.getenv('DB_PASSWORD', '12345') # Empty default password 18 | } 19 | 20 | # Model configuration 21 | MODEL_CONFIG = { 22 | 'input_dim': 20, # Number of input features 23 | 'hidden_layers': [64, 32, 16], # Hidden layer sizes 24 | 'dropout_rate': 0.3, 25 | 'learning_rate': 0.001, 26 | 'batch_size': 32, 27 | 'epochs': 100, 28 | 'early_stopping_patience': 10, 29 | 'validation_split': 0.2 30 | } 31 | 32 | # Feature engineering configuration 33 | FEATURE_CONFIG = { 34 | 'time_windows': [7, 30, 90, 180], # Days for rolling features 35 | 'lag_periods': [1, 3, 7, 14], # Days for lag features 36 | 'percentiles': [0.25, 0.5, 0.75], # Percentiles for feature calculation 37 | 'min_purchase_count': 3, # Minimum purchases for customer analysis 38 | 'target_categories': [1, 2, 3, 4, 5, 6, 7, 8] # Categories to predict 39 | } 40 | 41 | # Data processing configuration 42 | DATA_CONFIG = { 43 | 'train_test_split': 0.2, 44 | 'random_state': 42, 45 | 'missing_value_strategy': 'mean', # Options: 'mean', 'median', 'mode', 'drop' 46 | 'feature_scaling': 'standard', # Options: 'standard', 'minmax', 'robust' 47 | 'categorical_encoding': 'onehot' # Options: 'onehot', 'label', 'target' 48 | } 49 | 50 | # Evaluation configuration 51 | EVAL_CONFIG = { 52 | 'metrics': ['accuracy', 'precision', 'recall', 'f1', 'auc'], 53 | 'threshold': 0.5, 54 | 'cv_folds': 5, 55 | 'confidence_threshold': 0.8 56 | } 57 | 58 | # Path configuration 59 | PATH_CONFIG = { 60 | 'data_dir': 'data', 61 | 'raw_data_dir': 'data/raw', 62 | 'processed_data_dir': 'data/processed', 63 | 'model_dir': 'models', 64 | 'report_dir': 'reports', 65 | 'log_dir': 'logs' 66 | } 67 | 68 | # Logging configuration 69 | LOG_CONFIG = { 70 | 'level': 'INFO', 71 | 'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s', 72 | 'date_format': '%Y-%m-%d %H:%M:%S' 73 | } 74 | 75 | def get_config() -> Dict[str, Any]: 76 | """ 77 | Get all configuration settings. 78 | 79 | Returns: 80 | Dict[str, Any]: Configuration dictionary 81 | """ 82 | return { 83 | 'db': DB_CONFIG, 84 | 'model': MODEL_CONFIG, 85 | 'feature': FEATURE_CONFIG, 86 | 'data': DATA_CONFIG, 87 | 'eval': EVAL_CONFIG, 88 | 'path': PATH_CONFIG, 89 | 'log': LOG_CONFIG 90 | } -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/src/data/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Data processing and feature engineering package. 3 | """ -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/src/data/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/src/data/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/src/data/__pycache__/database.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/src/data/__pycache__/database.cpython-312.pyc -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/src/data/__pycache__/feature_engineering.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/src/data/__pycache__/feature_engineering.cpython-312.pyc -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/src/data/database.py: -------------------------------------------------------------------------------- 1 | """ 2 | Database connection and query module. 3 | """ 4 | import os 5 | from typing import List, Dict, Any, Optional 6 | import pandas as pd 7 | from sqlalchemy import create_engine, text 8 | from sqlalchemy.engine import Engine 9 | from dotenv import load_dotenv 10 | from src.config import DB_CONFIG 11 | 12 | # Load environment variables 13 | load_dotenv() 14 | 15 | def get_database_connection() -> Engine: 16 | """ 17 | Create database connection using environment variables. 18 | 19 | Returns: 20 | Engine: SQLAlchemy database engine 21 | """ 22 | # Get database credentials from config 23 | db_host = DB_CONFIG['host'] 24 | db_port = DB_CONFIG['port'] 25 | db_name = DB_CONFIG['database'] 26 | db_user = DB_CONFIG['user'] 27 | db_password = DB_CONFIG['password'] 28 | 29 | # Create connection string 30 | connection_string = f"postgresql://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}" 31 | 32 | # Create engine 33 | engine = create_engine(connection_string) 34 | 35 | return engine 36 | 37 | def execute_query(query: str, params: Optional[Dict[str, Any]] = None) -> pd.DataFrame: 38 | """ 39 | Execute SQL query and return results as DataFrame. 40 | 41 | Args: 42 | query (str): SQL query to execute 43 | params (Dict[str, Any], optional): Query parameters 44 | 45 | Returns: 46 | pd.DataFrame: Query results 47 | """ 48 | engine = get_database_connection() 49 | 50 | try: 51 | with engine.connect() as connection: 52 | result = connection.execute(text(query), params or {}) 53 | return pd.DataFrame(result.fetchall(), columns=result.keys()) 54 | except Exception as e: 55 | raise Exception(f"Error executing query: {str(e)}") 56 | finally: 57 | engine.dispose() 58 | 59 | def get_customer_category_data() -> pd.DataFrame: 60 | """ 61 | Get customer category purchase data. 62 | 63 | Returns: 64 | pd.DataFrame: Customer category data 65 | """ 66 | query = """ 67 | WITH customer_category_stats AS ( 68 | SELECT 69 | c.customer_id, 70 | p.category_id, 71 | COUNT(DISTINCT o.order_id) as order_count, 72 | SUM(od.unit_price * od.quantity * (1 - od.discount)) as total_amount, 73 | MAX(o.order_date) as last_order_date 74 | FROM customers c 75 | JOIN orders o ON c.customer_id = o.customer_id 76 | JOIN order_details od ON o.order_id = od.order_id 77 | JOIN products p ON od.product_id = p.product_id 78 | GROUP BY c.customer_id, p.category_id 79 | ) 80 | SELECT 81 | ccs.customer_id, 82 | c.company_name, 83 | cat.category_name, 84 | ccs.order_count, 85 | ccs.total_amount, 86 | ccs.last_order_date 87 | FROM customer_category_stats ccs 88 | JOIN customers c ON ccs.customer_id = c.customer_id 89 | JOIN categories cat ON ccs.category_id = cat.category_id 90 | ORDER BY ccs.customer_id, ccs.total_amount DESC; 91 | """ 92 | 93 | return execute_query(query) 94 | 95 | def get_customer_order_history() -> pd.DataFrame: 96 | """ 97 | Get detailed customer order history. 98 | 99 | Returns: 100 | pd.DataFrame: Customer order history 101 | """ 102 | query = """ 103 | SELECT 104 | c.customer_id, 105 | c.company_name, 106 | o.order_id, 107 | o.order_date, 108 | p.category_id, 109 | cat.category_name, 110 | od.unit_price * od.quantity * (1 - od.discount) as total_amount 111 | FROM customers c 112 | JOIN orders o ON c.customer_id = o.customer_id 113 | JOIN order_details od ON o.order_id = od.order_id 114 | JOIN products p ON od.product_id = p.product_id 115 | JOIN categories cat ON p.category_id = cat.category_id 116 | ORDER BY c.customer_id, o.order_date; 117 | """ 118 | 119 | df = execute_query(query) 120 | print("Veritabanından gelen sütunlar:", df.columns.tolist()) 121 | print("\nİlk 5 satır:") 122 | print(df.head()) 123 | return df 124 | 125 | def get_category_metrics() -> pd.DataFrame: 126 | """ 127 | Get category-level metrics. 128 | 129 | Returns: 130 | pd.DataFrame: Category metrics 131 | """ 132 | query = """ 133 | WITH category_stats AS ( 134 | SELECT 135 | p.category_id, 136 | COUNT(DISTINCT o.customer_id) as unique_customers, 137 | COUNT(DISTINCT o.order_id) as total_orders, 138 | SUM(od.unit_price * od.quantity * (1 - od.discount)) as total_revenue, 139 | AVG(od.unit_price * od.quantity * (1 - od.discount)) as avg_order_value 140 | FROM orders o 141 | JOIN order_details od ON o.order_id = od.order_id 142 | JOIN products p ON od.product_id = p.product_id 143 | GROUP BY p.category_id 144 | ) 145 | SELECT 146 | cat.category_id, 147 | cat.category_name, 148 | cs.unique_customers, 149 | cs.total_orders, 150 | cs.total_revenue, 151 | cs.avg_order_value 152 | FROM categories cat 153 | JOIN category_stats cs ON cat.category_id = cs.category_id 154 | ORDER BY cs.total_revenue DESC; 155 | """ 156 | 157 | return execute_query(query) -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/src/data/feature_engineering.py: -------------------------------------------------------------------------------- 1 | """ 2 | Feature engineering module for customer category prediction. 3 | """ 4 | import pandas as pd 5 | import numpy as np 6 | from typing import List, Dict, Any, Tuple 7 | from datetime import datetime, timedelta 8 | from sklearn.preprocessing import StandardScaler 9 | from sklearn.model_selection import train_test_split 10 | from .database import get_customer_category_data 11 | 12 | def create_customer_features(df: pd.DataFrame) -> pd.DataFrame: 13 | """ 14 | Create customer-level features. 15 | 16 | Args: 17 | df (pd.DataFrame): Input DataFrame with customer data 18 | 19 | Returns: 20 | pd.DataFrame: DataFrame with customer features 21 | """ 22 | # Convert order_date to datetime if it's not already 23 | if not pd.api.types.is_datetime64_any_dtype(df['order_date']): 24 | df['order_date'] = pd.to_datetime(df['order_date']) 25 | 26 | # Group by customer and calculate metrics 27 | customer_features = df.groupby('customer_id').agg({ 28 | 'order_id': 'count', 29 | 'total_amount': ['sum', 'mean', 'std'], 30 | 'order_date': ['min', 'max'], 31 | 'category_id': 'nunique' # Number of unique categories purchased 32 | }).reset_index() 33 | 34 | # Flatten column names 35 | customer_features.columns = ['customer_id', 'total_orders', 36 | 'total_spent', 'avg_order_value', 37 | 'std_order_value', 'first_order_date', 38 | 'last_order_date', 'unique_categories'] 39 | 40 | # Calculate time-based features 41 | customer_features['customer_lifetime'] = ( 42 | customer_features['last_order_date'] - customer_features['first_order_date'] 43 | ).dt.days 44 | 45 | customer_features['avg_days_between_orders'] = ( 46 | customer_features['customer_lifetime'] / customer_features['total_orders'] 47 | ) 48 | 49 | # Calculate category diversity 50 | customer_features['category_diversity'] = ( 51 | customer_features['unique_categories'] / customer_features['total_orders'] 52 | ) 53 | 54 | return customer_features 55 | 56 | def create_category_features(df: pd.DataFrame) -> pd.DataFrame: 57 | """ 58 | Create category-level features. 59 | 60 | Args: 61 | df (pd.DataFrame): Input DataFrame with category data 62 | 63 | Returns: 64 | pd.DataFrame: DataFrame with category features 65 | """ 66 | # Convert order_date to datetime if it's not already 67 | if not pd.api.types.is_datetime64_any_dtype(df['order_date']): 68 | df['order_date'] = pd.to_datetime(df['order_date']) 69 | 70 | # Group by customer and category 71 | category_features = df.groupby(['customer_id', 'category_id']).agg({ 72 | 'order_id': 'count', 73 | 'total_amount': ['sum', 'mean'], 74 | 'order_date': ['min', 'max'] 75 | }).reset_index() 76 | 77 | # Flatten column names 78 | category_features.columns = ['customer_id', 'category_id', 79 | 'category_orders', 'category_spent', 80 | 'avg_category_order', 'first_category_order', 81 | 'last_category_order'] 82 | 83 | # Calculate category-specific metrics 84 | category_features['category_lifetime'] = ( 85 | category_features['last_category_order'] - category_features['first_category_order'] 86 | ).dt.days 87 | 88 | category_features['category_order_frequency'] = ( 89 | category_features['category_lifetime'] / category_features['category_orders'] 90 | ) 91 | 92 | return category_features 93 | 94 | def create_time_based_features(df: pd.DataFrame) -> pd.DataFrame: 95 | """ 96 | Create time-based features. 97 | 98 | Args: 99 | df (pd.DataFrame): Input DataFrame with temporal data 100 | 101 | Returns: 102 | pd.DataFrame: DataFrame with time-based features 103 | """ 104 | # Convert order_date to datetime if it's not already 105 | if not pd.api.types.is_datetime64_any_dtype(df['order_date']): 106 | df['order_date'] = pd.to_datetime(df['order_date']) 107 | 108 | # Extract time components 109 | df['order_year'] = df['order_date'].dt.year 110 | df['order_month'] = df['order_date'].dt.month 111 | df['order_day'] = df['order_date'].dt.day 112 | df['order_dayofweek'] = df['order_date'].dt.dayofweek 113 | df['order_quarter'] = df['order_date'].dt.quarter 114 | 115 | # Calculate time since last order 116 | df['days_since_last_order'] = df.groupby('customer_id')['order_date'].diff().dt.days 117 | 118 | return df 119 | 120 | def calculate_customer_metrics(df: pd.DataFrame) -> pd.DataFrame: 121 | """ 122 | Calculate customer-level metrics. 123 | 124 | Args: 125 | df (pd.DataFrame): Input DataFrame with customer data 126 | 127 | Returns: 128 | pd.DataFrame: DataFrame with customer metrics 129 | """ 130 | # Convert order_date to datetime if it's not already 131 | if not pd.api.types.is_datetime64_any_dtype(df['order_date']): 132 | df['order_date'] = pd.to_datetime(df['order_date']) 133 | 134 | # Calculate RFM metrics 135 | current_date = df['order_date'].max() 136 | 137 | rfm = df.groupby('customer_id').agg({ 138 | 'order_date': lambda x: (current_date - x.max()).days, # Recency 139 | 'order_id': 'count', # Frequency 140 | 'total_amount': 'sum' # Monetary 141 | }).reset_index() 142 | 143 | rfm.columns = ['customer_id', 'recency', 'frequency', 'monetary'] 144 | 145 | # Calculate additional metrics 146 | rfm['avg_order_value'] = rfm['monetary'] / rfm['frequency'] 147 | rfm['purchase_rate'] = rfm['frequency'] / ( 148 | (current_date - df.groupby('customer_id')['order_date'].min()).dt.days 149 | ) 150 | 151 | return rfm 152 | 153 | def prepare_model_data(df: pd.DataFrame, 154 | target_category: int, 155 | test_size: float = 0.2, 156 | random_state: int = 42) -> Tuple[np.ndarray, np.ndarray, 157 | np.ndarray, np.ndarray]: 158 | """ 159 | Prepare data for model training. 160 | 161 | Args: 162 | df (pd.DataFrame): Input DataFrame 163 | target_category (int): Target category ID 164 | test_size (float): Test set size 165 | random_state (int): Random state for reproducibility 166 | 167 | Returns: 168 | Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: X_train, X_test, y_train, y_test 169 | """ 170 | # Use category_id_x as the main category_id column 171 | df['target'] = (df['category_id_x'] == target_category).astype(int) 172 | 173 | # Select features 174 | feature_cols = [col for col in df.columns if col not in 175 | ['customer_id', 'category_id_x', 'category_id_y', 'order_id', 'order_date', 176 | 'target', 'first_order_date', 'last_order_date', 177 | 'first_category_order', 'last_category_order', 178 | 'company_name', 'category_name']] 179 | 180 | X = df[feature_cols] 181 | y = df['target'] 182 | 183 | # Split data 184 | X_train, X_test, y_train, y_test = train_test_split( 185 | X, y, test_size=test_size, random_state=random_state, 186 | stratify=y 187 | ) 188 | 189 | # Scale features 190 | scaler = StandardScaler() 191 | X_train = scaler.fit_transform(X_train) 192 | X_test = scaler.transform(X_test) 193 | 194 | return X_train, X_test, y_train, y_test 195 | 196 | def handle_missing_values(df: pd.DataFrame, 197 | strategy: str = 'mean') -> pd.DataFrame: 198 | """ 199 | Handle missing values in the dataset. 200 | 201 | Args: 202 | df (pd.DataFrame): Input DataFrame 203 | strategy (str): Strategy for handling missing values 204 | 205 | Returns: 206 | pd.DataFrame: DataFrame with handled missing values 207 | """ 208 | if strategy == 'mean': 209 | return df.fillna(df.mean()) 210 | elif strategy == 'median': 211 | return df.fillna(df.median()) 212 | elif strategy == 'mode': 213 | return df.fillna(df.mode().iloc[0]) 214 | elif strategy == 'drop': 215 | return df.dropna() 216 | else: 217 | raise ValueError(f"Unknown strategy: {strategy}") 218 | 219 | def get_train_test_split(df: pd.DataFrame, 220 | target_col: str, 221 | test_size: float = 0.2, 222 | random_state: int = 42) -> Tuple[pd.DataFrame, 223 | pd.DataFrame, 224 | pd.Series, 225 | pd.Series]: 226 | """ 227 | Split data into training and test sets. 228 | 229 | Args: 230 | df (pd.DataFrame): Input DataFrame 231 | target_col (str): Target column name 232 | test_size (float): Test set size 233 | random_state (int): Random state for reproducibility 234 | 235 | Returns: 236 | Tuple[pd.DataFrame, pd.DataFrame, pd.Series, pd.Series]: X_train, X_test, y_train, y_test 237 | """ 238 | X = df.drop(columns=[target_col]) 239 | y = df[target_col] 240 | 241 | return train_test_split( 242 | X, y, test_size=test_size, random_state=random_state, 243 | stratify=y 244 | ) -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/src/models/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Neural network models and evaluation package. 3 | """ -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/src/models/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/src/models/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/src/models/__pycache__/model_evaluation.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/src/models/__pycache__/model_evaluation.cpython-312.pyc -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/src/models/__pycache__/neural_network.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/src/models/__pycache__/neural_network.cpython-312.pyc -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/src/models/model_evaluation.py: -------------------------------------------------------------------------------- 1 | """ 2 | Model evaluation and visualization module. 3 | """ 4 | import os 5 | import numpy as np 6 | import pandas as pd 7 | import matplotlib.pyplot as plt 8 | import seaborn as sns 9 | from sklearn.metrics import ( 10 | confusion_matrix, 11 | classification_report, 12 | roc_curve, 13 | auc, 14 | precision_recall_curve 15 | ) 16 | from typing import Dict, Any, Tuple 17 | import json 18 | 19 | def plot_training_history(history: Dict[str, Any], category_name: str): 20 | """ 21 | Plot training history metrics. 22 | 23 | Args: 24 | history (Dict[str, Any]): Training history dictionary 25 | category_name (str): Name of the category being predicted 26 | """ 27 | # Create reports directory if it doesn't exist 28 | reports_dir = os.path.join('reports', 'training_history') 29 | os.makedirs(reports_dir, exist_ok=True) 30 | 31 | # Plot metrics 32 | plt.figure(figsize=(12, 4)) 33 | 34 | # Plot loss 35 | plt.subplot(1, 2, 1) 36 | plt.plot(history['loss'], label='Training Loss') 37 | plt.plot(history['val_loss'], label='Validation Loss') 38 | plt.title(f'Loss - {category_name}') 39 | plt.xlabel('Epoch') 40 | plt.ylabel('Loss') 41 | plt.legend() 42 | 43 | # Plot accuracy 44 | plt.subplot(1, 2, 2) 45 | plt.plot(history['accuracy'], label='Training Accuracy') 46 | plt.plot(history['val_accuracy'], label='Validation Accuracy') 47 | plt.title(f'Accuracy - {category_name}') 48 | plt.xlabel('Epoch') 49 | plt.ylabel('Accuracy') 50 | plt.legend() 51 | 52 | plt.tight_layout() 53 | 54 | # Save plot 55 | save_path = os.path.join(reports_dir, f'training_history_{category_name}.png') 56 | plt.savefig(save_path) 57 | plt.close() 58 | 59 | def plot_confusion_matrix(y_true: np.ndarray, 60 | y_pred: np.ndarray, 61 | category_name: str, 62 | threshold: float = 0.5): 63 | """ 64 | Plot confusion matrix. 65 | 66 | Args: 67 | y_true (np.ndarray): True labels 68 | y_pred (np.ndarray): Predicted probabilities 69 | category_name (str): Name of the category being predicted 70 | threshold (float): Classification threshold for converting probabilities to binary predictions 71 | """ 72 | # Create reports directory if it doesn't exist 73 | reports_dir = os.path.join('reports', 'confusion_matrices') 74 | os.makedirs(reports_dir, exist_ok=True) 75 | 76 | # Convert probabilities to binary predictions 77 | y_pred_binary = (y_pred >= threshold).astype(int) 78 | 79 | # Calculate confusion matrix 80 | cm = confusion_matrix(y_true, y_pred_binary) 81 | 82 | # Plot confusion matrix 83 | plt.figure(figsize=(8, 6)) 84 | sns.heatmap(cm, annot=True, fmt='d', cmap='Blues') 85 | plt.title(f'Confusion Matrix - {category_name} (threshold={threshold})') 86 | plt.xlabel('Predicted') 87 | plt.ylabel('True') 88 | 89 | # Save plot 90 | save_path = os.path.join(reports_dir, f'confusion_matrix_{category_name}.png') 91 | plt.savefig(save_path) 92 | plt.close() 93 | 94 | def plot_roc_curve(y_true: np.ndarray, 95 | y_pred_proba: np.ndarray, 96 | category_name: str): 97 | """ 98 | Plot ROC curve. 99 | 100 | Args: 101 | y_true (np.ndarray): True labels 102 | y_pred_proba (np.ndarray): Predicted probabilities 103 | category_name (str): Name of the category being predicted 104 | """ 105 | # Create reports directory if it doesn't exist 106 | reports_dir = os.path.join('reports', 'roc_curves') 107 | os.makedirs(reports_dir, exist_ok=True) 108 | 109 | # Calculate ROC curve 110 | fpr, tpr, _ = roc_curve(y_true, y_pred_proba) 111 | roc_auc = auc(fpr, tpr) 112 | 113 | # Plot ROC curve 114 | plt.figure(figsize=(8, 6)) 115 | plt.plot(fpr, tpr, color='darkorange', lw=2, 116 | label=f'ROC curve (AUC = {roc_auc:.2f})') 117 | plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--') 118 | plt.xlim([0.0, 1.0]) 119 | plt.ylim([0.0, 1.05]) 120 | plt.xlabel('False Positive Rate') 121 | plt.ylabel('True Positive Rate') 122 | plt.title(f'ROC Curve - {category_name}') 123 | plt.legend(loc="lower right") 124 | 125 | # Save plot 126 | save_path = os.path.join(reports_dir, f'roc_curve_{category_name}.png') 127 | plt.savefig(save_path) 128 | plt.close() 129 | 130 | def plot_precision_recall_curve(y_true: np.ndarray, 131 | y_pred_proba: np.ndarray, 132 | category_name: str): 133 | """ 134 | Plot precision-recall curve. 135 | 136 | Args: 137 | y_true (np.ndarray): True labels 138 | y_pred_proba (np.ndarray): Predicted probabilities 139 | category_name (str): Name of the category being predicted 140 | """ 141 | # Create reports directory if it doesn't exist 142 | reports_dir = os.path.join('reports', 'precision_recall_curves') 143 | os.makedirs(reports_dir, exist_ok=True) 144 | 145 | # Calculate precision-recall curve 146 | precision, recall, _ = precision_recall_curve(y_true, y_pred_proba) 147 | 148 | # Plot precision-recall curve 149 | plt.figure(figsize=(8, 6)) 150 | plt.plot(recall, precision, color='blue', lw=2) 151 | plt.xlabel('Recall') 152 | plt.ylabel('Precision') 153 | plt.title(f'Precision-Recall Curve - {category_name}') 154 | plt.grid(True) 155 | 156 | # Save plot 157 | save_path = os.path.join(reports_dir, f'precision_recall_curve_{category_name}.png') 158 | plt.savefig(save_path) 159 | plt.close() 160 | 161 | def generate_evaluation_report(y_true: np.ndarray, 162 | y_pred: np.ndarray, 163 | y_pred_proba: np.ndarray, 164 | category_name: str, 165 | threshold: float = 0.5) -> Dict[str, float]: 166 | """ 167 | Generate comprehensive evaluation report. 168 | 169 | Args: 170 | y_true (np.ndarray): True labels 171 | y_pred (np.ndarray): Predicted probabilities 172 | y_pred_proba (np.ndarray): Predicted probabilities (same as y_pred) 173 | category_name (str): Name of the category being predicted 174 | threshold (float): Classification threshold for converting probabilities to binary predictions 175 | 176 | Returns: 177 | Dict[str, float]: Dictionary of evaluation metrics 178 | """ 179 | # Create reports directory if it doesn't exist 180 | reports_dir = os.path.join('reports', 'evaluation_reports') 181 | os.makedirs(reports_dir, exist_ok=True) 182 | 183 | # Convert probabilities to binary predictions 184 | y_pred_binary = (y_pred >= threshold).astype(int) 185 | 186 | # Generate plots 187 | plot_confusion_matrix(y_true, y_pred, category_name, threshold) 188 | plot_roc_curve(y_true, y_pred_proba, category_name) 189 | plot_precision_recall_curve(y_true, y_pred_proba, category_name) 190 | 191 | # Calculate metrics with zero_division=0 192 | report = classification_report(y_true, y_pred_binary, output_dict=True, zero_division=0) 193 | 194 | # Save report 195 | save_path = os.path.join(reports_dir, f'evaluation_report_{category_name}.txt') 196 | with open(save_path, 'w') as f: 197 | f.write(f"Evaluation Report for {category_name}\n") 198 | f.write("=" * 50 + "\n\n") 199 | f.write(f"Classification threshold: {threshold}\n\n") 200 | f.write(classification_report(y_true, y_pred_binary, zero_division=0)) 201 | 202 | # Add additional metrics 203 | f.write("\nAdditional Metrics:\n") 204 | f.write("-" * 20 + "\n") 205 | f.write(f"Number of samples: {len(y_true)}\n") 206 | f.write(f"Number of positive samples: {sum(y_true)}\n") 207 | f.write(f"Number of predicted positive samples: {sum(y_pred_binary)}\n") 208 | f.write(f"Positive class ratio: {sum(y_true)/len(y_true):.2%}\n") 209 | 210 | return report 211 | 212 | def analyze_feature_importance(model, feature_names: list, 213 | save_path: str = None) -> pd.DataFrame: 214 | """ 215 | Analyze feature importance using model weights. 216 | 217 | Args: 218 | model: Trained neural network model 219 | feature_names (list): List of feature names 220 | save_path (str, optional): Path to save the plot 221 | 222 | Returns: 223 | pd.DataFrame: Feature importance scores 224 | """ 225 | # Get weights from first layer 226 | weights = np.abs(model.layers[0].get_weights()[0]) 227 | 228 | # Calculate feature importance 229 | importance = np.mean(weights, axis=1) 230 | 231 | # Create DataFrame 232 | importance_df = pd.DataFrame({ 233 | 'feature': feature_names, 234 | 'importance': importance 235 | }) 236 | importance_df = importance_df.sort_values('importance', ascending=False) 237 | 238 | # Plot feature importance 239 | plt.figure(figsize=(10, 6)) 240 | sns.barplot(x='importance', y='feature', data=importance_df) 241 | plt.title('Feature Importance') 242 | plt.tight_layout() 243 | 244 | if save_path: 245 | os.makedirs(os.path.dirname(save_path), exist_ok=True) 246 | plt.savefig(save_path, bbox_inches='tight', dpi=300) 247 | plt.close() 248 | 249 | return importance_df -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/src/models/neural_network.py: -------------------------------------------------------------------------------- 1 | """ 2 | Neural network model for customer category prediction. 3 | """ 4 | import os 5 | import numpy as np 6 | import tensorflow as tf 7 | from tensorflow.keras.models import Sequential, load_model 8 | from tensorflow.keras.layers import Dense, Dropout 9 | from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint 10 | from tensorflow.keras.metrics import AUC 11 | from typing import Tuple, Dict, Any 12 | import json 13 | 14 | from src.data.feature_engineering import ( 15 | create_customer_features, 16 | prepare_model_data, 17 | get_train_test_split 18 | ) 19 | 20 | class CustomerCategoryPredictor: 21 | """ 22 | Neural network model for predicting customer category purchases. 23 | """ 24 | def __init__(self, 25 | input_dim: int, 26 | hidden_layers: list = [64, 32, 16], 27 | dropout_rate: float = 0.3, 28 | learning_rate: float = 0.001): 29 | """ 30 | Initialize the model. 31 | 32 | Args: 33 | input_dim (int): Number of input features 34 | hidden_layers (list): List of hidden layer sizes 35 | dropout_rate (float): Dropout rate for regularization 36 | learning_rate (float): Learning rate for optimizer 37 | """ 38 | self.input_dim = input_dim 39 | self.hidden_layers = hidden_layers 40 | self.dropout_rate = dropout_rate 41 | self.learning_rate = learning_rate 42 | self.model = self._build_model() 43 | 44 | def _build_model(self) -> Sequential: 45 | """ 46 | Build the neural network model. 47 | 48 | Returns: 49 | Sequential: Compiled Keras model 50 | """ 51 | model = Sequential() 52 | 53 | # Input layer 54 | model.add(Dense(self.hidden_layers[0], activation='relu', input_dim=self.input_dim)) 55 | model.add(Dropout(self.dropout_rate)) 56 | 57 | # Hidden layers 58 | for units in self.hidden_layers[1:]: 59 | model.add(Dense(units, activation='relu')) 60 | model.add(Dropout(self.dropout_rate)) 61 | 62 | # Output layer 63 | model.add(Dense(1, activation='sigmoid')) 64 | 65 | # Compile model 66 | model.compile( 67 | optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate), 68 | loss='binary_crossentropy', 69 | metrics=['accuracy', AUC(name='auc')] 70 | ) 71 | 72 | return model 73 | 74 | def train(self, 75 | X_train: np.ndarray, 76 | y_train: np.ndarray, 77 | batch_size: int = 32, 78 | epochs: int = 100, 79 | validation_split: float = 0.2) -> dict: 80 | """ 81 | Train the model. 82 | 83 | Args: 84 | X_train (np.ndarray): Training features 85 | y_train (np.ndarray): Training labels 86 | batch_size (int): Batch size for training 87 | epochs (int): Number of training epochs 88 | validation_split (float): Validation split ratio 89 | 90 | Returns: 91 | dict: Training history 92 | """ 93 | # Create callbacks 94 | callbacks = [ 95 | EarlyStopping( 96 | monitor='val_loss', 97 | patience=10, 98 | restore_best_weights=True, 99 | mode='min' 100 | ), 101 | ModelCheckpoint( 102 | 'best_model.keras', 103 | monitor='val_loss', 104 | save_best_only=True, 105 | mode='min' 106 | ) 107 | ] 108 | 109 | # Train model 110 | history = self.model.fit( 111 | X_train, y_train, 112 | batch_size=batch_size, 113 | epochs=epochs, 114 | validation_split=validation_split, 115 | callbacks=callbacks, 116 | verbose=1 117 | ) 118 | 119 | return history.history 120 | 121 | def predict(self, X: np.ndarray) -> np.ndarray: 122 | """ 123 | Make predictions. 124 | 125 | Args: 126 | X (np.ndarray): Input features 127 | 128 | Returns: 129 | np.ndarray: Predicted probabilities 130 | """ 131 | return self.model.predict(X, verbose=0) 132 | 133 | def evaluate(self, X: np.ndarray, y: np.ndarray) -> dict: 134 | """ 135 | Evaluate the model. 136 | 137 | Args: 138 | X (np.ndarray): Test features 139 | y (np.ndarray): Test labels 140 | 141 | Returns: 142 | dict: Evaluation metrics 143 | """ 144 | return dict(zip(self.model.metrics_names, 145 | self.model.evaluate(X, y, verbose=0))) 146 | 147 | def save(self, filepath: str): 148 | """ 149 | Save the model. 150 | 151 | Args: 152 | filepath (str): Path to save the model 153 | """ 154 | # Ensure filepath ends with .keras 155 | if not filepath.endswith('.keras'): 156 | filepath = f"{filepath}.keras" 157 | 158 | # Create directory if it doesn't exist 159 | os.makedirs(os.path.dirname(filepath), exist_ok=True) 160 | 161 | # Save model without specifying save_format 162 | self.model.save(filepath) 163 | 164 | @classmethod 165 | def load(cls, filepath: str) -> 'CustomerCategoryPredictor': 166 | """ 167 | Load a saved model. 168 | 169 | Args: 170 | filepath (str): Path to the saved model 171 | 172 | Returns: 173 | CustomerCategoryPredictor: Loaded model 174 | """ 175 | # Ensure filepath ends with .keras 176 | if not filepath.endswith('.keras'): 177 | filepath = f"{filepath}.keras" 178 | 179 | model = load_model(filepath, compile=True) 180 | instance = cls(input_dim=model.input_shape[1]) 181 | instance.model = model 182 | return instance 183 | 184 | def main(): 185 | """Main training script.""" 186 | # Prepare data 187 | df = create_customer_features() 188 | X, y = prepare_model_data(df) 189 | X_train, X_test, y_train, y_test = get_train_test_split(X, y) 190 | 191 | # Split training data into train and validation 192 | X_train, X_val, y_train, y_val = get_train_test_split(X_train, y_train, test_size=0.2) 193 | 194 | # Initialize and train model 195 | model = CustomerCategoryPredictor(input_dim=X.shape[1]) 196 | history = model.train(X_train, y_train, X_val, y_val) 197 | 198 | # Evaluate model 199 | metrics = model.evaluate(X_test, y_test) 200 | print("\nTest Metrics:") 201 | for metric_name, value in metrics.items(): 202 | print(f"{metric_name}: {value:.4f}") 203 | 204 | # Save model 205 | model.save('models/saved/customer_category_predictor.h5') 206 | 207 | if __name__ == '__main__': 208 | main() -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/src/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions package. 3 | """ -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/src/utils/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/src/utils/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/src/utils/__pycache__/helpers.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_categorical_order_prediction/src/utils/__pycache__/helpers.cpython-312.pyc -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/src/utils/helpers.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper functions for the project. 3 | """ 4 | import os 5 | import json 6 | import logging 7 | from typing import Dict, Any, List, Optional 8 | import numpy as np 9 | import pandas as pd 10 | from datetime import datetime 11 | 12 | # Configure logging 13 | logging.basicConfig( 14 | level=logging.INFO, 15 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' 16 | ) 17 | logger = logging.getLogger(__name__) 18 | 19 | def setup_directories() -> None: 20 | """ 21 | Create necessary directories for the project. 22 | """ 23 | directories = [ 24 | 'data/raw', 25 | 'data/processed', 26 | 'models', 27 | 'reports', 28 | 'logs' 29 | ] 30 | 31 | for directory in directories: 32 | os.makedirs(directory, exist_ok=True) 33 | logger.info(f"Created directory: {directory}") 34 | 35 | def save_json(data: Dict[str, Any], filepath: str) -> None: 36 | """ 37 | Save data to a JSON file. 38 | 39 | Args: 40 | data (Dict[str, Any]): Data to save 41 | filepath (str): Path to save the file 42 | """ 43 | try: 44 | with open(filepath, 'w', encoding='utf-8') as f: 45 | json.dump(data, f, indent=4, ensure_ascii=False) 46 | logger.info(f"Saved data to {filepath}") 47 | except Exception as e: 48 | logger.error(f"Error saving data to {filepath}: {str(e)}") 49 | raise 50 | 51 | def load_json(filepath: str) -> Dict[str, Any]: 52 | """ 53 | Load data from a JSON file. 54 | 55 | Args: 56 | filepath (str): Path to the file 57 | 58 | Returns: 59 | Dict[str, Any]: Loaded data 60 | """ 61 | try: 62 | with open(filepath, 'r', encoding='utf-8') as f: 63 | data = json.load(f) 64 | logger.info(f"Loaded data from {filepath}") 65 | return data 66 | except Exception as e: 67 | logger.error(f"Error loading data from {filepath}: {str(e)}") 68 | raise 69 | 70 | def save_dataframe(df: pd.DataFrame, filepath: str) -> None: 71 | """ 72 | Save DataFrame to a file. 73 | 74 | Args: 75 | df (pd.DataFrame): DataFrame to save 76 | filepath (str): Path to save the file 77 | """ 78 | try: 79 | df.to_csv(filepath, index=False) 80 | logger.info(f"Saved DataFrame to {filepath}") 81 | except Exception as e: 82 | logger.error(f"Error saving DataFrame to {filepath}: {str(e)}") 83 | raise 84 | 85 | def load_dataframe(filepath: str) -> pd.DataFrame: 86 | """ 87 | Load DataFrame from a file. 88 | 89 | Args: 90 | filepath (str): Path to the file 91 | 92 | Returns: 93 | pd.DataFrame: Loaded DataFrame 94 | """ 95 | try: 96 | df = pd.read_csv(filepath) 97 | logger.info(f"Loaded DataFrame from {filepath}") 98 | return df 99 | except Exception as e: 100 | logger.error(f"Error loading DataFrame from {filepath}: {str(e)}") 101 | raise 102 | 103 | def calculate_time_features(df: pd.DataFrame, 104 | date_column: str) -> pd.DataFrame: 105 | """ 106 | Calculate time-based features from a date column. 107 | 108 | Args: 109 | df (pd.DataFrame): Input DataFrame 110 | date_column (str): Name of the date column 111 | 112 | Returns: 113 | pd.DataFrame: DataFrame with additional time features 114 | """ 115 | df = df.copy() 116 | 117 | # Convert to datetime if not already 118 | if not pd.api.types.is_datetime64_any_dtype(df[date_column]): 119 | df[date_column] = pd.to_datetime(df[date_column]) 120 | 121 | # Extract time components 122 | df[f'{date_column}_year'] = df[date_column].dt.year 123 | df[f'{date_column}_month'] = df[date_column].dt.month 124 | df[f'{date_column}_day'] = df[date_column].dt.day 125 | df[f'{date_column}_dayofweek'] = df[date_column].dt.dayofweek 126 | df[f'{date_column}_quarter'] = df[date_column].dt.quarter 127 | 128 | return df 129 | 130 | def calculate_rolling_features(df: pd.DataFrame, 131 | group_col: str, 132 | value_col: str, 133 | windows: List[int]) -> pd.DataFrame: 134 | """ 135 | Calculate rolling window features. 136 | 137 | Args: 138 | df (pd.DataFrame): Input DataFrame 139 | group_col (str): Column to group by 140 | value_col (str): Column to calculate rolling features for 141 | windows (List[int]): List of window sizes 142 | 143 | Returns: 144 | pd.DataFrame: DataFrame with rolling features 145 | """ 146 | df = df.copy() 147 | 148 | for window in windows: 149 | # Calculate rolling mean 150 | df[f'{value_col}_rolling_mean_{window}'] = ( 151 | df.groupby(group_col)[value_col] 152 | .transform(lambda x: x.rolling(window, min_periods=1).mean()) 153 | ) 154 | 155 | # Calculate rolling std 156 | df[f'{value_col}_rolling_std_{window}'] = ( 157 | df.groupby(group_col)[value_col] 158 | .transform(lambda x: x.rolling(window, min_periods=1).std()) 159 | ) 160 | 161 | return df 162 | 163 | def calculate_lag_features(df: pd.DataFrame, 164 | group_col: str, 165 | value_col: str, 166 | lags: List[int]) -> pd.DataFrame: 167 | """ 168 | Calculate lag features. 169 | 170 | Args: 171 | df (pd.DataFrame): Input DataFrame 172 | group_col (str): Column to group by 173 | value_col (str): Column to calculate lag features for 174 | lags (List[int]): List of lag periods 175 | 176 | Returns: 177 | pd.DataFrame: DataFrame with lag features 178 | """ 179 | df = df.copy() 180 | 181 | for lag in lags: 182 | df[f'{value_col}_lag_{lag}'] = ( 183 | df.groupby(group_col)[value_col] 184 | .transform(lambda x: x.shift(lag)) 185 | ) 186 | 187 | return df 188 | 189 | def calculate_ratio_features(df: pd.DataFrame, 190 | numerator_col: str, 191 | denominator_col: str, 192 | prefix: str = '') -> pd.DataFrame: 193 | """ 194 | Calculate ratio features. 195 | 196 | Args: 197 | df (pd.DataFrame): Input DataFrame 198 | numerator_col (str): Numerator column 199 | denominator_col (str): Denominator column 200 | prefix (str): Prefix for the new column name 201 | 202 | Returns: 203 | pd.DataFrame: DataFrame with ratio features 204 | """ 205 | df = df.copy() 206 | 207 | # Calculate ratio 208 | ratio_col = f'{prefix}ratio' if prefix else 'ratio' 209 | df[ratio_col] = df[numerator_col] / df[denominator_col] 210 | 211 | # Handle division by zero 212 | df[ratio_col] = df[ratio_col].replace([np.inf, -np.inf], np.nan) 213 | 214 | return df 215 | 216 | def calculate_percentile_features(df: pd.DataFrame, 217 | group_col: str, 218 | value_col: str, 219 | percentiles: List[float]) -> pd.DataFrame: 220 | """ 221 | Calculate percentile features. 222 | 223 | Args: 224 | df (pd.DataFrame): Input DataFrame 225 | group_col (str): Column to group by 226 | value_col (str): Column to calculate percentiles for 227 | percentiles (List[float]): List of percentiles to calculate 228 | 229 | Returns: 230 | pd.DataFrame: DataFrame with percentile features 231 | """ 232 | df = df.copy() 233 | 234 | for percentile in percentiles: 235 | df[f'{value_col}_percentile_{int(percentile*100)}'] = ( 236 | df.groupby(group_col)[value_col] 237 | .transform(lambda x: x.quantile(percentile)) 238 | ) 239 | 240 | return df -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test package. 3 | """ -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/tests/test_data.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for data processing functions. 3 | """ 4 | import pytest 5 | import pandas as pd 6 | import numpy as np 7 | from datetime import datetime, timedelta 8 | from src.data.feature_engineering import ( 9 | create_customer_features, 10 | prepare_model_data, 11 | get_train_test_split 12 | ) 13 | from src.utils.helpers import ( 14 | handle_missing_values, 15 | calculate_customer_metrics, 16 | create_time_based_features 17 | ) 18 | 19 | @pytest.fixture 20 | def sample_data(): 21 | """Create sample data for testing.""" 22 | data = { 23 | 'customer_id': ['C1', 'C1', 'C2', 'C2', 'C3'], 24 | 'category_name': ['A', 'B', 'A', 'C', 'B'], 25 | 'purchase_count': [2, 1, 3, 1, 2], 26 | 'total_spent': [100, 50, 150, 75, 80], 27 | 'last_purchase_date': [ 28 | datetime.now() - timedelta(days=x) 29 | for x in [1, 2, 3, 4, 5] 30 | ] 31 | } 32 | return pd.DataFrame(data) 33 | 34 | def test_create_customer_features(sample_data): 35 | """Test customer feature creation.""" 36 | df = create_customer_features() 37 | assert isinstance(df, pd.DataFrame) 38 | assert not df.empty 39 | assert 'category_spend_ratio' in df.columns 40 | assert 'category_purchase_ratio' in df.columns 41 | 42 | def test_prepare_model_data(sample_data): 43 | """Test model data preparation.""" 44 | X, y = prepare_model_data(sample_data) 45 | assert isinstance(X, np.ndarray) 46 | assert isinstance(y, np.ndarray) 47 | assert len(X) == len(y) 48 | assert X.shape[1] > 0 49 | 50 | def test_get_train_test_split(sample_data): 51 | """Test train-test split function.""" 52 | X, y = prepare_model_data(sample_data) 53 | X_train, X_test, y_train, y_test = get_train_test_split(X, y, test_size=0.2) 54 | 55 | assert len(X_train) + len(X_test) == len(X) 56 | assert len(y_train) + len(y_test) == len(y) 57 | assert X_train.shape[1] == X_test.shape[1] 58 | 59 | def test_handle_missing_values(sample_data): 60 | """Test missing value handling.""" 61 | # Add some missing values 62 | sample_data.loc[0, 'total_spent'] = np.nan 63 | sample_data.loc[1, 'category_name'] = None 64 | 65 | # Test different strategies 66 | df_mean = handle_missing_values(sample_data, strategy='mean') 67 | df_median = handle_missing_values(sample_data, strategy='median') 68 | df_zero = handle_missing_values(sample_data, strategy='zero') 69 | 70 | assert not df_mean.isnull().any().any() 71 | assert not df_median.isnull().any().any() 72 | assert not df_zero.isnull().any().any() 73 | 74 | def test_calculate_customer_metrics(sample_data): 75 | """Test customer metrics calculation.""" 76 | metrics = calculate_customer_metrics( 77 | sample_data, 78 | customer_id_col='customer_id', 79 | date_col='last_purchase_date', 80 | value_col='total_spent' 81 | ) 82 | 83 | assert isinstance(metrics, pd.DataFrame) 84 | assert 'days_since_first_purchase' in metrics.columns 85 | assert 'days_since_last_purchase' in metrics.columns 86 | assert 'purchase_frequency' in metrics.columns 87 | 88 | def test_create_time_based_features(sample_data): 89 | """Test time-based feature creation.""" 90 | df = create_time_based_features(sample_data, 'last_purchase_date') 91 | 92 | assert 'year' in df.columns 93 | assert 'month' in df.columns 94 | assert 'day' in df.columns 95 | assert 'dayofweek' in df.columns 96 | assert 'month_sin' in df.columns 97 | assert 'month_cos' in df.columns -------------------------------------------------------------------------------- /DeepLearning/customer_categorical_order_prediction/tests/test_models.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for model functions. 3 | """ 4 | import pytest 5 | import numpy as np 6 | import tensorflow as tf 7 | from src.models.neural_network import CustomerCategoryPredictor 8 | from src.models.model_evaluation import ( 9 | plot_training_history, 10 | plot_roc_curve, 11 | plot_precision_recall_curve, 12 | plot_confusion_matrix, 13 | generate_evaluation_report, 14 | analyze_feature_importance 15 | ) 16 | 17 | @pytest.fixture 18 | def sample_data(): 19 | """Create sample data for testing.""" 20 | np.random.seed(42) 21 | X = np.random.randn(100, 10) 22 | y = np.random.randint(0, 2, 100) 23 | return X, y 24 | 25 | @pytest.fixture 26 | def sample_model(sample_data): 27 | """Create a sample model for testing.""" 28 | X, _ = sample_data 29 | model = CustomerCategoryPredictor(input_dim=X.shape[1]) 30 | return model 31 | 32 | def test_model_initialization(sample_data): 33 | """Test model initialization.""" 34 | X, _ = sample_data 35 | model = CustomerCategoryPredictor(input_dim=X.shape[1]) 36 | 37 | assert isinstance(model.model, tf.keras.Sequential) 38 | assert model.history is None 39 | 40 | def test_model_training(sample_model, sample_data): 41 | """Test model training.""" 42 | X, y = sample_data 43 | X_train, X_val = X[:80], X[80:] 44 | y_train, y_val = y[:80], y[80:] 45 | 46 | history = sample_model.train( 47 | X_train, y_train, 48 | X_val, y_val, 49 | batch_size=32, 50 | epochs=2 51 | ) 52 | 53 | assert isinstance(history, dict) 54 | assert 'loss' in history 55 | assert 'accuracy' in history 56 | 57 | def test_model_evaluation(sample_model, sample_data): 58 | """Test model evaluation.""" 59 | X, y = sample_data 60 | X_train, X_test = X[:80], X[80:] 61 | y_train, y_test = y[:80], y[80:] 62 | 63 | # Train model 64 | sample_model.train(X_train, y_train, X_test, y_test, epochs=2) 65 | 66 | # Evaluate model 67 | metrics = sample_model.evaluate(X_test, y_test) 68 | 69 | assert isinstance(metrics, dict) 70 | assert 'loss' in metrics 71 | assert 'accuracy' in metrics 72 | 73 | def test_model_prediction(sample_model, sample_data): 74 | """Test model prediction.""" 75 | X, _ = sample_data 76 | predictions = sample_model.predict(X) 77 | 78 | assert isinstance(predictions, np.ndarray) 79 | assert predictions.shape[0] == X.shape[0] 80 | assert predictions.shape[1] == 1 81 | 82 | def test_model_save_load(sample_model, sample_data, tmp_path): 83 | """Test model saving and loading.""" 84 | X, y = sample_data 85 | X_train, X_val = X[:80], X[80:] 86 | y_train, y_val = y[:80], y[80:] 87 | 88 | # Train model 89 | sample_model.train(X_train, y_train, X_val, y_val, epochs=2) 90 | 91 | # Save model 92 | save_path = tmp_path / "test_model.h5" 93 | sample_model.save_model(str(save_path)) 94 | 95 | # Load model 96 | loaded_model = CustomerCategoryPredictor.load_model(str(save_path)) 97 | 98 | # Compare predictions 99 | original_preds = sample_model.predict(X) 100 | loaded_preds = loaded_model.predict(X) 101 | 102 | np.testing.assert_array_almost_equal(original_preds, loaded_preds) 103 | 104 | def test_plot_training_history(sample_model, sample_data, tmp_path): 105 | """Test training history plotting.""" 106 | X, y = sample_data 107 | X_train, X_val = X[:80], X[80:] 108 | y_train, y_val = y[:80], y[80:] 109 | 110 | # Train model 111 | history = sample_model.train(X_train, y_train, X_val, y_val, epochs=2) 112 | 113 | # Plot history 114 | save_path = tmp_path / "history.png" 115 | plot_training_history(history, str(save_path)) 116 | 117 | assert save_path.exists() 118 | 119 | def test_plot_roc_curve(sample_model, sample_data, tmp_path): 120 | """Test ROC curve plotting.""" 121 | X, y = sample_data 122 | X_train, X_test = X[:80], X[80:] 123 | y_train, y_test = y[:80], y[80:] 124 | 125 | # Train model 126 | sample_model.train(X_train, y_train, X_test, y_test, epochs=2) 127 | 128 | # Get predictions 129 | y_pred = sample_model.predict(X_test) 130 | 131 | # Plot ROC curve 132 | save_path = tmp_path / "roc.png" 133 | plot_roc_curve(y_test, y_pred, str(save_path)) 134 | 135 | assert save_path.exists() 136 | 137 | def test_plot_confusion_matrix(sample_model, sample_data, tmp_path): 138 | """Test confusion matrix plotting.""" 139 | X, y = sample_data 140 | X_train, X_test = X[:80], X[80:] 141 | y_train, y_test = y[:80], y[80:] 142 | 143 | # Train model 144 | sample_model.train(X_train, y_train, X_test, y_test, epochs=2) 145 | 146 | # Get predictions 147 | y_pred = sample_model.predict(X_test) 148 | 149 | # Plot confusion matrix 150 | save_path = tmp_path / "confusion.png" 151 | plot_confusion_matrix(y_test, y_pred, threshold=0.5, save_path=str(save_path)) 152 | 153 | assert save_path.exists() 154 | 155 | def test_generate_evaluation_report(sample_model, sample_data, tmp_path): 156 | """Test evaluation report generation.""" 157 | X, y = sample_data 158 | X_train, X_test = X[:80], X[80:] 159 | y_train, y_test = y[:80], y[80:] 160 | 161 | # Train model 162 | sample_model.train(X_train, y_train, X_test, y_test, epochs=2) 163 | 164 | # Get predictions 165 | y_pred = sample_model.predict(X_test) 166 | 167 | # Generate report 168 | save_dir = tmp_path / "reports" 169 | report = generate_evaluation_report( 170 | y_test, y_pred, 171 | threshold=0.5, 172 | save_dir=str(save_dir) 173 | ) 174 | 175 | assert isinstance(report, dict) 176 | assert save_dir.exists() 177 | assert (save_dir / "metrics.json").exists() 178 | 179 | def test_analyze_feature_importance(sample_model, sample_data, tmp_path): 180 | """Test feature importance analysis.""" 181 | X, y = sample_data 182 | feature_names = [f"feature_{i}" for i in range(X.shape[1])] 183 | 184 | # Train model 185 | sample_model.train(X[:80], y[:80], X[80:], y[80:], epochs=2) 186 | 187 | # Analyze feature importance 188 | save_path = tmp_path / "importance.png" 189 | importance_df = analyze_feature_importance( 190 | sample_model.model, 191 | feature_names, 192 | save_path=str(save_path) 193 | ) 194 | 195 | assert isinstance(importance_df, pd.DataFrame) 196 | assert 'feature' in importance_df.columns 197 | assert 'importance' in importance_df.columns 198 | assert save_path.exists() -------------------------------------------------------------------------------- /DeepLearning/customer_order_prediction/data/query.sql: -------------------------------------------------------------------------------- 1 | with last_order_date as 2 | ( 3 | select max(order_date) as max_date from orders 4 | ), 5 | customer_order_stats as ( 6 | select 7 | c.customer_id, 8 | count(o.order_id) as total_orders, 9 | sum(od.unit_price*od.quantity) as total_spent, 10 | avg(od.unit_price*od.quantity) as avg_order_value 11 | from orders o 12 | inner join customers c 13 | on o.customer_id = c.customer_id 14 | inner join order_details od 15 | on od.order_id = o.order_id 16 | group by c.customer_id), 17 | label_data as( 18 | select c.customer_id, 19 | case when exists( 20 | select 1 from orders o2,last_order_date lod 21 | where o2.customer_id = c.customer_id 22 | and o2.order_date>(lod.max_date-Interval '6 months') 23 | ) 24 | then 1 else 0 25 | end as will_order_again 26 | from customers c 27 | ) 28 | select 29 | s.customer_id, 30 | s.total_orders, 31 | s.total_spent, 32 | s.avg_order_value, 33 | l.will_order_again 34 | from customer_order_stats s join label_data l 35 | on s.customer_id = l.customer_id -------------------------------------------------------------------------------- /DeepLearning/customer_order_prediction/main.py: -------------------------------------------------------------------------------- 1 | from src.data_loader import load_data 2 | from src.preprocessing import preprocess_data 3 | from src.model import build_model 4 | from src.train import train_model 5 | from src.evaluate import evaluate_model 6 | 7 | 8 | def main(): 9 | print("Loading data") 10 | df = load_data("data/query.sql") 11 | 12 | print("Preprocessing data") 13 | X_train,X_test,y_train,y_test = preprocess_data(df,class_imbalance=2) 14 | 15 | print("Building model") 16 | model = build_model(input_shape=X_train.shape[1]) 17 | 18 | print("Training model") 19 | model = train_model(model,X_train,y_train, X_test,y_test) 20 | 21 | print("Evaluating model") 22 | evaluate_model(model,X_test,y_test) 23 | 24 | 25 | 26 | if __name__ =="__main__": 27 | main() -------------------------------------------------------------------------------- /DeepLearning/customer_order_prediction/outputs/model.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_order_prediction/outputs/model.h5 -------------------------------------------------------------------------------- /DeepLearning/customer_order_prediction/src/__pycache__/config.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_order_prediction/src/__pycache__/config.cpython-312.pyc -------------------------------------------------------------------------------- /DeepLearning/customer_order_prediction/src/__pycache__/data_loader.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_order_prediction/src/__pycache__/data_loader.cpython-312.pyc -------------------------------------------------------------------------------- /DeepLearning/customer_order_prediction/src/__pycache__/evaluate.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_order_prediction/src/__pycache__/evaluate.cpython-312.pyc -------------------------------------------------------------------------------- /DeepLearning/customer_order_prediction/src/__pycache__/model.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_order_prediction/src/__pycache__/model.cpython-312.pyc -------------------------------------------------------------------------------- /DeepLearning/customer_order_prediction/src/__pycache__/preprocessing.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_order_prediction/src/__pycache__/preprocessing.cpython-312.pyc -------------------------------------------------------------------------------- /DeepLearning/customer_order_prediction/src/__pycache__/train.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_order_prediction/src/__pycache__/train.cpython-312.pyc -------------------------------------------------------------------------------- /DeepLearning/customer_order_prediction/src/config.py: -------------------------------------------------------------------------------- 1 | DB_CONFIG = { 2 | "host":"localhost", 3 | "dbname":"northwind", 4 | "user":"postgres", 5 | "password":"12345", 6 | "port":5432 7 | } -------------------------------------------------------------------------------- /DeepLearning/customer_order_prediction/src/data_loader.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import psycopg2 3 | from src.config import DB_CONFIG 4 | 5 | 6 | def load_data(sql_path): 7 | 8 | with open(sql_path,"r") as file: 9 | query = file.read() 10 | 11 | connection = psycopg2.connect(**DB_CONFIG) 12 | df = pd.read_sql(query,connection) 13 | connection.close() 14 | return df -------------------------------------------------------------------------------- /DeepLearning/customer_order_prediction/src/evaluate.py: -------------------------------------------------------------------------------- 1 | from sklearn.metrics import classification_report 2 | 3 | def evaluate_model(model,X_test,y_test): 4 | y_pred = model.predict(X_test) 5 | y_pred_labels = (y_pred>0.5).astype("int32") 6 | 7 | print(classification_report(y_test,y_pred_labels)) -------------------------------------------------------------------------------- /DeepLearning/customer_order_prediction/src/model.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | def build_model(input_shape): 4 | 5 | model = tf.keras.Sequential( 6 | [ 7 | tf.keras.layers.Dense(8,activation="relu",input_shape=(input_shape,)), 8 | tf.keras.layers.Dense(4,activation="relu"), 9 | tf.keras.layers.Dense(1,activation="sigmoid") 10 | ] 11 | ) 12 | 13 | model.compile(optimizer="adam",loss="mean_squared_error",metrics=["accuracy"]) 14 | return model -------------------------------------------------------------------------------- /DeepLearning/customer_order_prediction/src/preprocessing.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.model_selection import train_test_split 3 | from sklearn.preprocessing import StandardScaler 4 | from imblearn.over_sampling import SMOTE 5 | from imblearn.over_sampling import RandomOverSampler 6 | from sklearn.utils.class_weight import compute_class_weight 7 | import numpy as np 8 | 9 | def preprocess_data(df,test_size=0.2,class_imbalance=0): 10 | X = df[["total_orders","total_spent","avg_order_value"]] 11 | y = df["will_order_again"] 12 | 13 | scaler = StandardScaler() 14 | X_scaled = scaler.fit_transform(X) 15 | 16 | X_train,X_test,y_train,y_test = train_test_split(X_scaled,y,test_size=test_size,random_state=42) 17 | 18 | if class_imbalance==0: 19 | return X_train,X_test,y_train,y_test 20 | 21 | #smote 22 | if class_imbalance == 1: 23 | print("Random Oversampling uygulanıyor...") 24 | ros = RandomOverSampler(random_state=42) 25 | X_resampled, y_resampled = ros.fit_resample(X_train, y_train) 26 | return X_resampled, X_test, y_resampled, y_test 27 | 28 | if class_imbalance == 2: 29 | print("Class Weight uygulanıyor...") 30 | class_weights = compute_class_weight( 31 | class_weight='balanced', 32 | classes=np.unique(y_train), 33 | y=y_train 34 | ) 35 | 36 | class_weight_dict = dict(zip(np.unique(y_train), class_weights)) 37 | print(f"Sınıf ağırlıkları: {class_weight_dict}") 38 | 39 | global class_weights_dict 40 | class_weights_dict = class_weight_dict 41 | return X_train, X_test, y_train, y_test -------------------------------------------------------------------------------- /DeepLearning/customer_order_prediction/src/train.py: -------------------------------------------------------------------------------- 1 | def train_model(model,X_train,y_train,X_test,y_test): 2 | model.fit(X_train,y_train,epochs=50,validation_data=(X_test,y_test),verbose=1) 3 | model.save("outputs/model.h5") 4 | return model -------------------------------------------------------------------------------- /DeepLearning/customer_order_risk/__pycache__/config.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_order_risk/__pycache__/config.cpython-312.pyc -------------------------------------------------------------------------------- /DeepLearning/customer_order_risk/best_model.keras: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_order_risk/best_model.keras -------------------------------------------------------------------------------- /DeepLearning/customer_order_risk/config.py: -------------------------------------------------------------------------------- 1 | from dotenv import load_dotenv 2 | import os 3 | 4 | load_dotenv() 5 | 6 | DB_CONFIG = { 7 | "dbname": os.getenv("DB_NAME","northwind"), 8 | "user":os.getenv("DB_USER","postgres"), 9 | "password":os.getenv("DB_PASSWORD","12345"), 10 | "host":os.getenv("DB_HOST","localhost"), 11 | "port":os.getenv("DB_PORT",5432) 12 | } 13 | 14 | MODEL_CONFIG = { 15 | "test_size":0.2, 16 | "random_state":42, 17 | "epochs":50 18 | } 19 | 20 | FEATURE_CONFIG = { 21 | "high_discount_threshold":0.75, #75th percentile means high discount begins 22 | "low_amount_threshold": 0.25 23 | } 24 | -------------------------------------------------------------------------------- /DeepLearning/customer_order_risk/main.py: -------------------------------------------------------------------------------- 1 | from src.database import DatabaseManager 2 | from src.feature_engineering import FeatureEngineer 3 | from src.model import ReturnRiskModel 4 | 5 | from sklearn.model_selection import train_test_split 6 | from config import MODEL_CONFIG 7 | import pandas as pd 8 | import numpy as np 9 | 10 | 11 | def main(): 12 | db_manager = None 13 | 14 | try: 15 | db_manager = DatabaseManager() 16 | feature_engineer = FeatureEngineer() 17 | model = ReturnRiskModel() 18 | 19 | print("Fetching order data") 20 | df = db_manager.get_order_data() 21 | 22 | print("Creating features") 23 | df_processed = feature_engineer.create_feautures(df) 24 | 25 | X, y = feature_engineer.prepare_model_data(df_processed) 26 | feature_names = [ 27 | "unit_price", "quantity", "discount", "total_amount", "discount_amount", 28 | "avg_order_amount", "std_order_amount", "total_spent", "avg_discount", 29 | "max_discount", "avg_quantity", "total_quantity" 30 | ] 31 | 32 | X_train, X_test, y_train, y_test = model.split_data(X, y) 33 | 34 | model.build_model(input_dim=X_train.shape[1]) 35 | model.train(X_train, y_train, X_test, y_test) 36 | loss, accuracy = model.evaluate(X_test, y_test) 37 | 38 | print(f"Test accuracy: {accuracy}") 39 | 40 | # Riskli bulunan siparişleri belirle 41 | predictions = model.predict(X_test) 42 | risky_orders = X_test[predictions.flatten() > 0.5] # 0.5'ten büyük tahminleri riskli kabul et 43 | 44 | if len(risky_orders) > 0: 45 | print("\nRiskli bulunan siparişlerin açıklaması:") 46 | shap_df, feature_importance = model.explain_prediction(risky_orders, feature_names) 47 | 48 | print("\nEn önemli özellikler (SHAP değerlerine göre):") 49 | print(feature_importance.head()) 50 | 51 | print("\nİlk riskli sipariş için özellik katkıları:") 52 | first_risky = shap_df.iloc[0] 53 | for feature, value in first_risky.items(): 54 | if abs(value) > 0.01: # Sadece önemli katkıları göster 55 | direction = "arttırdı" if value > 0 else "azalttı" 56 | print(f"{feature}: {value:.4f} ({direction})") 57 | 58 | except Exception as e: 59 | print(e) 60 | finally: 61 | if db_manager is not None: 62 | db_manager.disconnect() 63 | 64 | if __name__ == "__main__": 65 | main() -------------------------------------------------------------------------------- /DeepLearning/customer_order_risk/requirements.txt: -------------------------------------------------------------------------------- 1 | pandas>=2.1.0 2 | psycopg2-binary>=2.9.9 3 | tensorflow>=2.15.0 4 | scikit-learn>=1.3.0 5 | lime>=0.2.0.1 6 | numpy>=1.24.0 -------------------------------------------------------------------------------- /DeepLearning/customer_order_risk/src/__pycache__/database.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_order_risk/src/__pycache__/database.cpython-312.pyc -------------------------------------------------------------------------------- /DeepLearning/customer_order_risk/src/__pycache__/feature_engineering.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_order_risk/src/__pycache__/feature_engineering.cpython-312.pyc -------------------------------------------------------------------------------- /DeepLearning/customer_order_risk/src/__pycache__/model.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DeepLearning/customer_order_risk/src/__pycache__/model.cpython-312.pyc -------------------------------------------------------------------------------- /DeepLearning/customer_order_risk/src/database.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import psycopg2 3 | from config import DB_CONFIG 4 | 5 | class DatabaseManager: 6 | def __init__(self): 7 | self.conn = None 8 | self.connect() 9 | 10 | def connect(self): 11 | try: 12 | self.conn = psycopg2.connect(**DB_CONFIG) 13 | print("Database connection established") 14 | except Exception as e: 15 | print(f"Error connecting to the database : {e}") 16 | raise 17 | 18 | def disconnect(self): 19 | if self.conn: 20 | self.conn.close() 21 | print("Connection closed") 22 | 23 | def get_order_data(self): 24 | query = """ 25 | select 26 | od.order_id, 27 | od.product_id, 28 | od.unit_price, 29 | od.quantity, 30 | od.discount, 31 | o.customer_id, 32 | o.order_date, 33 | p.category_id, 34 | c.company_name 35 | from 36 | orders o inner join order_details od 37 | on o.order_id=od.order_id 38 | inner join products p 39 | on p.product_id=od.product_id 40 | inner join customers c 41 | on c.customer_id=o.customer_id 42 | """ 43 | 44 | try: 45 | df = pd.read_sql_query(query,self.conn) 46 | return df 47 | except Exception as e: 48 | print(f"Error {e}") 49 | raise 50 | 51 | -------------------------------------------------------------------------------- /DeepLearning/customer_order_risk/src/feature_engineering.py: -------------------------------------------------------------------------------- 1 | from sklearn.preprocessing import StandardScaler 2 | import pandas as pd 3 | import numpy as np 4 | from config import MODEL_CONFIG 5 | from config import FEATURE_CONFIG 6 | 7 | class FeatureEngineer: 8 | 9 | def __init__(self): 10 | self.scaler = StandardScaler() 11 | self.customer_features = None 12 | 13 | def create_feautures(self,df): 14 | df["total_amount"] = df["unit_price"]*df["quantity"]*(1-df["discount"]) 15 | df["discount_amount"] = df["unit_price"]*df["quantity"]*df["discount"] 16 | 17 | self.customer_features = df.groupby("customer_id").agg({ 18 | "total_amount":["mean","std","sum"], 19 | "discount":["mean","max"], 20 | "quantity":["mean","sum"] 21 | }).reset_index() 22 | 23 | self.customer_features.columns = ["customer_id","avg_order_amount","std_order_amount","total_spent","avg_discount","max_discount","avg_quantity","total_quantity"] 24 | 25 | df = df.merge(self.customer_features, on ="customer_id",how="left") 26 | 27 | high_discount = df["discount"]>df["discount"].quantile(FEATURE_CONFIG["high_discount_threshold"]) 28 | low_amount = df["total_amount"](lod.max_date-Interval '6 months') 34 | ) 35 | then 1 else 0 36 | end as will_order_again 37 | from customers c 38 | ) 39 | select 40 | s.customer_id, 41 | s.total_orders, 42 | s.total_spent, 43 | s.avg_order_value, 44 | l.will_order_again 45 | from customer_order_stats s join label_data l 46 | on s.customer_id = l.customer_id 47 | """ 48 | 49 | df = pd.read_sql(query,connection) 50 | connection.close() 51 | 52 | X = df[["total_orders","total_spent","avg_order_value"]] 53 | y = df["will_order_again"] 54 | 55 | scaler = StandardScaler() 56 | X_scaled = scaler.fit_transform(X) 57 | 58 | X_train,X_test,y_train,y_test = train_test_split(X_scaled,y,test_size=0.2,random_state=42) 59 | 60 | model = tf.keras.Sequential( 61 | [ 62 | tf.keras.layers.Dense(8,activation="relu",input_shape=(X_train.shape[1],)), 63 | tf.keras.layers.Dense(4,activation="relu"), 64 | tf.keras.layers.Dense(1,activation="sigmoid") 65 | ] 66 | ) 67 | 68 | model.compile(optimizer="adam",loss="mean_squared_error",metrics=["accuracy"]) 69 | 70 | model.fit(X_train,y_train,epochs=50,validation_data=(X_test,y_test),verbose=1) 71 | 72 | loss,acc = model.evaluate(X_test,y_test) 73 | print(acc) -------------------------------------------------------------------------------- /DeepLearning/sample1.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | #inputs 4 | temperature = 5 5 | humidity = 60 6 | 7 | X = np.array([temperature,humidity]) 8 | 9 | #nöron #weights 10 | weights = np.array([0.4,0.6]) 11 | 12 | #eşik değer 13 | bias = -20 14 | 15 | #noron çıktısı(output) 16 | 17 | output = np.dot(X,weights) + bias 18 | 19 | print("Nöronun ham çıktısı", output) 20 | 21 | def sigmoid(x): 22 | return 1/(1+np.exp(-x)) 23 | 24 | activated_output = sigmoid(output) 25 | 26 | print("Nöronun aktivasyon sonrası çıktısı : ", activated_output) -------------------------------------------------------------------------------- /DeepLearning/sample2.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from sklearn.preprocessing import StandardScaler 5 | 6 | 7 | #ages 8 | X = np.array([5,6,7,8,9,10],dtype=float).reshape(-1,1) 9 | 10 | #heights 11 | Y = np.array([110,116,123,130,136,142],dtype=float).reshape(-1,1) 12 | 13 | x_scaler = StandardScaler() 14 | y_scaler = StandardScaler() 15 | 16 | X_scaled = x_scaler.fit_transform(X) 17 | Y_scaled = y_scaler.fit_transform(Y) 18 | 19 | model = tf.keras.Sequential([ 20 | tf.keras.layers.Dense(units=10, activation="relu",input_shape=[1]), 21 | tf.keras.layers.Dense(units=1) 22 | ]) 23 | 24 | model.compile(optimizer="adam",loss="mean_squared_error") 25 | 26 | model.fit(X_scaled,Y_scaled,epochs=500, verbose=0) 27 | 28 | 29 | test_age = np.array([[7.5]]) 30 | test_age_scaled = x_scaler.transform(test_age) 31 | 32 | 33 | 34 | predicted_height_scaled = model.predict(np.array([test_age_scaled])) 35 | 36 | predicted_height = y_scaler.inverse_transform(predicted_height_scaled) 37 | 38 | print(f"{test_age} için boy tahmini = {predicted_height}") -------------------------------------------------------------------------------- /DesicionTrees/__pycache__/main.cpython-313.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DesicionTrees/__pycache__/main.cpython-313.pyc -------------------------------------------------------------------------------- /DesicionTrees/credit_model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/DesicionTrees/credit_model.pkl -------------------------------------------------------------------------------- /DesicionTrees/main.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from sklearn.tree import DecisionTreeClassifier 4 | from sklearn.model_selection import train_test_split 5 | from sklearn.metrics import accuracy_score 6 | from sklearn import tree 7 | import random 8 | import joblib 9 | from fastapi import FastAPI 10 | from pydantic import BaseModel 11 | 12 | def generateData(m=1000): 13 | data=[] 14 | for _ in range(1000): 15 | age = random.randint(20,65) 16 | income = round(random.uniform(2.5,15.0),2) 17 | credit_score = random.randint(300,800) 18 | has_default = random.choice([0,1]) 19 | approved = 1 if credit_score>650 and income>5 and not has_default else 0 20 | data.append([age,income,credit_score,has_default,approved]) 21 | return pd.DataFrame(data,columns=["age","income","credit_score","has_default","approved"]) 22 | 23 | df = generateData() 24 | 25 | X = df[["age","income","credit_score","has_default"]] 26 | y = df["approved"] 27 | 28 | model = DecisionTreeClassifier(random_state=42) 29 | model.fit(X,y) 30 | 31 | joblib.dump(model,"credit_model.pkl") 32 | 33 | app = FastAPI(title="Credit Approval API",description="Credit Approval API using Desicion Trees") 34 | 35 | 36 | class Applicant(BaseModel): 37 | age:int 38 | income:float 39 | credit_score:int 40 | has_default:int 41 | 42 | @app.post("/predict",tags=["prediction"]) 43 | def predict_approval(applicant:Applicant): 44 | data_model = joblib.load("credit_model.pkl") 45 | input_data = [[applicant.age,applicant.income,applicant.credit_score,applicant.has_default]] 46 | prediction = data_model.predict(input_data)[0] 47 | result = "Approved" if prediction==1 else "Rejected" 48 | 49 | return { 50 | "prediction": result, 51 | "details":{ 52 | "age" : applicant.age, 53 | "income": applicant.income, 54 | "credit_score":applicant.credit_score, 55 | "has_default":applicant.has_default 56 | } 57 | } 58 | 59 | #Ödev 1 - ARGE : DesicionTrees'de gini yerine alternatif ne kullanılabilir? Farkı nedir? 60 | #Ödev 2 - ARGE : Pydantic ile başka neler yapılabilir? 61 | #Ödev 3 - ARGE : Faker kütüphanesi ne işe yarar? Detaylı araştırınız. -------------------------------------------------------------------------------- /DesicionTrees/sample1.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from sklearn.tree import DecisionTreeClassifier 4 | from sklearn.model_selection import train_test_split 5 | from sklearn.metrics import accuracy_score 6 | from sklearn import tree 7 | import matplotlib.pyplot as plt 8 | import random 9 | 10 | def generateData(m=1000): 11 | data=[] 12 | for _ in range(1000): 13 | age = random.randint(20,65) 14 | income = round(random.uniform(2.5,15.0),2) 15 | credit_score = random.randint(300,800) 16 | has_default = random.choice([0,1]) 17 | approved = 1 if credit_score>650 and income>5 and not has_default else 0 18 | data.append([age,income,credit_score,has_default,approved]) 19 | return pd.DataFrame(data,columns=["age","income","credit_score","has_default","approved"]) 20 | 21 | df = generateData() 22 | 23 | X = df[["age","income","credit_score","has_default"]] 24 | y = df["approved"] 25 | 26 | X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42) 27 | 28 | model = DecisionTreeClassifier(random_state=42) 29 | model.fit(X_train,y_train) 30 | 31 | y_prediction = model.predict(X_test) 32 | print("Accuracy : ", accuracy_score(y_test,y_prediction)) 33 | 34 | plt.figure(figsize=(12, 6)) 35 | tree.plot_tree(model, feature_names=X.columns, class_names=["Rejected", "Approved"], filled=True) 36 | plt.title("Karar Ağacı Görselleştirmesi") 37 | plt.show() -------------------------------------------------------------------------------- /KMeans/sample1.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from sklearn.cluster import KMeans 4 | 5 | #müşteri gelir ve harcama listesi 6 | X = np.array([ 7 | [15,39],[16,50],[25,5],[85,59],[89,60],[75,39],[10,8],[150,29],[130,19],[24,79],[88,62],[85,49],[85,45], 8 | ]) 9 | 10 | kmeans = KMeans(n_clusters=4, random_state=42) 11 | 12 | kmeans.fit(X) 13 | labels = kmeans.labels_ 14 | 15 | plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='rainbow') 16 | plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s=200, marker='X', c='black') 17 | plt.xlabel("Gelir") 18 | plt.ylabel("Harcama") 19 | plt.title("K-means ile Müşteri Segmentasyonu") 20 | plt.show() -------------------------------------------------------------------------------- /KNN/knn_model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/engindemirog/machinelearning/c5345e411fe370fa92dab7b941086d4a5c54e46d/KNN/knn_model.pkl -------------------------------------------------------------------------------- /KNN/sample1.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from sklearn.neighbors import KNeighborsClassifier 4 | from sklearn.model_selection import train_test_split 5 | from sklearn.metrics import accuracy_score 6 | 7 | #midterm,final 8 | data = [ 9 | [30,40], 10 | [60,70], 11 | [90,80], 12 | [20,45], 13 | [30,49], 14 | [60,54], 15 | [90,64], 16 | [100,78], 17 | [10,40], 18 | [20,100], 19 | [80,60], 20 | [70,100], 21 | [70,90], 22 | [50,80], 23 | [50,77], 24 | ] 25 | 26 | def calculate(mid,final): 27 | average = mid*0.4+final*0.6 28 | return 1 if average>=50 else 0 29 | 30 | labels = [calculate(x[0],x[1]) for x in data] 31 | 32 | df = pd.DataFrame(data, columns=["mid","final"]) 33 | df["status"] = labels 34 | 35 | X = df[["mid","final"]] 36 | y = df["status"] 37 | 38 | X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42) 39 | 40 | model = KNeighborsClassifier(n_neighbors=3) 41 | model.fit(X_train,y_train) 42 | 43 | y_prediction = model.predict(X_test) 44 | 45 | print(accuracy_score(y_test,y_prediction)) 46 | 47 | student = np.array([[0,70]]) 48 | prediction = model.predict(student) 49 | 50 | print("Geçti" if prediction[0]==1 else "Kaldı") -------------------------------------------------------------------------------- /KNN/sample2.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from sklearn.neighbors import KNeighborsClassifier 4 | from sklearn.model_selection import train_test_split 5 | from sklearn.metrics import accuracy_score 6 | 7 | #1 eğitim seviyesi 0=lise , 1 lisans, 2 YL 8 | #2 tecrübe yılı 9 | #3 hired? 10 | data = [ 11 | [0, 0, 0], [0, 1, 0], [0, 2, 0], [0, 3, 0], 12 | [1, 0, 0], [1, 2, 0], [1, 2, 1], [1, 2, 0], 13 | [1, 4, 1], [1, 5, 1], [2, 0, 0], [2, 1, 1], 14 | [2, 2, 1], [2, 3, 1], [2, 4, 1], [2, 5, 1], 15 | [2, 6, 1], [2, 7, 1], [2, 8, 1], [2, 9, 1] 16 | ] 17 | 18 | df = pd.DataFrame(data, columns=["school","year","hired"]) 19 | 20 | X = df[["school","year"]] #features 21 | y = df["hired"] #target 22 | 23 | X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42) 24 | 25 | k_values = range(1,16) 26 | scores =[] 27 | 28 | for k in k_values: 29 | model = KNeighborsClassifier(n_neighbors=k) 30 | model.fit(X_train,y_train) 31 | y_prediction = model.predict(X_test) 32 | accuracy = accuracy_score(y_test,y_prediction) 33 | scores.append(accuracy) 34 | 35 | print(scores) 36 | -------------------------------------------------------------------------------- /KNN/sample3.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from sklearn.neighbors import KNeighborsClassifier 4 | from sklearn.model_selection import train_test_split 5 | from sklearn.metrics import accuracy_score 6 | import joblib 7 | from flask import Flask,request,jsonify 8 | 9 | #1 eğitim seviyesi 0=lise , 1 lisans, 2 YL 10 | #2 tecrübe yılı 11 | #3 hired? 12 | data = [ 13 | [0, 0, 0], [0, 1, 0], [0, 2, 0], [0, 3, 0], 14 | [1, 0, 0], [1, 2, 0], [1, 2, 1], [1, 2, 0], 15 | [1, 4, 1], [1, 5, 1], [2, 0, 0], [2, 1, 1], 16 | [2, 2, 1], [2, 3, 1], [2, 4, 1], [2, 5, 1], 17 | [2, 6, 1], [2, 7, 1], [2, 8, 1], [2, 9, 1] 18 | ] 19 | 20 | df = pd.DataFrame(data, columns=["school","year","hired"]) 21 | 22 | X = df[["school","year"]] #features 23 | y = df["hired"] #target 24 | 25 | model = KNeighborsClassifier(n_neighbors=5) 26 | model.fit(X,y) 27 | 28 | joblib.dump(model,"knn_model.pkl") 29 | 30 | 31 | app = Flask(__name__) #json,restful 32 | 33 | 34 | model = joblib.load("knn_model.pkl") 35 | 36 | @app.route("/") 37 | def home(): 38 | return "KNN API hazır 🚀" #http://localhost:5000/ 39 | 40 | @app.route("/prediction", methods=["POST"]) #http://localhost:5000/prediction POST 41 | def predict(): 42 | data = request.get_json() 43 | try: 44 | school = int(data["school"]) 45 | year = int(data["year"]) 46 | 47 | testData = np.array([[school, year]]) 48 | result = model.predict(testData)[0] 49 | 50 | return jsonify({ 51 | "school": school, 52 | "year": year, 53 | "hired": "Alındı" if result == 1 else "Alınmadı" 54 | }) 55 | 56 | except Exception as e: 57 | return jsonify({"hata": str(e)}), 400 58 | 59 | if __name__ == "__main__": 60 | app.run(debug=True) -------------------------------------------------------------------------------- /NaiveBayes/sample1.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.feature_extraction.text import CountVectorizer 3 | from sklearn.naive_bayes import MultinomialNB 4 | from sklearn.model_selection import train_test_split 5 | from sklearn.metrics import accuracy_score 6 | 7 | data = { 8 | 'text': [ 9 | 'Kredi kartı borcunuzu hemen ödeyin', 10 | 'Tebrikler! Kazandınız. Hemen tıklayın!', 11 | 'Yarın toplantıyı unutma', 12 | 'Bedava hediye seni bekliyor', 13 | 'Önemli bir fatura bildirimi var', 14 | 'Bu hafta sonu kahve içelim mi?', 15 | 'Ücretsiz tatil kazandınız!', 16 | 'Bu ay çok çalıştın, tebrikler' 17 | ], 18 | 'label': [1, 1, 0, 1, 0, 0, 1, 0] # 1: spam, 0: normal 19 | } 20 | 21 | df =pd.DataFrame(data) 22 | 23 | vectorizer = CountVectorizer() 24 | X = vectorizer.fit_transform(df["text"]) 25 | 26 | y = df["label"] 27 | 28 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42) 29 | 30 | model = MultinomialNB() 31 | model.fit(X_train,y_train) 32 | 33 | y_pred = model.predict(X_test) 34 | 35 | print("Accuracy : ", accuracy_score(y_test,y_pred)) -------------------------------------------------------------------------------- /RandomForest/sample1.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from sklearn.ensemble import RandomForestRegressor 4 | from sklearn.model_selection import train_test_split 5 | from sklearn.metrics import mean_squared_error,r2_score 6 | import matplotlib.pyplot as plt 7 | 8 | np.random.seed(42) 9 | 10 | n_samples = 200000 11 | area = np.random.randint(50,250,n_samples) 12 | rooms = np.random.randint(1,6,n_samples) 13 | age = np.random.randint(0,50,n_samples) 14 | location_score = np.random.uniform(0,10,n_samples) 15 | 16 | noise = np.random.normal(0,20000,n_samples) 17 | price = (area * 3000) + (rooms*50000) - (age*1000) + (location_score*10000) + noise 18 | 19 | df = pd.DataFrame({ 20 | "area":area, 21 | "rooms":rooms, 22 | "age":age, 23 | "location_score":location_score, 24 | "price":price 25 | }) 26 | 27 | X = df[["area","rooms","age","location_score"]] 28 | y = df["price"] 29 | 30 | X_train ,X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42) 31 | 32 | model = RandomForestRegressor(n_estimators=50,random_state=42) 33 | model.fit(X_train,y_train) 34 | y_prediction = model.predict(X_test) 35 | 36 | rmse = np.sqrt(mean_squared_error(y_test,y_prediction)) 37 | print("RMSE - Root Mean Squared Error : ", rmse) 38 | 39 | plt.figure(figsize=(8,6)) 40 | plt.scatter(y_test, y_prediction, alpha=0.5) 41 | plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--') 42 | plt.xlabel('Gerçek Fiyat') 43 | plt.ylabel('Tahmin Edilen Fiyat') 44 | plt.title('Gerçek vs Tahmin Edilen Ev Fiyatları') 45 | plt.grid(True) 46 | plt.show() 47 | 48 | 49 | def plot_svm_decision_boundary(model, X, y): 50 | plt.scatter(X[:, 0], X[:, 1], c=y, cmap='bwr', s=60) 51 | 52 | ax = plt.gca() 53 | xlim = ax.get_xlim() 54 | ylim = ax.get_ylim() 55 | 56 | xx = np.linspace(xlim[0], xlim[1], 30) 57 | yy = np.linspace(ylim[0], ylim[1], 30) 58 | YY, XX = np.meshgrid(yy, xx) 59 | xy = np.vstack([XX.ravel(), YY.ravel()]).T 60 | Z = model.decision_function(xy).reshape(XX.shape) 61 | 62 | 63 | ax.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], alpha=0.7, 64 | linestyles=['--', '-', '--']) 65 | 66 | ax.scatter(model.support_vectors_[:, 0], model.support_vectors_[:, 1], 67 | s=100, linewidth=1, facecolors='none', edgecolors='k') 68 | plt.title("SVM Sınıflandırması ve Destek Vektörleri") 69 | plt.show() 70 | 71 | 72 | plot_svm_decision_boundary(model, X, y) -------------------------------------------------------------------------------- /SVM/sample1.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from sklearn import datasets 4 | from sklearn.svm import SVC 5 | 6 | X,y = datasets.make_blobs(n_samples=50, centers=2, random_state=5) 7 | 8 | model = SVC(kernel="linear") 9 | model.fit(X,y) 10 | 11 | def plot_svm_decision_boundary(model, X, y): 12 | plt.scatter(X[:, 0], X[:, 1], c=y, cmap='bwr', s=60) 13 | 14 | ax = plt.gca() 15 | xlim = ax.get_xlim() 16 | ylim = ax.get_ylim() 17 | 18 | xx = np.linspace(xlim[0], xlim[1], 30) 19 | yy = np.linspace(ylim[0], ylim[1], 30) 20 | YY, XX = np.meshgrid(yy, xx) 21 | xy = np.vstack([XX.ravel(), YY.ravel()]).T 22 | Z = model.decision_function(xy).reshape(XX.shape) 23 | 24 | 25 | ax.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], alpha=0.7, 26 | linestyles=['--', '-', '--']) 27 | 28 | ax.scatter(model.support_vectors_[:, 0], model.support_vectors_[:, 1], 29 | s=100, linewidth=1, facecolors='none', edgecolors='k') 30 | plt.title("SVM Sınıflandırması ve Destek Vektörleri") 31 | plt.show() 32 | 33 | 34 | plot_svm_decision_boundary(model, X, y) 35 | -------------------------------------------------------------------------------- /SVM/sample2.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from sklearn.svm import SVC 4 | from sklearn.preprocessing import StandardScaler 5 | from sklearn.model_selection import train_test_split 6 | from sklearn.metrics import accuracy_score, classification_report 7 | 8 | 9 | np.random.seed(42) 10 | 11 | n_samples = 300 12 | incomes = np.random.uniform(2,12,n_samples) 13 | debpts = np.random.uniform(10,90,n_samples) 14 | 15 | labels = [] 16 | 17 | for income,debpt in zip(incomes,debpts): 18 | if income<6 and debpt>70: 19 | labels.append(1)#Riskli 20 | else: 21 | labels.append(0)#Güvenli 22 | 23 | X = np.column_stack((incomes,debpts)) 24 | y = np.array(labels) 25 | 26 | scaler = StandardScaler() 27 | X_scaled = scaler.fit_transform(X) 28 | 29 | X_train,X_test,y_train,y_test = train_test_split(X_scaled,y,test_size=0.2,random_state=42) 30 | 31 | model = SVC(kernel="sigmoid") 32 | model.fit(X_train,y_train) 33 | 34 | accuracy = model.score(X_test,y_test) 35 | print("accuracy : ", accuracy) 36 | 37 | def plot_decision_boundary(model, X, y): 38 | plt.figure(figsize=(10, 6)) 39 | plt.scatter(X[:, 0], X[:, 1], c=y, cmap='bwr', s=60, edgecolors='k', alpha=0.7) 40 | ax = plt.gca() 41 | xlim = ax.get_xlim() 42 | ylim = ax.get_ylim() 43 | 44 | xx = np.linspace(xlim[0], xlim[1], 30) 45 | yy = np.linspace(ylim[0], ylim[1], 30) 46 | YY, XX = np.meshgrid(yy, xx) 47 | xy = np.vstack([XX.ravel(), YY.ravel()]).T 48 | Z = model.decision_function(xy).reshape(XX.shape) 49 | 50 | ax.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], 51 | linestyles=['--', '-', '--']) 52 | 53 | ax.scatter(model.support_vectors_[:, 0], model.support_vectors_[:, 1], 54 | s=150, linewidth=1.5, facecolors='none', edgecolors='k') 55 | 56 | plt.title("Faker Verisiyle SVM: Kredi Riski Tahmini") 57 | plt.xlabel("Gelir (standardize)") 58 | plt.ylabel("Borç Oranı (standardize)") 59 | plt.grid(True) 60 | plt.show() 61 | 62 | plot_decision_boundary(model, X_scaled, y) --------------------------------------------------------------------------------