├── Miuul_10_Dönem_Hibrit_Program.pdf ├── README.md ├── certificates ├── DataScientistBootcamp.png ├── MELİSA GÖZET -CRM Analytics- 2022-11-15.pdf ├── Melisa Gözet - 2022-12-29.pdf ├── Melisa Gözet - Measurement Problems- 2022-11-15 .pdf ├── Melisa Gözet - Python Programming for Data Science- 2022-11-15.pdf ├── Melisa Gözet - Recommendation-Systems - 2022-11-21.pdf ├── Melisa Gözet - feature-engineering- 2022-12-01.pdf └── Melisa Gözet -Introduction to Data Science and Artificial Intelligence- 2022-09-16.pdf ├── week_1 (29.09.22-05.10.22) └── python_programming.ipynb ├── week_2 (06.10.22-12.10.22) └── list_comprehension&pandas_exp.ipynb ├── week_3 (13.10.22-19.10.22) ├── Kural_Tabanlı_Sınıflandırma.pdf ├── kural_tabanli_siniflandirma_melisa.ipynb └── numpy_pandas.ipynb ├── week_4 (20.10.22-26.10.22) ├── CRMelisa.pdf ├── FLO_CLTV_Prediction_Proje.ipynb ├── FLO_RFM_Analizi_Melisa.ipynb ├── cltv_basicode_melisa.ipynb ├── cltv_prediction.ipynb └── rfm_melisa.ipynb ├── week_5 (27.10.22-02.11.22) ├── Rating_Product_Sorting_Reviews_Amazon220805073819-221026-191605.pdf ├── amazon_review.csv ├── amazon_review_melisa_gözet.ipynb ├── rating_products_melisa.ipynb ├── sorting_melisa.ipynb └── sorting_reviews_melisa.ipynb ├── week_6 (03.11.22-09.11.22) ├── Ab_Testing_Melisa.pdf ├── ab_testing.xlsx ├── ab_testing_melisa.ipynb └── ab_testing_proje_melisa.ipynb ├── week_7 (10.11.22-16.11.22) ├── association_rule_learning(arl).ipynb ├── item_based_recommender_melisa.ipynb ├── matrix_factorization_melisa.ipynb ├── projeler │ ├── armut_melisa.ipynb │ ├── bonus_arl_recommender_system.ipynb │ └── hibrit_melisa.ipynb ├── recommendation_systems_melisa.pdf └── user_based_recommender_melisa.ipynb ├── week_9 (24.11.22-30.11.22) ├── Soledad Galli_Feature Engineering Cookbook.pdf ├── feature_engineering_encoding.ipynb ├── feature_engineering_feature_extraction.ipynb ├── feature_engineering_missing_values.ipynb ├── feature_engineering_outliers.ipynb └── feature_engineering_titanic.ipynb ├── 
week__10 (01.12.22-07.12.22) ├── doğrusal_regresyon_uygulama.ipynb ├── machine_learning_case1.ipynb ├── makine_öğrenmesi_doğrusal_regresyon.ipynb ├── makine_öğrenmesi_doğrusal_regresyon_melisa.pdf ├── makine_öğrenmesi_temel_kavramlar .ipynb └── makine_öğrenmesi_temel_kavramlar_melisa.pdf ├── week__11 (08.12.22-14.12.22) ├── makine_öğrenmesi_k_enyakın_komşu+uygulama.ipynb ├── makine_öğrenmesi_lojistik_regresyon_uygulama.ipynb └── makine_öğrenmesi_lojistik_regresyonn.ipynb ├── week__12 (15.12.22-21.12.22) ├── cart_final.png ├── makine_öğrenmesi_cart.ipynb ├── makine_öğrenmesi_cart_uygulama.ipynb └── makine_öğrenmesi_gelişmiş_ağaç_yöntemleri_teori+uygulama.ipynb └── week__13 (22.12.22-28.12.22) ├── makine_öğrenmesi_denetimsiz_öğrenme.ipynb ├── makine_öğrenmesi_denetimsiz_öğrenme_uygulama.ipynb ├── makine_öğrenmesi_pipelineI.ipynb └── makine_öğrenmesi_pipeline_II_III.ipynb /Miuul_10_Dönem_Hibrit_Program.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/melisagozet/DataScienceMachineLearningBootcampMiuul/d9d571e6b27c33e09d121a3804ee231460ed55b3/Miuul_10_Dönem_Hibrit_Program.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Data Science & Machine Learning Bootcamp Miuul 3 | 4 | A bootcamp program covering the following topics from basic to advanced for anyone who wants to learn data science and machine learning. 
5 | ## Course Distribution by Week 6 | 7 | | Python Programming for Data Science (Week 1-2-3) | 8 | | ----------------- | 9 | | Configure a Virtual Environment | 10 | | Data Structures | 11 | | Functions | 12 | | Conditions | 13 | | Loops| 14 | |Comprehension (List-Dict) | 15 | |Data Analysis with Python (Numpy Pandas) | 16 | 17 | **Project** 18 | * Rule-Based Classification 19 | 20 | | CRM Analytics (Week 4) | 21 | | ----------------- | 22 | | CRM Analytics | 23 | | RFM Analytics | 24 | | Customer Lifetime Value | 25 | | Customer Lifetime Value Prediction | 26 | 27 | **Projects** 28 | 29 | * Flo RFM Analysis 30 | * Flo CLTV Prediction 31 | 32 | | Measurement Problems (Week 5-6) | 33 | | ----------------- | 34 | | Rating Products | 35 | | Sorting Products | 36 | | Sorting Reviews | 37 | | AB Testing | 38 | 39 | **Project** 40 | * Rating Product & Sorting Reviews in Amazon 41 | 42 | | Recommendation Systems (Week 7) | 43 | | ----------------- | 44 | | Association Rule Learning | 45 | | Content Based Recommendation | 46 | | Item Based Collaborative Filtering | 47 | | User Based Collaborative Filtering | 48 | | Model Based Matrix Factorization | 49 | 50 | **Projects** 51 | 52 | * Association Rule Based Recommender System 53 | * Hybrid Recommender System 54 | 55 | 56 | ❗️ A break is given in the 8th week. 
57 | 58 | | Feature Engineering (Week 9) | 59 | | ----------------- | 60 | | Outliers | 61 | | Missing Values | 62 | | Encoding Scaling | 63 | | Feature Extraction | 64 | 65 | **Projects** 66 | * Telco Customer Churn Feature Engineering 67 | * Titanic Feature Engineering 68 | 69 | 70 | | Machine Learning (Week 10-11-12-13) | 71 | | ----------------- | 72 | | Basic Concepts | 73 | | Linear Regression | 74 | | Logistic Regression | 75 | | KNN | 76 | | CART | 77 | | Advanced Tree Methods| 78 | | Unsupervised Learning| 79 | | Machine Learning Pipeline| 80 | 81 | **Projects** 82 | * Titanic Machine Learning 83 | * Diabetes Feature Engineering & Machine Learning 84 | * House Prices - Advanced Regression Techniques 85 | * Flo Machine Learning 86 | * Scoutium Classification with Machine Learning 87 | 88 | |SQL (Week 14)| 89 | | ----------------- | 90 | -------------------------------------------------------------------------------- /certificates/DataScientistBootcamp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/melisagozet/DataScienceMachineLearningBootcampMiuul/d9d571e6b27c33e09d121a3804ee231460ed55b3/certificates/DataScientistBootcamp.png -------------------------------------------------------------------------------- /certificates/MELİSA GÖZET -CRM Analytics- 2022-11-15.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/melisagozet/DataScienceMachineLearningBootcampMiuul/d9d571e6b27c33e09d121a3804ee231460ed55b3/certificates/MELİSA GÖZET -CRM Analytics- 2022-11-15.pdf -------------------------------------------------------------------------------- /certificates/Melisa Gözet - 2022-12-29.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/melisagozet/DataScienceMachineLearningBootcampMiuul/d9d571e6b27c33e09d121a3804ee231460ed55b3/certificates/Melisa Gözet - 
2022-12-29.pdf -------------------------------------------------------------------------------- /certificates/Melisa Gözet - Measurement Problems- 2022-11-15 .pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/melisagozet/DataScienceMachineLearningBootcampMiuul/d9d571e6b27c33e09d121a3804ee231460ed55b3/certificates/Melisa Gözet - Measurement Problems- 2022-11-15 .pdf -------------------------------------------------------------------------------- /certificates/Melisa Gözet - Python Programming for Data Science- 2022-11-15.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/melisagozet/DataScienceMachineLearningBootcampMiuul/d9d571e6b27c33e09d121a3804ee231460ed55b3/certificates/Melisa Gözet - Python Programming for Data Science- 2022-11-15.pdf -------------------------------------------------------------------------------- /certificates/Melisa Gözet - Recommendation-Systems - 2022-11-21.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/melisagozet/DataScienceMachineLearningBootcampMiuul/d9d571e6b27c33e09d121a3804ee231460ed55b3/certificates/Melisa Gözet - Recommendation-Systems - 2022-11-21.pdf -------------------------------------------------------------------------------- /certificates/Melisa Gözet - feature-engineering- 2022-12-01.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/melisagozet/DataScienceMachineLearningBootcampMiuul/d9d571e6b27c33e09d121a3804ee231460ed55b3/certificates/Melisa Gözet - feature-engineering- 2022-12-01.pdf -------------------------------------------------------------------------------- /certificates/Melisa Gözet -Introduction to Data Science and Artificial Intelligence- 2022-09-16.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/melisagozet/DataScienceMachineLearningBootcampMiuul/d9d571e6b27c33e09d121a3804ee231460ed55b3/certificates/Melisa Gözet -Introduction to Data Science and Artificial Intelligence- 2022-09-16.pdf -------------------------------------------------------------------------------- /week_3 (13.10.22-19.10.22)/Kural_Tabanlı_Sınıflandırma.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/melisagozet/DataScienceMachineLearningBootcampMiuul/d9d571e6b27c33e09d121a3804ee231460ed55b3/week_3 (13.10.22-19.10.22)/Kural_Tabanlı_Sınıflandırma.pdf -------------------------------------------------------------------------------- /week_4 (20.10.22-26.10.22)/CRMelisa.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/melisagozet/DataScienceMachineLearningBootcampMiuul/d9d571e6b27c33e09d121a3804ee231460ed55b3/week_4 (20.10.22-26.10.22)/CRMelisa.pdf -------------------------------------------------------------------------------- /week_4 (20.10.22-26.10.22)/FLO_CLTV_Prediction_Proje.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"collapsed_sections":[],"authorship_tag":"ABX9TyN1mLnROJT/tB30mbqxl+V7"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["# **FLO RFM Analizi**\n","## **İş Problemi**\n","Online ayakkabı mağazası olan FLO müşterilerini segmentlere ayırıp bu segmentlere göre pazarlama stratejileri belirlemek istiyor. 
Buna yönelik olarak müşterilerin davranışları tanımlanacak ve bu davranışlardaki öbeklenmelere göre gruplar oluşturulacak.\n","\n","## **Veri Seti Hikayesi**\n","Veri seti Flo’dan son alışverişlerini 2020 - 2021 yıllarında OmniChannel (hem online hem offline alışveriş yapan) olarak yapan müşterilerin geçmiş alışveriş davranışlarından elde edilen bilgilerden oluşmaktadır.\n","\n","\n","### **DEGİSKENLER**\n","**master_id :** Eşsiz müşteri numarası\n","\n","**order_channel :** Alışveriş yapılan platforma ait hangi kanalın kullanıldığı (Android, ios, Desktop, Mobile)\n","\n","**last_order_channel:** En son alışverişin yapıldığı kanal\n","\n","**first_order_date :** Müşterinin yaptığı ilk alışveriş tarihi\n","\n","**last_order_date :** Müşterinin yaptığı son alışveriş tarihi\n","\n","**last_order_date_online :** Müşterinin online platformda yaptığı son alışveriş tarihi\n","\n","**last_order_date_offline :** Müşterinin offline platformda yaptığı son alışveriş tarihi\n","\n","**order_num_total_ever_online :** Müşterinin online platformda yaptığı toplam alışveriş sayısı\n","\n","**order_num_total_ever_offline :** Müşterinin offline'da yaptığı toplam alışveriş sayısı\n","\n","**customer_value_total_ever_offline :** Müşterinin offline alışverişlerinde ödediği toplam ücret\n","\n","**customer_value_total_ever_online :** Müşterinin online alışverişlerinde ödediği toplam ücret\n","\n","**interested_in_categories_12 :** Müşterinin son 12 ayda alışveriş yaptığı kategorilerin listesi"],"metadata":{"id":"EYigi3OSh92H"}},{"cell_type":"markdown","source":["#### **Görev 1**: Veriyi anlama ve hazırlama\n","##### **Adım 1:** flo_data_20K.csv verisini okuyunuz. 
Dataframe'in kopyasını oluşturunuz."],"metadata":{"id":"MWD0nN51iB6G"}},{"cell_type":"code","execution_count":1,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"f_7rvH7Jhw8j","executionInfo":{"status":"ok","timestamp":1666647651910,"user_tz":-180,"elapsed":2397,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"7d639833-b179-454d-90d8-d825d188fc5d"},"outputs":[{"output_type":"stream","name":"stdout","text":["Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount(\"/content/gdrive\", force_remount=True).\n"]}],"source":["from google.colab import drive\n","drive.mount(\"/content/gdrive\")"]},{"cell_type":"code","source":["!pip install lifetimes "],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"XFpq0BGVi2JY","executionInfo":{"status":"ok","timestamp":1666647656309,"user_tz":-180,"elapsed":4412,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"37cc4edd-261d-4931-d245-407b1bec0159"},"execution_count":2,"outputs":[{"output_type":"stream","name":"stdout","text":["Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n","Requirement already satisfied: lifetimes in /usr/local/lib/python3.7/dist-packages (0.11.3)\n","Requirement already satisfied: numpy>=1.10.0 in /usr/local/lib/python3.7/dist-packages (from lifetimes) (1.21.6)\n","Requirement already satisfied: pandas>=0.24.0 in /usr/local/lib/python3.7/dist-packages (from lifetimes) (1.3.5)\n","Requirement already satisfied: autograd>=1.2.0 in /usr/local/lib/python3.7/dist-packages (from lifetimes) (1.5)\n","Requirement already satisfied: dill>=0.2.6 in /usr/local/lib/python3.7/dist-packages (from lifetimes) (0.3.5.1)\n","Requirement already satisfied: scipy>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from lifetimes) (1.7.3)\n","Requirement already satisfied: future>=0.15.2 in /usr/local/lib/python3.7/dist-packages (from 
autograd>=1.2.0->lifetimes) (0.16.0)\n","Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.24.0->lifetimes) (2022.4)\n","Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.24.0->lifetimes) (2.8.2)\n","Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas>=0.24.0->lifetimes) (1.15.0)\n"]}]},{"cell_type":"code","source":["import pandas as pd\n","import datetime as dt\n","from lifetimes import BetaGeoFitter\n","from lifetimes import GammaGammaFitter\n","from sklearn.preprocessing import MinMaxScaler\n","pd.set_option('display.max_columns', None)\n","pd.set_option('display.max_rows', None)\n","pd.set_option('display.float_format', lambda x: '%.2f' % x)\n","pd.options.mode.chained_assignment = None"],"metadata":{"id":"33QqZew4iD3z","executionInfo":{"status":"ok","timestamp":1666647657742,"user_tz":-180,"elapsed":1453,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}}},"execution_count":3,"outputs":[]},{"cell_type":"code","source":["df_ = pd.read_csv(\"/content/gdrive/MyDrive/DSMLBC10/week_4 (20.10.22-26.10.22)/projects/flo_data_20k.csv\")\n","df= df_.copy()\n","df.head()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":531},"id":"5i_HOcPkiaFo","executionInfo":{"status":"ok","timestamp":1666647657744,"user_tz":-180,"elapsed":38,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"82eb5b5d-bd1a-44b2-b898-f79700d72d9c"},"execution_count":4,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" master_id order_channel last_order_channel \\\n","0 cc294636-19f0-11eb-8d74-000d3a38a36f Android App Offline \n","1 f431bd5a-ab7b-11e9-a2fc-000d3a38a36f Android App Mobile \n","2 69b69676-1a40-11ea-941b-000d3a38a36f Android App Android App \n","3 1854e56c-491f-11eb-806e-000d3a38a36f Android App Android App \n","4 
d6ea1074-f1f5-11e9-9346-000d3a38a36f Desktop Desktop \n","\n"," first_order_date last_order_date last_order_date_online \\\n","0 2020-10-30 2021-02-26 2021-02-21 \n","1 2017-02-08 2021-02-16 2021-02-16 \n","2 2019-11-27 2020-11-27 2020-11-27 \n","3 2021-01-06 2021-01-17 2021-01-17 \n","4 2019-08-03 2021-03-07 2021-03-07 \n","\n"," last_order_date_offline order_num_total_ever_online \\\n","0 2021-02-26 4.00 \n","1 2020-01-10 19.00 \n","2 2019-12-01 3.00 \n","3 2021-01-06 1.00 \n","4 2019-08-03 1.00 \n","\n"," order_num_total_ever_offline customer_value_total_ever_offline \\\n","0 1.00 139.99 \n","1 2.00 159.97 \n","2 2.00 189.97 \n","3 1.00 39.99 \n","4 1.00 49.99 \n","\n"," customer_value_total_ever_online interested_in_categories_12 \n","0 799.38 [KADIN] \n","1 1853.58 [ERKEK, COCUK, KADIN, AKTIFSPOR] \n","2 395.35 [ERKEK, KADIN] \n","3 81.98 [AKTIFCOCUK, COCUK] \n","4 159.99 [AKTIFSPOR] "],"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
master_idorder_channellast_order_channelfirst_order_datelast_order_datelast_order_date_onlinelast_order_date_offlineorder_num_total_ever_onlineorder_num_total_ever_offlinecustomer_value_total_ever_offlinecustomer_value_total_ever_onlineinterested_in_categories_12
0cc294636-19f0-11eb-8d74-000d3a38a36fAndroid AppOffline2020-10-302021-02-262021-02-212021-02-264.001.00139.99799.38[KADIN]
1f431bd5a-ab7b-11e9-a2fc-000d3a38a36fAndroid AppMobile2017-02-082021-02-162021-02-162020-01-1019.002.00159.971853.58[ERKEK, COCUK, KADIN, AKTIFSPOR]
269b69676-1a40-11ea-941b-000d3a38a36fAndroid AppAndroid App2019-11-272020-11-272020-11-272019-12-013.002.00189.97395.35[ERKEK, KADIN]
31854e56c-491f-11eb-806e-000d3a38a36fAndroid AppAndroid App2021-01-062021-01-172021-01-172021-01-061.001.0039.9981.98[AKTIFCOCUK, COCUK]
4d6ea1074-f1f5-11e9-9346-000d3a38a36fDesktopDesktop2019-08-032021-03-072021-03-072019-08-031.001.0049.99159.99[AKTIFSPOR]
\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":4}]},{"cell_type":"markdown","source":["##### **Adım 2:** Aykırı değerleri baskılamak için gerekli olan outlier_thresholds ve replace_with_thresholds fonksiyonlarını tanımlayınız.\n","**Not:** cltv hesaplanırken frequency değerleri integer olması gerekmektedir.Bu nedenle alt ve üst limitlerini round() ile yuvarlayınız.\n"],"metadata":{"id":"YLDkXigNjGLE"}},{"cell_type":"code","source":["def outlier_thresholds(dataframe, variable):\n"," quartile1 = dataframe[variable].quantile(0.01)\n"," quartile3 = dataframe[variable].quantile(0.99)\n"," interquantile_range = quartile3 - quartile1\n"," up_limit = quartile3 + 1.5 * interquantile_range\n"," low_limit = quartile1 - 1.5 * interquantile_range\n"," return low_limit, up_limit\n","\n","def replace_with_thresholds(dataframe, variable):\n"," low_limit, up_limit = outlier_thresholds(dataframe, variable)\n"," # dataframe.loc[(dataframe[variable] < low_limit), variable] = low_limit\n"," dataframe.loc[(dataframe[variable] > up_limit), variable] = up_limit"],"metadata":{"id":"UNwHSVlZiXkG","executionInfo":{"status":"ok","timestamp":1666647657745,"user_tz":-180,"elapsed":34,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}}},"execution_count":5,"outputs":[]},{"cell_type":"markdown","source":["##### **Adım 3:** \"order_num_total_ever_online\",\"order_num_total_ever_offline\",\"customer_value_total_ever_offline\",\"customer_value_total_ever_online\" değişkenlerinin aykırı değerleri varsa baskılayınız."],"metadata":{"id":"p1L4XwvMj8Bx"}},{"cell_type":"code","source":["df.describe().T"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":175},"id":"-rr1OyCQmewJ","executionInfo":{"status":"ok","timestamp":1666647657746,"user_tz":-180,"elapsed":34,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"9b19b0bf-45aa-439d-8d35-2267da0266a1"},"execution_count":6,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" count 
mean std min 25% 50% \\\n","order_num_total_ever_online 19945.00 3.11 4.23 1.00 1.00 2.00 \n","order_num_total_ever_offline 19945.00 1.91 2.06 1.00 1.00 1.00 \n","customer_value_total_ever_offline 19945.00 253.92 301.53 10.00 99.99 179.98 \n","customer_value_total_ever_online 19945.00 497.32 832.60 12.99 149.98 286.46 \n","\n"," 75% max \n","order_num_total_ever_online 4.00 200.00 \n","order_num_total_ever_offline 2.00 109.00 \n","customer_value_total_ever_offline 319.97 18119.14 \n","customer_value_total_ever_online 578.44 45220.13 "],"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
countmeanstdmin25%50%75%max
order_num_total_ever_online19945.003.114.231.001.002.004.00200.00
order_num_total_ever_offline19945.001.912.061.001.001.002.00109.00
customer_value_total_ever_offline19945.00253.92301.5310.0099.99179.98319.9718119.14
customer_value_total_ever_online19945.00497.32832.6012.99149.98286.46578.4445220.13
\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":6}]},{"cell_type":"code","source":["replace_with_thresholds(df, \"order_num_total_ever_online\")\n","replace_with_thresholds(df, \"order_num_total_ever_offline\")\n","replace_with_thresholds(df, \"customer_value_total_ever_offline\")\n","replace_with_thresholds(df, \"customer_value_total_ever_online\")"],"metadata":{"id":"QvzR5zDHjTpx","executionInfo":{"status":"ok","timestamp":1666647657747,"user_tz":-180,"elapsed":32,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}}},"execution_count":7,"outputs":[]},{"cell_type":"code","source":["df.describe().T"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":175},"id":"o8fycieCQT7Y","executionInfo":{"status":"ok","timestamp":1666647658173,"user_tz":-180,"elapsed":457,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"f3788407-9d0f-426f-f001-5e40b8e1fcfb"},"execution_count":8,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" count mean std min 25% 50% \\\n","order_num_total_ever_online 19945.00 3.09 3.81 1.00 1.00 2.00 \n","order_num_total_ever_offline 19945.00 1.89 1.43 1.00 1.00 1.00 \n","customer_value_total_ever_offline 19945.00 251.92 251.02 10.00 99.99 179.98 \n","customer_value_total_ever_online 19945.00 489.71 632.61 12.99 149.98 286.46 \n","\n"," 75% max \n","order_num_total_ever_online 4.00 48.50 \n","order_num_total_ever_offline 2.00 16.00 \n","customer_value_total_ever_offline 319.97 3019.88 \n","customer_value_total_ever_online 578.44 7799.54 "],"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
countmeanstdmin25%50%75%max
order_num_total_ever_online19945.003.093.811.001.002.004.0048.50
order_num_total_ever_offline19945.001.891.431.001.001.002.0016.00
customer_value_total_ever_offline19945.00251.92251.0210.0099.99179.98319.973019.88
customer_value_total_ever_online19945.00489.71632.6112.99149.98286.46578.447799.54
\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":8}]},{"cell_type":"markdown","source":["##### **Adım 4:** Omnichannel müşterilerin hem online'dan hem de offline platformlardan alışveriş yaptığını ifade etmektedir. Her bir müşterinin toplam alışveriş sayısı ve harcaması için yeni değişkenler oluşturunuz."],"metadata":{"id":"HAHunMNBkDxG"}},{"cell_type":"code","source":["df[\"order_num_total\"] = df[\"order_num_total_ever_online\"] + df[\"order_num_total_ever_offline\"]\n","df[\"customer_value_total_price\"] = df[\"customer_value_total_ever_offline\"] + df[\"customer_value_total_ever_online\"]"],"metadata":{"id":"PCw7P0a_kFse","executionInfo":{"status":"ok","timestamp":1666647658175,"user_tz":-180,"elapsed":32,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}}},"execution_count":9,"outputs":[]},{"cell_type":"markdown","source":["##### **Adım 5:** Değişken tiplerini inceleyiniz. Tarih ifade eden değişkenlerin tipini date'e çeviriniz."],"metadata":{"id":"ZZBposHTkF0O"}},{"cell_type":"code","source":["df.dtypes"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"cxjfxjH4kHAP","executionInfo":{"status":"ok","timestamp":1666647658177,"user_tz":-180,"elapsed":33,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"680b609c-0935-43df-b63a-93fc752b524f"},"execution_count":10,"outputs":[{"output_type":"execute_result","data":{"text/plain":["master_id object\n","order_channel object\n","last_order_channel object\n","first_order_date object\n","last_order_date object\n","last_order_date_online object\n","last_order_date_offline object\n","order_num_total_ever_online float64\n","order_num_total_ever_offline float64\n","customer_value_total_ever_offline float64\n","customer_value_total_ever_online float64\n","interested_in_categories_12 object\n","order_num_total float64\n","customer_value_total_price float64\n","dtype: object"]},"metadata":{},"execution_count":10}]},{"cell_type":"code","source":["import datetime as 
dt"],"metadata":{"id":"3it-e-mBm932","executionInfo":{"status":"ok","timestamp":1666647658180,"user_tz":-180,"elapsed":27,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}}},"execution_count":11,"outputs":[]},{"cell_type":"code","source":["for col in df.columns:\n"," if \"date\" in col:\n"," df[col] = df[col].apply(pd.to_datetime)"],"metadata":{"id":"sErSNrelm8U9","executionInfo":{"status":"ok","timestamp":1666647668094,"user_tz":-180,"elapsed":9938,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}}},"execution_count":12,"outputs":[]},{"cell_type":"code","source":["df.dtypes"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"MEtJMsY3nD-o","executionInfo":{"status":"ok","timestamp":1666647668097,"user_tz":-180,"elapsed":61,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"836b6850-b77c-4655-9c97-ea1ee5d0dc95"},"execution_count":13,"outputs":[{"output_type":"execute_result","data":{"text/plain":["master_id object\n","order_channel object\n","last_order_channel object\n","first_order_date datetime64[ns]\n","last_order_date datetime64[ns]\n","last_order_date_online datetime64[ns]\n","last_order_date_offline datetime64[ns]\n","order_num_total_ever_online float64\n","order_num_total_ever_offline float64\n","customer_value_total_ever_offline float64\n","customer_value_total_ever_online float64\n","interested_in_categories_12 object\n","order_num_total float64\n","customer_value_total_price float64\n","dtype: object"]},"metadata":{},"execution_count":13}]},{"cell_type":"markdown","source":["##### **Görev 2:** CLTV VERI YAPISININ OLUSTURULMASI\n","##### **Adım 1:** Veri setindeki en son alışverişin yapıldığı tarihten 2 gün sonrasını analiz tarihi olarak 
alınız."],"metadata":{"id":"ZIaO615XkHRx"}},{"cell_type":"code","source":["df[\"last_order_date\"].max()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"qgpBwI2_kQSk","executionInfo":{"status":"ok","timestamp":1666647668098,"user_tz":-180,"elapsed":54,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"3822f1c3-2d3e-4845-8fcc-6d826464daff"},"execution_count":14,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Timestamp('2021-05-30 00:00:00')"]},"metadata":{},"execution_count":14}]},{"cell_type":"code","source":["analysis_date = dt.datetime(2021, 6, 1)\n","type(analysis_date)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"MhpaAjjznKeD","executionInfo":{"status":"ok","timestamp":1666647668103,"user_tz":-180,"elapsed":49,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"26a28ead-d2ab-4b29-ffe4-5536db4f289d"},"execution_count":15,"outputs":[{"output_type":"execute_result","data":{"text/plain":["datetime.datetime"]},"metadata":{},"execution_count":15}]},{"cell_type":"markdown","source":["##### **Adım 2:** customer_id, recency_cltv_weekly, T_weekly, frequency ve monetary_cltv_avg değerlerinin yer aldığı yeni bir cltv dataframe'i oluşturunuz. 
Monetary değeri satın alma başına ortalama değer olarak, recency ve tenure değerleri ise haftalık cinsten ifade edilecek.\n"],"metadata":{"id":"7mmd5H46kXF7"}},{"cell_type":"code","source":["#df[\"total_order\"]=df[\"order_num_total_ever_online\"]+df.order_num_total_ever_offline\n","\n","#df[\"monetary\"]=df.customer_value_total_ever_offline+df.customer_value_total_ever_online\n","#df.loc[:, df.columns.str.contains(\"date\")]=df.loc[:, df.columns.str.contains(\"date\")]\\\n"," #.apply(pd.to_datetime,format='%Y-%m-%d')\n","#df.last_order_date.max()\n","#Timestamp('2021-05-30 00:00:00')\n","#today_date=dt.datetime(2021,6,2)\n","#df[\"T\"]=(today_date-df.first_order_date).dt.days\n","#df[\"recency\"]=(df.last_order_date-df.first_order_date).dt.days"],"metadata":{"id":"p-GfVeiNO12e","executionInfo":{"status":"ok","timestamp":1666647668105,"user_tz":-180,"elapsed":45,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}}},"execution_count":16,"outputs":[]},{"cell_type":"code","source":["cltv_df = pd.DataFrame()\n","cltv_df[\"customer_id\"] = df[\"master_id\"]\n","cltv_df[\"recency_cltv_weekly\"] = ((df.last_order_date-df.first_order_date).dt.days) / 7\n","cltv_df[\"T_weekly\"] = ((analysis_date - df[\"first_order_date\"]).astype('timedelta64[D]'))/7\n","cltv_df[\"frequency\"] = df[\"order_num_total\"]\n","cltv_df[\"monetary_cltv_avg\"] = df[\"customer_value_total_price\"] / df[\"order_num_total\"]\n","cltv_df.head()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":206},"id":"IugKx7vk3hM4","executionInfo":{"status":"ok","timestamp":1666647668107,"user_tz":-180,"elapsed":46,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"733505ec-5556-4a54-9c4f-4eb74521d1cb"},"execution_count":17,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" customer_id recency_cltv_weekly T_weekly \\\n","0 cc294636-19f0-11eb-8d74-000d3a38a36f 17.00 30.57 \n","1 f431bd5a-ab7b-11e9-a2fc-000d3a38a36f 209.86 224.86 
\n","2 69b69676-1a40-11ea-941b-000d3a38a36f 52.29 78.86 \n","3 1854e56c-491f-11eb-806e-000d3a38a36f 1.57 20.86 \n","4 d6ea1074-f1f5-11e9-9346-000d3a38a36f 83.14 95.43 \n","\n"," frequency monetary_cltv_avg \n","0 5.00 187.87 \n","1 21.00 95.88 \n","2 5.00 117.06 \n","3 2.00 60.98 \n","4 2.00 104.99 "],"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
customer_idrecency_cltv_weeklyT_weeklyfrequencymonetary_cltv_avg
0cc294636-19f0-11eb-8d74-000d3a38a36f17.0030.575.00187.87
1f431bd5a-ab7b-11e9-a2fc-000d3a38a36f209.86224.8621.0095.88
269b69676-1a40-11ea-941b-000d3a38a36f52.2978.865.00117.06
31854e56c-491f-11eb-806e-000d3a38a36f1.5720.862.0060.98
4d6ea1074-f1f5-11e9-9346-000d3a38a36f83.1495.432.00104.99
\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":17}]},{"cell_type":"markdown","source":["##### **Görev 3:** BG/NBD, Gamma-Gamma Modellerinin Kurulması ve CLTV’nin Hesaplanması\n","\n","##### **Adım 1:** BG/NBD modelini fit ediniz.\n","\n","3 ay içerisinde müşterilerden beklenen satın almaları tahmin ediniz ve exp_sales_3_month olarak cltv dataframe'ine ekleyiniz.\n","\n","6 ay içerisinde müşterilerden beklenen satın almaları tahmin ediniz ve exp_sales_6_month olarak cltv dataframe'ine ekleyiniz.\n"],"metadata":{"id":"l0mOaz9ZkgmB"}},{"cell_type":"code","source":["bgf = BetaGeoFitter(penalizer_coef=0.001)\n","bgf.fit(cltv_df[\"frequency\"],\n"," cltv_df[\"recency_cltv_weekly\"],\n"," cltv_df[\"T_weekly\"])"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"3X7_cerNkzrx","executionInfo":{"status":"ok","timestamp":1666647670662,"user_tz":-180,"elapsed":2599,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"e2b4d57c-8063-47ad-c59a-d72699653968"},"execution_count":18,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{},"execution_count":18}]},{"cell_type":"code","source":["cltv_df[\"exp_sales_3_month\"] = bgf.predict(4 * 3,\n"," cltv_df[\"frequency\"],\n"," cltv_df[\"recency_cltv_weekly\"],\n"," cltv_df[\"T_weekly\"])"],"metadata":{"id":"YlMl4YvxnxBc","executionInfo":{"status":"ok","timestamp":1666647670673,"user_tz":-180,"elapsed":44,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}}},"execution_count":19,"outputs":[]},{"cell_type":"code","source":["cltv_df[\"exp_sales_6_month\"] = bgf.predict(4 * 6,\n"," cltv_df[\"frequency\"],\n"," cltv_df[\"recency_cltv_weekly\"],\n"," cltv_df[\"T_weekly\"])\n","\n","cltv_df.head()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":206},"id":"8tpWFauJnxGv","executionInfo":{"status":"ok","timestamp":1666647670674,"user_tz":-180,"elapsed":42,"user":{"displayName":"melisa 
gözet","userId":"05470559865422977892"}},"outputId":"de5e7a42-aeff-4730-91d3-3effaa39ec65"},"execution_count":20,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" customer_id recency_cltv_weekly T_weekly \\\n","0 cc294636-19f0-11eb-8d74-000d3a38a36f 17.00 30.57 \n","1 f431bd5a-ab7b-11e9-a2fc-000d3a38a36f 209.86 224.86 \n","2 69b69676-1a40-11ea-941b-000d3a38a36f 52.29 78.86 \n","3 1854e56c-491f-11eb-806e-000d3a38a36f 1.57 20.86 \n","4 d6ea1074-f1f5-11e9-9346-000d3a38a36f 83.14 95.43 \n","\n"," frequency monetary_cltv_avg exp_sales_3_month exp_sales_6_month \n","0 5.00 187.87 0.97 1.95 \n","1 21.00 95.88 0.98 1.97 \n","2 5.00 117.06 0.67 1.34 \n","3 2.00 60.98 0.70 1.40 \n","4 2.00 104.99 0.40 0.79 "],"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
customer_idrecency_cltv_weeklyT_weeklyfrequencymonetary_cltv_avgexp_sales_3_monthexp_sales_6_month
0cc294636-19f0-11eb-8d74-000d3a38a36f17.0030.575.00187.870.971.95
1f431bd5a-ab7b-11e9-a2fc-000d3a38a36f209.86224.8621.0095.880.981.97
269b69676-1a40-11ea-941b-000d3a38a36f52.2978.865.00117.060.671.34
31854e56c-491f-11eb-806e-000d3a38a36f1.5720.862.0060.980.701.40
4d6ea1074-f1f5-11e9-9346-000d3a38a36f83.1495.432.00104.990.400.79
\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":20}]},{"cell_type":"code","source":["cltv_df[\"frequency\"].astype(int).head()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"b-fDRhgtSPqs","executionInfo":{"status":"ok","timestamp":1666647670676,"user_tz":-180,"elapsed":39,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"ff72e7c4-aa5c-492f-e165-3012fd010fa7"},"execution_count":21,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0 5\n","1 21\n","2 5\n","3 2\n","4 2\n","Name: frequency, dtype: int64"]},"metadata":{},"execution_count":21}]},{"cell_type":"markdown","source":["##### **Adım 2:** Gamma-Gamma modelini fit ediniz. Müşterilerin ortalama bırakacakları değeri tahminleyip exp_average_value olarak cltv dataframe'ine ekleyiniz.\n","\n"],"metadata":{"id":"jEJctrpxlBzL"}},{"cell_type":"code","source":["ggf = GammaGammaFitter(penalizer_coef=0.01)\n","\n","ggf.fit(cltv_df[\"frequency\"].astype(int), cltv_df[\"monetary_cltv_avg\"])"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"M0rvOaxRlNRY","executionInfo":{"status":"ok","timestamp":1666647671219,"user_tz":-180,"elapsed":574,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"58e51467-4293-451d-8983-98972e8c6f1e"},"execution_count":22,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{},"execution_count":22}]},{"cell_type":"code","source":["cltv_df[\"exp_average_value\"] = ggf.conditional_expected_average_profit(cltv_df[\"frequency\"],\n"," cltv_df[\"monetary_cltv_avg\"])"],"metadata":{"id":"oB-m6pI-q9NO","executionInfo":{"status":"ok","timestamp":1666647671220,"user_tz":-180,"elapsed":19,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}}},"execution_count":23,"outputs":[]},{"cell_type":"code","source":["cltv_df.sort_values(\"exp_average_value\", 
ascending=False).head()"],"metadata":{"id":"RGbAfGvJrV7D","executionInfo":{"status":"ok","timestamp":1666647671221,"user_tz":-180,"elapsed":18,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"colab":{"base_uri":"https://localhost:8080/","height":206},"outputId":"1e1ea820-7f4a-4d0f-fc5d-9368d353b50e"},"execution_count":24,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" customer_id recency_cltv_weekly T_weekly \\\n","9055 47a642fe-975b-11eb-8c2a-000d3a38a36f 2.86 7.86 \n","17323 f59053e2-a503-11e9-a2fc-000d3a38a36f 51.71 101.00 \n","15516 9083981a-f59e-11e9-841e-000d3a38a36f 63.57 83.86 \n","6402 851de3b4-8f0c-11eb-8cb8-000d3a38a36f 8.29 9.43 \n","16410 6fecd6c8-261a-11ea-8e1c-000d3a38a36f 57.00 94.86 \n","\n"," frequency monetary_cltv_avg exp_sales_3_month exp_sales_6_month \\\n","9055 4.00 1401.77 1.09 2.19 \n","17323 7.00 1106.47 0.72 1.44 \n","15516 4.00 1090.36 0.57 1.15 \n","6402 2.00 862.69 0.79 1.59 \n","16410 2.00 859.58 0.40 0.79 \n","\n"," exp_average_value \n","9055 1449.03 \n","17323 1127.61 \n","15516 1127.35 \n","6402 923.68 \n","16410 920.36 "],"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
customer_idrecency_cltv_weeklyT_weeklyfrequencymonetary_cltv_avgexp_sales_3_monthexp_sales_6_monthexp_average_value
905547a642fe-975b-11eb-8c2a-000d3a38a36f2.867.864.001401.771.092.191449.03
17323f59053e2-a503-11e9-a2fc-000d3a38a36f51.71101.007.001106.470.721.441127.61
155169083981a-f59e-11e9-841e-000d3a38a36f63.5783.864.001090.360.571.151127.35
6402851de3b4-8f0c-11eb-8cb8-000d3a38a36f8.299.432.00862.690.791.59923.68
164106fecd6c8-261a-11ea-8e1c-000d3a38a36f57.0094.862.00859.580.400.79920.36
\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":24}]},{"cell_type":"markdown","source":["##### **Adım 3:** 6 aylık CLTV hesaplayınız ve cltv ismiyle dataframe'e ekleyiniz.\n","\n"," Cltv değeri en yüksek 20 kişiyi gözlemleyiniz."],"metadata":{"id":"7-qpZJCClNcX"}},{"cell_type":"code","source":["cltv = ggf.customer_lifetime_value(bgf,\n"," cltv_df[\"frequency\"],\n"," cltv_df[\"recency_cltv_weekly\"],\n"," cltv_df[\"T_weekly\"],\n"," cltv_df[\"monetary_cltv_avg\"],\n"," time=6, # 6 aylık / aylık olmasına dikkat et\n"," freq=\"W\", # T'nin frekans bilgisi.\n"," discount_rate=0.01)\n","cltv_df[\"cltv\"] = cltv"],"metadata":{"id":"Xn2SVMtqlUiR","executionInfo":{"status":"ok","timestamp":1666647672056,"user_tz":-180,"elapsed":850,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}}},"execution_count":25,"outputs":[]},{"cell_type":"code","source":["cltv_df[\"cltv\"].sort_values(ascending = False).head(20)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"-gg5C6lO1u1Q","executionInfo":{"status":"ok","timestamp":1666647672058,"user_tz":-180,"elapsed":15,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"fff2852a-3351-444e-a63d-2a43164e04fa"},"execution_count":26,"outputs":[{"output_type":"execute_result","data":{"text/plain":["9055 3327.71\n","13880 3172.22\n","17323 1708.98\n","12438 1662.52\n","7330 1627.74\n","8868 1623.81\n","6402 1538.86\n","6666 1529.23\n","19538 1485.82\n","14858 1423.00\n","17963 1362.61\n","15516 1359.44\n","6717 1355.44\n","4157 1353.53\n","4735 1334.83\n","11694 1297.52\n","11179 1286.14\n","1853 1285.23\n","5775 1282.58\n","7312 1263.19\n","Name: cltv, dtype: float64"]},"metadata":{},"execution_count":26}]},{"cell_type":"markdown","source":["##### **Görev 4:** CLTV Değerine Göre Segmentlerin Oluşturulması\n","\n","##### **Adım 1:** 6 aylık CLTV'ye göre tüm müşterilerinizi 4 gruba (segmente) ayırınız ve grup isimlerini veri setine 
ekleyiniz.\n"],"metadata":{"id":"1FzPSLPGlgMN"}},{"cell_type":"code","source":["cltv_df"],"metadata":{"id":"-hcN--wlUxwn"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["cltv_df[\"segment\"] = pd.qcut(cltv_df[\"cltv\"], 4, labels=[\"D\", \"C\", \"B\", \"A\"])\n","cltv_df.head()"],"metadata":{"id":"rK3jmX-ilpRT"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["##### **Adım 2:** 4 grup içerisinden seçeceğiniz 2 grup için yönetime kısa kısa 6 aylık aksiyon önerilerinde bulununuz."],"metadata":{"id":"ceBnGBDalpgm"}},{"cell_type":"code","source":["cltv_df.groupby(\"segment\").agg({\"exp_sales_6_month\": [\"count\", \"mean\", \"sum\",\"median\"]})"],"metadata":{"id":"YawUIHgulruy"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["A ve D segmentleri özelinde konuşmak gerekirse: A segmenti firma için en değerli müşterilerden oluşur; imkanlar kısıtlıysa öncelikle ilgilenilmesi gereken gruptur. D segmenti ise imkanlar elverdiği ölçüde daha çok ilgilenilmesi ve kampanya yapılması gereken gruptur."],"metadata":{"id":"2E1fSecs48ms"}}]} -------------------------------------------------------------------------------- /week_5 (27.10.22-02.11.22)/Rating_Product_Sorting_Reviews_Amazon220805073819-221026-191605.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/melisagozet/DataScienceMachineLearningBootcampMiuul/d9d571e6b27c33e09d121a3804ee231460ed55b3/week_5 (27.10.22-02.11.22)/Rating_Product_Sorting_Reviews_Amazon220805073819-221026-191605.pdf -------------------------------------------------------------------------------- /week_5 (27.10.22-02.11.22)/rating_products_melisa.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"collapsed_sections":[],"authorship_tag":"ABX9TyPzD8tqBYsAFao1ioOfkN2B"},"kernelspec":{"name":"python3","display_name":"Python 
3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["## Rating Products\n","\n"," - Average\n","\n"," - Time-Based Weighted Average\n","\n"," - User-Based Weighted Average\n","\n"," - Weighted Rating\n","\n"," * Uygulama: Kullanıcı ve Zaman Ağırlıklı Kurs Puanı Hesaplama"],"metadata":{"id":"Y1BcjJSOoq3u"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"E4-01tHmom_r"},"outputs":[],"source":["import pandas as pd\n","import math\n","import scipy.stats as st\n","from sklearn.preprocessing import MinMaxScaler\n","\n","pd.set_option('display.max_columns', None)\n","pd.set_option('display.max_rows', None)\n","pd.set_option('display.width', 500)\n","pd.set_option('display.expand_frame_repr', False)\n","pd.set_option('display.float_format', lambda x: '%.5f' % x)"]},{"cell_type":"code","source":["from google.colab import drive\n","drive.mount(\"/content/gdrive\")"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"d8mdYmqYqm2U","executionInfo":{"status":"ok","timestamp":1666862530974,"user_tz":-180,"elapsed":2227,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"c901e5af-d588-46b9-d238-d5a8a5ac8181"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount(\"/content/gdrive\", force_remount=True).\n"]}]},{"cell_type":"markdown","source":["* (50+ Saat) Python A-Z™: Veri Bilimi ve Machine Learning\n","\n","* Puan: 4.8 (4.764925)\n","\n","* Toplam Puan: 4611\n","\n","* Puan Yüzdeleri: 75, 20, 4, 1, <1\n","\n","* Yaklaşık Sayısal Karşılıkları: 3458, 922, 184, 46, 6"],"metadata":{"id":"YHhH2tSUrfbI"}},{"cell_type":"code","source":["df = pd.read_csv(\"/content/gdrive/MyDrive/DSMLBC10/week_5 
(27.10.22-02.11.22)/datasets/course_reviews.csv\")\n","df.head()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":206},"id":"-iqaI_U8pLBg","executionInfo":{"status":"ok","timestamp":1666862530978,"user_tz":-180,"elapsed":55,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"687eec22-1676-4011-b411-dcaeb8dd6243"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" Rating Timestamp Enrolled Progress Questions Asked Questions Answered\n","0 5.00000 2021-02-05 07:45:55 2021-01-25 15:12:08 5.00000 0.00000 0.00000\n","1 5.00000 2021-02-04 21:05:32 2021-02-04 20:43:40 1.00000 0.00000 0.00000\n","2 4.50000 2021-02-04 20:34:03 2019-07-04 23:23:27 1.00000 0.00000 0.00000\n","3 5.00000 2021-02-04 16:56:28 2021-02-04 14:41:29 10.00000 0.00000 0.00000\n","4 4.00000 2021-02-04 15:00:24 2020-10-13 03:10:07 10.00000 0.00000 0.00000"],"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
RatingTimestampEnrolledProgressQuestions AskedQuestions Answered
05.000002021-02-05 07:45:552021-01-25 15:12:085.000000.000000.00000
15.000002021-02-04 21:05:322021-02-04 20:43:401.000000.000000.00000
24.500002021-02-04 20:34:032019-07-04 23:23:271.000000.000000.00000
35.000002021-02-04 16:56:282021-02-04 14:41:2910.000000.000000.00000
44.000002021-02-04 15:00:242020-10-13 03:10:0710.000000.000000.00000
\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":3}]},{"cell_type":"code","source":["df.shape"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"TOHZ5FKmrDNi","executionInfo":{"status":"ok","timestamp":1666862530982,"user_tz":-180,"elapsed":57,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"9d200171-c78a-46ab-ce01-776f710427da"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(4323, 6)"]},"metadata":{},"execution_count":4}]},{"cell_type":"code","source":["# rating dagılımı\n","df[\"Rating\"].value_counts()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"lCwLAE_9rOok","executionInfo":{"status":"ok","timestamp":1666862530983,"user_tz":-180,"elapsed":55,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"0e99de44-80d1-4ba2-fffc-95751cd28609"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["5.00000 3267\n","4.50000 475\n","4.00000 383\n","3.50000 96\n","3.00000 62\n","1.00000 15\n","2.00000 12\n","2.50000 11\n","1.50000 2\n","Name: Rating, dtype: int64"]},"metadata":{},"execution_count":5}]},{"cell_type":"code","source":["df[\"Questions Asked\"].value_counts()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"n49aKXy8rr26","executionInfo":{"status":"ok","timestamp":1666862530986,"user_tz":-180,"elapsed":56,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"009659d7-81d8-4226-f653-c36fd388968c"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0.00000 3867\n","1.00000 276\n","2.00000 80\n","3.00000 43\n","4.00000 15\n","5.00000 13\n","6.00000 9\n","8.00000 5\n","9.00000 3\n","14.00000 2\n","11.00000 2\n","7.00000 2\n","10.00000 2\n","15.00000 2\n","22.00000 1\n","12.00000 1\n","Name: Questions Asked, dtype: int64"]},"metadata":{},"execution_count":6}]},{"cell_type":"code","source":["df.groupby(\"Questions 
Asked\").agg({\"Questions Asked\": \"count\",\n"," \"Rating\": \"mean\"})"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":583},"id":"lZqnoDKVruWQ","executionInfo":{"status":"ok","timestamp":1666862530988,"user_tz":-180,"elapsed":55,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"233e3ec2-a26a-4d85-b1ca-81f59a39a578"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" Questions Asked Rating\n","Questions Asked \n","0.00000 3867 4.76519\n","1.00000 276 4.74094\n","2.00000 80 4.80625\n","3.00000 43 4.74419\n","4.00000 15 4.83333\n","5.00000 13 4.65385\n","6.00000 9 5.00000\n","7.00000 2 4.75000\n","8.00000 5 4.90000\n","9.00000 3 5.00000\n","10.00000 2 5.00000\n","11.00000 2 5.00000\n","12.00000 1 5.00000\n","14.00000 2 4.50000\n","15.00000 2 3.00000\n","22.00000 1 5.00000"],"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
Questions AskedRating
Questions Asked
0.0000038674.76519
1.000002764.74094
2.00000804.80625
3.00000434.74419
4.00000154.83333
5.00000134.65385
6.0000095.00000
7.0000024.75000
8.0000054.90000
9.0000035.00000
10.0000025.00000
11.0000025.00000
12.0000015.00000
14.0000024.50000
15.0000023.00000
22.0000015.00000
\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":7}]},{"cell_type":"markdown","source":["### Average"],"metadata":{"id":"b_u7fz7Er3nY"}},{"cell_type":"code","source":["# Ortalama Puan\n","df[\"Rating\"].mean()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"HoVec716rxk1","executionInfo":{"status":"ok","timestamp":1666862530995,"user_tz":-180,"elapsed":58,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"ddebf0c3-3dd4-4164-8496-f35c991b70a2"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["4.764284061993986"]},"metadata":{},"execution_count":8}]},{"cell_type":"markdown","source":["### Time-Based Weighted Average\n","\n","Sadece ortalamaya bakıldığında, gözden kaçabilecek bazı durumlar olabilir. Örneğin, son zamanlardaki trend nedir ? Ürünün kargo, paketleme hizmetlerinin, iade hizmetlerinin nasıl olduğu bilgisine ihtiyaç olabilir."],"metadata":{"id":"Xk-eTZ61r_g4"}},{"cell_type":"code","source":["df.info()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"GR_fikWAv1on","executionInfo":{"status":"ok","timestamp":1666862530997,"user_tz":-180,"elapsed":57,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"67d7b4e2-8b50-40b4-9652-2507ce601732"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["\n","RangeIndex: 4323 entries, 0 to 4322\n","Data columns (total 6 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 Rating 4323 non-null float64\n"," 1 Timestamp 4323 non-null object \n"," 2 Enrolled 4323 non-null object \n"," 3 Progress 4323 non-null float64\n"," 4 Questions Asked 4323 non-null float64\n"," 5 Questions Answered 4323 non-null float64\n","dtypes: float64(4), object(2)\n","memory usage: 202.8+ KB\n"]}]},{"cell_type":"code","source":["# Puan Zamanlarına Göre Ağırlıklı Ortalama\n","df[\"Timestamp\"] = 
pd.to_datetime(df[\"Timestamp\"])"],"metadata":{"id":"XPIEd38vr7q6"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["current_date = pd.to_datetime('2021-02-10 0:0:0')"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"H4PMhtBKsGhl","executionInfo":{"status":"ok","timestamp":1666862530999,"user_tz":-180,"elapsed":56,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"19465f66-7fda-4da7-c3fe-5f8b359c7f6a"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["4.775773195876289"]},"metadata":{},"execution_count":11}]},{"cell_type":"code","source":["current_date"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"7S0EZA4kwD_N","executionInfo":{"status":"ok","timestamp":1666862576413,"user_tz":-180,"elapsed":327,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"08fb0516-5215-483f-d0ca-cec9d7781ed1"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Timestamp('2021-02-10 00:00:00')"]},"metadata":{},"execution_count":23}]},{"cell_type":"code","source":["df[\"days\"] = (current_date - df[\"Timestamp\"]).dt.days\n","\n","df.loc[df[\"days\"] <= 30, \"Rating\"].mean()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"5DZyjWKUwGM0","executionInfo":{"status":"ok","timestamp":1666863060311,"user_tz":-180,"elapsed":315,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"f0aa8684-c91e-4c74-b1ae-254eb7b0b129"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["4.775773195876289"]},"metadata":{},"execution_count":25}]},{"cell_type":"code","source":["df.loc[(df[\"days\"] > 30) & (df[\"days\"] <= 90), \"Rating\"].mean()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"qysuF0KcsMFF","executionInfo":{"status":"ok","timestamp":1666862530999,"user_tz":-180,"elapsed":54,"user":{"displayName":"melisa 
gözet","userId":"05470559865422977892"}},"outputId":"9e386d0a-52f1-46a7-bbea-97067e9a2055"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["4.763833992094861"]},"metadata":{},"execution_count":12}]},{"cell_type":"code","source":["df.loc[(df[\"days\"] > 90) & (df[\"days\"] <= 180), \"Rating\"].mean()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"nfp-FQxSsOUx","executionInfo":{"status":"ok","timestamp":1666862531003,"user_tz":-180,"elapsed":56,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"068611e4-a1c3-4fe6-eb70-02d7e4ceb6df"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["4.752503576537912"]},"metadata":{},"execution_count":13}]},{"cell_type":"code","source":["df.loc[(df[\"days\"] > 180), \"Rating\"].mean()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"T08Y7VVlsQYz","executionInfo":{"status":"ok","timestamp":1666862531556,"user_tz":-180,"elapsed":606,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"23eaa02b-6a3d-4e30-b6ee-d90f5c1281fb"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["4.76641586867305"]},"metadata":{},"execution_count":14}]},{"cell_type":"code","source":["df.loc[df[\"days\"] <= 30, \"Rating\"].mean() * 28/100 + \\\n"," df.loc[(df[\"days\"] > 30) & (df[\"days\"] <= 90), \"Rating\"].mean() * 26/100 + \\\n"," df.loc[(df[\"days\"] > 90) & (df[\"days\"] <= 180), \"Rating\"].mean() * 24/100 + \\\n"," df.loc[(df[\"days\"] > 180), \"Rating\"].mean() * 22/100\n","\n","# çarpılan değerler istediğim zaman aralığının ağırlıklı değerlerine karşılık"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"T_bMJGWFsR_Z","executionInfo":{"status":"ok","timestamp":1666862531560,"user_tz":-180,"elapsed":38,"user":{"displayName":"melisa 
gözet","userId":"05470559865422977892"}},"outputId":"40641f24-5657-4fa4-d409-077183565b29"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["4.765025682267194"]},"metadata":{},"execution_count":15}]},{"cell_type":"code","source":["def time_based_weighted_average(dataframe, w1=28, w2=26, w3=24, w4=22):\n"," return dataframe.loc[dataframe[\"days\"] <= 30, \"Rating\"].mean() * w1 / 100 + \\\n"," dataframe.loc[(dataframe[\"days\"] > 30) & (dataframe[\"days\"] <= 90), \"Rating\"].mean() * w2 / 100 + \\\n"," dataframe.loc[(dataframe[\"days\"] > 90) & (dataframe[\"days\"] <= 180), \"Rating\"].mean() * w3 / 100 + \\\n"," dataframe.loc[(dataframe[\"days\"] > 180), \"Rating\"].mean() * w4 / 100\n","\n","time_based_weighted_average(df)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"MMXgpEwqsU4s","executionInfo":{"status":"ok","timestamp":1666862531561,"user_tz":-180,"elapsed":35,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"ea6ee86d-6311-4f99-839b-cdfe121bd4d8"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["4.765025682267194"]},"metadata":{},"execution_count":16}]},{"cell_type":"code","source":["time_based_weighted_average(df, 30, 26, 22, 22) #sırasıyla ağırlıkların değiştirilmesi işlemi yapıldı"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"w28JWLhgsXwS","executionInfo":{"status":"ok","timestamp":1666862531562,"user_tz":-180,"elapsed":34,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"6d319b34-0412-42ab-d927-46efcebd06e1"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["4.765491074653962"]},"metadata":{},"execution_count":17}]},{"cell_type":"markdown","source":["### User-Based Weighted Average\n","\n","kursu hiç izlemeyen bir kişinin vereceği puanla %75 ini izleyen kişinin vereceği puanın ağırlıkları aynı mı olmalı ? 
"],"metadata":{"id":"Ked33eB3sbn6"}},{"cell_type":"code","source":["df.groupby(\"Progress\").agg({\"Rating\": \"mean\"}).head()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":238},"id":"Ez1wDJnBsZek","executionInfo":{"status":"ok","timestamp":1666862531562,"user_tz":-180,"elapsed":32,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"bfb75217-bfbd-4060-fb0d-abd0a34fa386"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" Rating\n","Progress \n","0.00000 4.67391\n","1.00000 4.64269\n","2.00000 4.65476\n","3.00000 4.66355\n","4.00000 4.77733"],"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
Rating
Progress
0.000004.67391
1.000004.64269
2.000004.65476
3.000004.66355
4.000004.77733
\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":18}]},{"cell_type":"code","source":["df.loc[df[\"Progress\"] <= 10, \"Rating\"].mean() * 22 / 100 + \\\n"," df.loc[(df[\"Progress\"] > 10) & (df[\"Progress\"] <= 45), \"Rating\"].mean() * 24 / 100 + \\\n"," df.loc[(df[\"Progress\"] > 45) & (df[\"Progress\"] <= 75), \"Rating\"].mean() * 26 / 100 + \\\n"," df.loc[(df[\"Progress\"] > 75), \"Rating\"].mean() * 28 / 100"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Q07nvgzrseAk","executionInfo":{"status":"ok","timestamp":1666862531563,"user_tz":-180,"elapsed":31,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"3115b55d-a3a1-4fb0-bd52-d17d55cc7a37"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["4.800257704672543"]},"metadata":{},"execution_count":19}]},{"cell_type":"code","source":["def user_based_weighted_average(dataframe, w1=22, w2=24, w3=26, w4=28):\n"," return dataframe.loc[dataframe[\"Progress\"] <= 10, \"Rating\"].mean() * w1 / 100 + \\\n"," dataframe.loc[(dataframe[\"Progress\"] > 10) & (dataframe[\"Progress\"] <= 45), \"Rating\"].mean() * w2 / 100 + \\\n"," dataframe.loc[(dataframe[\"Progress\"] > 45) & (dataframe[\"Progress\"] <= 75), \"Rating\"].mean() * w3 / 100 + \\\n"," dataframe.loc[(dataframe[\"Progress\"] > 75), \"Rating\"].mean() * w4 / 100\n","\n","\n","user_based_weighted_average(df, 20, 24, 26, 30)\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"gyQOnM2vsiJM","executionInfo":{"status":"ok","timestamp":1666862531563,"user_tz":-180,"elapsed":25,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"7e403325-a62d-4380-f62f-3929eb61456d"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["4.803286469062915"]},"metadata":{},"execution_count":20}]},{"cell_type":"markdown","source":["### Weighted Rating"],"metadata":{"id":"Cmv-wT6TslkI"}},{"cell_type":"code","source":["def 
course_weighted_rating(dataframe, time_w=50, user_w=50):\n"," return time_based_weighted_average(dataframe) * time_w/100 + user_based_weighted_average(dataframe)*user_w/100 \n","\n","course_weighted_rating(df)\n","\n","#(%50 ye %50 ağırlık kalite metriği belirleyip ona göre hassaslaştırıldı.)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"KJ0Mg6fKsjll","executionInfo":{"status":"ok","timestamp":1666862531564,"user_tz":-180,"elapsed":23,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"b8d5ced0-863b-4b97-d6b9-a5e32920b3e4"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["4.782641693469868"]},"metadata":{},"execution_count":21}]},{"cell_type":"code","source":["course_weighted_rating(df, time_w=40, user_w=60)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"5GVOYQnMspIp","executionInfo":{"status":"ok","timestamp":1666862531564,"user_tz":-180,"elapsed":21,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"a0ffa677-650c-4c0d-bb3f-f1efd40f09f8"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["4.786164895710403"]},"metadata":{},"execution_count":22}]}]} -------------------------------------------------------------------------------- /week_5 (27.10.22-02.11.22)/sorting_reviews_melisa.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"collapsed_sections":[],"authorship_tag":"ABX9TyOKYjFC3o64Tsm/V4EZBGna"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"code","execution_count":1,"metadata":{"id":"5oAcP-sY-4R9","executionInfo":{"status":"ok","timestamp":1667500113652,"user_tz":-180,"elapsed":531,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}}},"outputs":[],"source":["import pandas as 
pd\n","import math\n","import scipy.stats as st\n","\n","pd.set_option('display.max_columns', None)\n","pd.set_option('display.expand_frame_repr', False)\n","pd.set_option('display.float_format', lambda x: '%.5f' % x)"]},{"cell_type":"code","source":["# Up-Down Diff Score = (up ratings) − (down ratings)\n","\n","# Review 1: 600 up 400 down total 1000\n","# Review 2: 5500 up 4500 down total 10000\n","\n","def score_up_down_diff(up, down):\n"," return up - down\n","\n","# Review 1 Score:\n","print(score_up_down_diff(600, 400))\n","\n","# Review 2 Score\n","print(score_up_down_diff(5500, 4500))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"XXG-E1_o--vn","executionInfo":{"status":"ok","timestamp":1667500113653,"user_tz":-180,"elapsed":14,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"100c5b7e-2666-4465-ad37-1de59ec021e9"},"execution_count":2,"outputs":[{"output_type":"stream","name":"stdout","text":["200\n","1000\n"]}]},{"cell_type":"code","source":["# Score = Average rating = (up ratings) / (all ratings)\n","def score_average_rating(up, down):\n"," if up + down == 0:\n"," return 0\n"," return up / (up + down) \n"," \n","#payda 0 olamaz hata vereceğinden dolayı eğer sıfırsa return 0 döndür değilse bölme işlemini yap.\n","\n","print(score_average_rating(600, 400))\n","print(score_average_rating(5500, 4500))\n","\n","# Review 1: 2 up 0 down total 2\n","# Review 2: 100 up 1 down total 101\n","\n","print(score_average_rating(2, 0)) \n","score_average_rating(100, 1)\n","# burada da frekans bilgisi kaçtı. 
Eş zamanlı olarak hepsinin doğru bir sıralama score çıkması için aşağıdaki yöntem kullanılmıştır."],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"anUHFtCT_Q1o","executionInfo":{"status":"ok","timestamp":1667500114384,"user_tz":-180,"elapsed":742,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"0e3aaf3a-cbfa-4d14-a20b-158f284f88f7"},"execution_count":3,"outputs":[{"output_type":"stream","name":"stdout","text":["0.6\n","0.55\n","1.0\n"]},{"output_type":"execute_result","data":{"text/plain":["0.9900990099009901"]},"metadata":{},"execution_count":3}]},{"cell_type":"markdown","source":["## **Wilson Lower Bound Score**\n","\n","İkili etkileşim sonucu ortaya çıkan bütün ölçme problemlerinde kullanılabilir. Mesela youtube like dislike karşılaştırması gibi.\n","Bernoulli parametresi (ikili olayların gerçekleşmesi olasılığı için kullanılır. Yazı Tura gibi.) p (bir olayın gözlenmesi-gerçekleşmesi durumu) için hesaplanacak güven aralığının alt sınırı WLB skoru olarak kabul edilir.\n","\n","Neden bunu kullanıyoruz ? Bizim elimizde bütün müşteriler/veri yok. Sadece örneklem var. Var olanların içinden bir up oranım var. Öyle bir genelleme yapmak istiyorum ki bütün kitleye yansıtabileyim. Bir olasılık problemi olarak düşünürsek, ilgilenilen olay üzerinden bir güven aralığı hesapladığımızda, istatistiksel olarak 100 kullanıcıdan 95 i(güven) etkileşim sağladığında %5 yanılma payım olmakla birlikte bu yorumun up oranı 0.5 ile 0.7 arasında olacaktır.\n","\n"," 600-400\n","\n","up = 0.6 bu bir örneklem değeri, kaygım bütün kitleye genellemek\n","\n"," 0.5 - 0.7\n","\n"," Bir skor belirlemem gerektiği için **0.5** i alt sınır skor olarak kabul ediyorum.\n","\n"," Bütün gözlem birimleri için aynı işlemi yaptığımda garanti bir alt skorum var. Bunu referans alarak bunları sıralayabilirim. \n","\n"," Bu bir rate hesabı olarak kullanılır ve buna dayalı olarak bir skorlama yapılır. 
\n","\n"," \n","**Not:**\n"," Eğer skorlar 1-5 arasındaysa 1-3 negatif, 4-5 pozitif olarak işaretlenir ve bernoulli'ye uygun hale getirilebilir.\n"," Bu beraberinde bazı problemleri de getirir. Bu sebeple bayesian average rating yapmak gerekir.\n","\n"],"metadata":{"id":"5xUTNnJuk7qF"}},{"cell_type":"code","source":["# Wilson Lower Bound Score\n","\n","# ikili etkileşim sonucu ortaya çıkan bütün ölçme problemlerinde kullanılabilir.\n","# Mesela youtube like dislike karşılaştırması gibi.\n","\n","# Bernoulli parametresi (ikili olayların gerçekleşmesi olasılığı için kullanılır. Yazı Tura gibi.) \n","# p (bir olayın gözlenmesi-gerçekleşmesi durumu) için \n","# hesaplanacak güven aralığının alt sınırı WLB skoru olarak kabul edilir.\n","\n","\n","# Neden bunu kullanıyoruz ? Bizim elimizde bütün müşteriler/veri yok. Sadece örneklem var. \n","# Var olanların içinden bir up oranım var. Öyle bir genelleme yapmak istiyorum ki bütün kitleye \n","# yansıtabileyim. Bir olasılık problemi olarak düşünürsek, ilgilenilen olay üzerinden bir güven aralığı\n","# hesapladığımızda, istatistiksel olarak 100 kullanıcıdan 95 i(güven) etkileşim sağladığında %5 yanılma payım olmakla birlikte \n","# bu yorumun up oranı 0.5 ile 0.7 arasında olacaktır.\n","\n","# 600-400\n","# up = 0.6 bu bir örneklem değeri, kaygım bütün kitleye genellemek\n","# 0.5 0.7\n","# 0.5 bir skor belirlemem gerektiği için 0.5 i alt sınır skor olarak kabul ediyorum.\n","# Bütün gözlem birimleri için aynı işlemi yaptığımda garanti bir alt skorum var. Bunu referans alarak bunları sıralayabilirim. \n","# Bu bir rate hesabı olarak kullanılır ve buna dayalı olarak bir skorlama yapılır. \n","\n"," \n"," # - Not:\n"," # Eğer skorlar 1-5 arasındaysa 1-3 negatif, 4-5 pozitif olarak işaretlenir ve bernoulli'ye uygun hale getirilebilir.\n"," # Bu beraberinde bazı problemleri de getirir. 
Bu sebeple bayesian average rating yapmak gerekir.\n","\n","\n","def wilson_lower_bound(up, down, confidence=0.95):\n"," \"\"\"\n"," Wilson Lower Bound Score hesapla\n","\n"," - Bernoulli parametresi p için hesaplanacak güven aralığının alt sınırı WLB skoru olarak kabul edilir.\n"," - Hesaplanacak skor ürün sıralaması için kullanılır.\n"," \n"," - Not:\n"," Eğer skorlar 1-5 arasındaysa 1-3 negatif, 4-5 pozitif olarak işaretlenir ve bernoulli'ye uygun hale getirilebilir.\n"," Bu beraberinde bazı problemleri de getirir. Bu sebeple bayesian average rating yapmak gerekir.\n","\n"," Parameters\n"," ----------\n"," up: int\n"," up count\n"," down: int\n"," down count\n"," confidence: float\n"," confidence\n","\n"," Returns\n"," -------\n"," wilson score: float\n","\n"," \"\"\"\n"," n = up + down\n"," if n == 0:\n"," return 0\n"," z = st.norm.ppf(1 - (1 - confidence) / 2)\n"," phat = 1.0 * up / n\n"," return (phat + z * z / (2 * n) - z * math.sqrt((phat * (1 - phat) + z * z / (4 * n)) / n)) / (1 + z * z / n)\n","\n","\n","wilson_lower_bound(600, 400)"],"metadata":{"id":"v6Rmwkfi_Vgb","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1667500144666,"user_tz":-180,"elapsed":709,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"58856689-e585-4796-acdd-b7577a25f8ef"},"execution_count":5,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0.5693094295142663"]},"metadata":{},"execution_count":5}]},{"cell_type":"code","source":["wilson_lower_bound(5500, 4500)"],"metadata":{"id":"RXtBrVWdlvOm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1667500149007,"user_tz":-180,"elapsed":429,"user":{"displayName":"melisa 
gözet","userId":"05470559865422977892"}},"outputId":"05237e45-f350-424b-e0b2-7e3835cdb963"},"execution_count":6,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0.5402319557715324"]},"metadata":{},"execution_count":6}]},{"cell_type":"code","source":["wilson_lower_bound(2, 0)"],"metadata":{"id":"ygLvhiPjlvRk","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1667500149533,"user_tz":-180,"elapsed":11,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"31134272-2983-4e1b-ca7f-a9ceee1737a5"},"execution_count":7,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0.3423802275066531"]},"metadata":{},"execution_count":7}]},{"cell_type":"code","source":["wilson_lower_bound(100, 1)"],"metadata":{"id":"3CStWIYQlx5Q","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1667500149535,"user_tz":-180,"elapsed":10,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"22910450-2a3f-49b9-9f8a-8937bc40182b"},"execution_count":8,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0.9460328420055449"]},"metadata":{},"execution_count":8}]},{"cell_type":"code","source":["# Case Study\n","up = [15, 70, 14, 4, 2, 5, 8, 37, 21, 52, 28, 147, 61, 30, 23, 40, 37, 61, 54, 18, 12, 68]\n","down = [0, 2, 2, 2, 15, 2, 6, 5, 23, 8, 12, 2, 1, 1, 5, 1, 2, 6, 2, 0, 2, 2]\n","comments = pd.DataFrame({\"up\": up, \"down\": down})\n","\n","\n","\n","# score_pos_neg_diff\n","comments[\"score_pos_neg_diff\"] = comments.apply(lambda x: score_up_down_diff(x[\"up\"],\n"," x[\"down\"]), axis=1)\n","\n","# score_average_rating\n","comments[\"score_average_rating\"] = comments.apply(lambda x: score_average_rating(x[\"up\"], x[\"down\"]), axis=1)\n","\n","# wilson_lower_bound\n","comments[\"wilson_lower_bound\"] = comments.apply(lambda x: wilson_lower_bound(x[\"up\"], x[\"down\"]), 
axis=1)\n","\n","\n","\n","comments.sort_values(\"wilson_lower_bound\", ascending=False)"],"metadata":{"id":"9hWS01W5_aBL","colab":{"base_uri":"https://localhost:8080/","height":739},"executionInfo":{"status":"ok","timestamp":1667500150354,"user_tz":-180,"elapsed":21,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}},"outputId":"835808fd-7398-4b86-8510-03500bd7a24f"},"execution_count":9,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" up down score_pos_neg_diff score_average_rating wilson_lower_bound\n","11 147 2 145 0.98658 0.95238\n","12 61 1 60 0.98387 0.91413\n","1 70 2 68 0.97222 0.90426\n","21 68 2 66 0.97143 0.90168\n","18 54 2 52 0.96429 0.87881\n","15 40 1 39 0.97561 0.87405\n","13 30 1 29 0.96774 0.83806\n","16 37 2 35 0.94872 0.83114\n","19 18 0 18 1.00000 0.82412\n","17 61 6 55 0.91045 0.81807\n","0 15 0 15 1.00000 0.79612\n","9 52 8 44 0.86667 0.75835\n","7 37 5 32 0.88095 0.75000\n","14 23 5 18 0.82143 0.64409\n","2 14 2 12 0.87500 0.63977\n","20 12 2 10 0.85714 0.60059\n","10 28 12 16 0.70000 0.54570\n","5 5 2 3 0.71429 0.35893\n","8 21 23 -2 0.47727 0.33755\n","6 8 6 2 0.57143 0.32591\n","3 4 2 2 0.66667 0.29999\n","4 2 15 -13 0.11765 0.03288"],"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
updownscore_pos_neg_diffscore_average_ratingwilson_lower_bound
1114721450.986580.95238
12611600.983870.91413
1702680.972220.90426
21682660.971430.90168
18542520.964290.87881
15401390.975610.87405
13301290.967740.83806
16372350.948720.83114
19180181.000000.82412
17616550.910450.81807
0150151.000000.79612
9528440.866670.75835
7375320.880950.75000
14235180.821430.64409
2142120.875000.63977
20122100.857140.60059
102812160.700000.54570
55230.714290.35893
82123-20.477270.33755
68620.571430.32591
34220.666670.29999
4215-130.117650.03288
\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":9}]},{"cell_type":"code","source":["\n","comments = pd.DataFrame({\"up\": up, \"down\": down})"],"metadata":{"id":"7Q1w0Po7vhRs","executionInfo":{"status":"aborted","timestamp":1667500114390,"user_tz":-180,"elapsed":37,"user":{"displayName":"melisa gözet","userId":"05470559865422977892"}}},"execution_count":null,"outputs":[]}]} -------------------------------------------------------------------------------- /week_6 (03.11.22-09.11.22)/Ab_Testing_Melisa.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/melisagozet/DataScienceMachineLearningBootcampMiuul/d9d571e6b27c33e09d121a3804ee231460ed55b3/week_6 (03.11.22-09.11.22)/Ab_Testing_Melisa.pdf -------------------------------------------------------------------------------- /week_6 (03.11.22-09.11.22)/ab_testing.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/melisagozet/DataScienceMachineLearningBootcampMiuul/d9d571e6b27c33e09d121a3804ee231460ed55b3/week_6 (03.11.22-09.11.22)/ab_testing.xlsx -------------------------------------------------------------------------------- /week_7 (10.11.22-16.11.22)/item_based_recommender_melisa.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "axsR2rQbIRzw" 7 | }, 8 | "source": [ 9 | "## **Item-Based Collaborative Filtering**\n", 10 | "\n", 11 | "### **İş Problemi**\n", 12 | "\n", 13 | "* Online bir film izleme platformu (örneğin kuzukuzu.tv) iş birlikçi filtreleme yöntemi ile bir öneri sistemi geliştirmek istemektedir. \n", 14 | "\n", 15 | "* İçerik temelli öneri sistemlerini deneyen şirket topluluğunun kanaatlerini barındıracak şekilde öneriler geliştirmek istemektedir. 
\n", 16 | "\n", 17 | "* Kullanıcıları bir filmi beğendiğinde o film ile benzer beğenilme örüntüsüne sahip olan diğer filmler önerilmektedir. \n", 18 | "\n", 19 | "### **Veri Seti Hikayesi**\n", 20 | "\n", 21 | "* Veri seti MovieLens tarafından sağlanmıştır.\n", 22 | "\n", 23 | "* İçerisinde filmler ve bu filmlere verilen puanları barındırmaktadır.\n", 24 | "\n", 25 | "* Veri Seti yaklaşık 27000 film için yaklaşık 20.000.000 derecelendirme içermektedir. \n", 26 | "\n", 27 | "* Veri seti: https://grouplens.org/datasets/movielens/" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": { 33 | "id": "6_JSYW3MLxsq" 34 | }, 35 | "source": [ 36 | "#### **Adım 1: Veri Setinin Hazırlanması**" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": { 43 | "colab": { 44 | "base_uri": "https://localhost:8080/", 45 | "height": 206 46 | }, 47 | "id": "uk2LQeqXIMpj", 48 | "outputId": "773cf240-28ee-416d-ab48-6d1f2104b641" 49 | }, 50 | "outputs": [ 51 | { 52 | "output_type": "execute_result", 53 | "data": { 54 | "text/plain": [ 55 | " movieId title genres \\\n", 56 | "0 1 Toy Story (1995) Adventure|Animation|Children|Comedy|Fantasy \n", 57 | "1 1 Toy Story (1995) Adventure|Animation|Children|Comedy|Fantasy \n", 58 | "2 1 Toy Story (1995) Adventure|Animation|Children|Comedy|Fantasy \n", 59 | "3 1 Toy Story (1995) Adventure|Animation|Children|Comedy|Fantasy \n", 60 | "4 1 Toy Story (1995) Adventure|Animation|Children|Comedy|Fantasy \n", 61 | "\n", 62 | " userId rating timestamp \n", 63 | "0 3.0 4.0 1999-12-11 13:36:47 \n", 64 | "1 6.0 5.0 1997-03-13 17:50:52 \n", 65 | "2 8.0 4.0 1996-06-05 13:37:51 \n", 66 | "3 10.0 4.0 1999-11-25 02:44:47 \n", 67 | "4 11.0 4.5 2009-01-02 01:13:41 " 68 | ], 69 | "text/html": [ 70 | "\n", 71 | "
\n", 72 | "
\n", 73 | "
\n", 74 | "\n", 87 | "\n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | "
movieIdtitlegenresuserIdratingtimestamp
01Toy Story (1995)Adventure|Animation|Children|Comedy|Fantasy3.04.01999-12-11 13:36:47
11Toy Story (1995)Adventure|Animation|Children|Comedy|Fantasy6.05.01997-03-13 17:50:52
21Toy Story (1995)Adventure|Animation|Children|Comedy|Fantasy8.04.01996-06-05 13:37:51
31Toy Story (1995)Adventure|Animation|Children|Comedy|Fantasy10.04.01999-11-25 02:44:47
41Toy Story (1995)Adventure|Animation|Children|Comedy|Fantasy11.04.52009-01-02 01:13:41
\n", 147 | "
\n", 148 | " \n", 158 | " \n", 159 | " \n", 196 | "\n", 197 | " \n", 221 | "
\n", 222 | "
\n", 223 | " " 224 | ] 225 | }, 226 | "metadata": {}, 227 | "execution_count": 1 228 | } 229 | ], 230 | "source": [ 231 | "import pandas as pd\n", 232 | "pd.set_option('display.max_columns', 500)\n", 233 | "movie = pd.read_csv('/content/drive/MyDrive/DSMLBC10/week_7 (10.11.22-16.11.22)/datasets/movie_lens_dataset/movie.csv')\n", 234 | "rating = pd.read_csv('/content/drive/MyDrive/DSMLBC10/week_7 (10.11.22-16.11.22)/datasets/movie_lens_dataset/rating.csv')\n", 235 | "df = movie.merge(rating, how=\"left\", on=\"movieId\")\n", 236 | "df.head()" 237 | ] 238 | }, 239 | { 240 | "cell_type": "markdown", 241 | "metadata": { 242 | "id": "F6egdskXMQy4" 243 | }, 244 | "source": [ 245 | "#### **Adım 2: User Movie Df'inin Oluşturulması**" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": null, 251 | "metadata": { 252 | "colab": { 253 | "base_uri": "https://localhost:8080/" 254 | }, 255 | "id": "KqxZZ1PRMPNZ", 256 | "outputId": "f5e7e6d1-5142-4cd3-b70c-1bd702e27672" 257 | }, 258 | "outputs": [ 259 | { 260 | "output_type": "execute_result", 261 | "data": { 262 | "text/plain": [ 263 | "(20000797, 6)" 264 | ] 265 | }, 266 | "metadata": {}, 267 | "execution_count": 2 268 | } 269 | ], 270 | "source": [ 271 | "df.shape" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": null, 277 | "metadata": { 278 | "colab": { 279 | "base_uri": "https://localhost:8080/" 280 | }, 281 | "id": "xqwJgdLpMWT0", 282 | "outputId": "d6f3340d-213d-497f-b6da-9cd71e36e453" 283 | }, 284 | "outputs": [ 285 | { 286 | "output_type": "execute_result", 287 | "data": { 288 | "text/plain": [ 289 | "27262" 290 | ] 291 | }, 292 | "metadata": {}, 293 | "execution_count": 3 294 | } 295 | ], 296 | "source": [ 297 | "df[\"title\"].nunique()" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": null, 303 | "metadata": { 304 | "colab": { 305 | "base_uri": "https://localhost:8080/" 306 | }, 307 | "id": "34TVVMvKMZNH", 308 | "outputId": 
"90a965f9-5948-4513-bfe6-0aa2c2870284" 309 | }, 310 | "outputs": [ 311 | { 312 | "output_type": "execute_result", 313 | "data": { 314 | "text/plain": [ 315 | "Pulp Fiction (1994) 67310\n", 316 | "Forrest Gump (1994) 66172\n", 317 | "Shawshank Redemption, The (1994) 63366\n", 318 | "Silence of the Lambs, The (1991) 63299\n", 319 | "Jurassic Park (1993) 59715\n", 320 | "Name: title, dtype: int64" 321 | ] 322 | }, 323 | "metadata": {}, 324 | "execution_count": 4 325 | } 326 | ], 327 | "source": [ 328 | "df[\"title\"].value_counts().head()" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": null, 334 | "metadata": { 335 | "colab": { 336 | "base_uri": "https://localhost:8080/" 337 | }, 338 | "id": "UkSpP6hsMbMS", 339 | "outputId": "2e1b2b79-4a0f-43a3-a045-be0c70ce7657" 340 | }, 341 | "outputs": [ 342 | { 343 | "output_type": "execute_result", 344 | "data": { 345 | "text/plain": [ 346 | "(9050403, 6)" 347 | ] 348 | }, 349 | "metadata": {}, 350 | "execution_count": 5 351 | } 352 | ], 353 | "source": [ 354 | "comment_counts = pd.DataFrame(df[\"title\"].value_counts())\n", 355 | "rare_movies = comment_counts[comment_counts[\"title\"] <= 10000].index\n", 356 | "common_movies = df[~df[\"title\"].isin(rare_movies)]\n", 357 | "common_movies.shape" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": null, 363 | "metadata": { 364 | "colab": { 365 | "base_uri": "https://localhost:8080/" 366 | }, 367 | "id": "V9Uhj-ijMfqb", 368 | "outputId": "6a47a4e1-57f3-48a1-864f-4a3502c25a18" 369 | }, 370 | "outputs": [ 371 | { 372 | "output_type": "execute_result", 373 | "data": { 374 | "text/plain": [ 375 | "462" 376 | ] 377 | }, 378 | "metadata": {}, 379 | "execution_count": 6 380 | } 381 | ], 382 | "source": [ 383 | "common_movies[\"title\"].nunique()" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": null, 389 | "metadata": { 390 | "colab": { 391 | "base_uri": "https://localhost:8080/" 392 | }, 393 | "id": 
"XoEyAqazMiny", 394 | "outputId": "80de1ac0-2c4f-4d6d-f1c2-6eb7560dba18" 395 | }, 396 | "outputs": [ 397 | { 398 | "output_type": "execute_result", 399 | "data": { 400 | "text/plain": [ 401 | "27262" 402 | ] 403 | }, 404 | "metadata": {}, 405 | "execution_count": 7 406 | } 407 | ], 408 | "source": [ 409 | "df[\"title\"].nunique()" 410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "execution_count": null, 415 | "metadata": { 416 | "colab": { 417 | "base_uri": "https://localhost:8080/" 418 | }, 419 | "id": "SLDgi8d-Mkrq", 420 | "outputId": "89630291-1405-480e-d90f-ac0d02441d08" 421 | }, 422 | "outputs": [ 423 | { 424 | "output_type": "execute_result", 425 | "data": { 426 | "text/plain": [ 427 | "(137658, 462)" 428 | ] 429 | }, 430 | "metadata": {}, 431 | "execution_count": 8 432 | } 433 | ], 434 | "source": [ 435 | "user_movie_df = common_movies.pivot_table(index=[\"userId\"], columns=[\"title\"], values=\"rating\")\n", 436 | "user_movie_df.shape" 437 | ] 438 | }, 439 | { 440 | "cell_type": "code", 441 | "execution_count": null, 442 | "metadata": { 443 | "colab": { 444 | "base_uri": "https://localhost:8080/" 445 | }, 446 | "id": "bfKfhyFXMnwH", 447 | "outputId": "fb40b72d-066d-4928-c399-0aaa7991047d" 448 | }, 449 | "outputs": [ 450 | { 451 | "output_type": "execute_result", 452 | "data": { 453 | "text/plain": [ 454 | "Index(['10 Things I Hate About You (1999)', '12 Angry Men (1957)',\n", 455 | " '2001: A Space Odyssey (1968)', '28 Days Later (2002)', '300 (2007)',\n", 456 | " 'A.I. 
Artificial Intelligence (2001)', 'Abyss, The (1989)',\n", 457 | " 'Ace Ventura: Pet Detective (1994)',\n", 458 | " 'Ace Ventura: When Nature Calls (1995)', 'Addams Family Values (1993)',\n", 459 | " ...\n", 460 | " 'Wild Wild West (1999)', 'William Shakespeare's Romeo + Juliet (1996)',\n", 461 | " 'Willy Wonka & the Chocolate Factory (1971)', 'Witness (1985)',\n", 462 | " 'Wizard of Oz, The (1939)', 'X-Files: Fight the Future, The (1998)',\n", 463 | " 'X-Men (2000)', 'X2: X-Men United (2003)', 'You've Got Mail (1998)',\n", 464 | " 'Young Frankenstein (1974)'],\n", 465 | " dtype='object', name='title', length=462)" 466 | ] 467 | }, 468 | "metadata": {}, 469 | "execution_count": 9 470 | } 471 | ], 472 | "source": [ 473 | "user_movie_df.columns" 474 | ] 475 | }, 476 | { 477 | "cell_type": "markdown", 478 | "metadata": { 479 | "id": "xI2Tcpk1MqKt" 480 | }, 481 | "source": [ 482 | "#### **Adım 3: Item-Based Film Önerilerinin Yapılması**" 483 | ] 484 | }, 485 | { 486 | "cell_type": "code", 487 | "execution_count": null, 488 | "metadata": { 489 | "id": "29AsvGj5Ms0-" 490 | }, 491 | "outputs": [], 492 | "source": [ 493 | "movie_name = \"Matrix, The (1999)\"" 494 | ] 495 | }, 496 | { 497 | "cell_type": "code", 498 | "execution_count": null, 499 | "metadata": { 500 | "id": "shmP_hdbM403" 501 | }, 502 | "outputs": [], 503 | "source": [ 504 | "movie_name = \"X-Men (2000)\"" 505 | ] 506 | }, 507 | { 508 | "cell_type": "code", 509 | "execution_count": null, 510 | "metadata": { 511 | "id": "QgzQbfnyM2h8" 512 | }, 513 | "outputs": [], 514 | "source": [ 515 | "movie_name = user_movie_df[movie_name]" 516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": null, 521 | "metadata": { 522 | "colab": { 523 | "base_uri": "https://localhost:8080/" 524 | }, 525 | "id": "9bMEtG0OM4PN", 526 | "outputId": "70cd3ee8-8018-4259-9fd1-9e657de2be11" 527 | }, 528 | "outputs": [ 529 | { 530 | "output_type": "execute_result", 531 | "data": { 532 | "text/plain": [ 533 | "title\n", 
534 | "X-Men (2000) 1.000000\n", 535 | "X2: X-Men United (2003) 0.716946\n", 536 | "Spider-Man (2002) 0.492376\n", 537 | "Iron Man (2008) 0.458369\n", 538 | "Spider-Man 2 (2004) 0.422594\n", 539 | "Blade (1998) 0.395497\n", 540 | "Men in Black (a.k.a. MIB) (1997) 0.394806\n", 541 | "Pirates of the Caribbean: The Curse of the Black Pearl (2003) 0.383056\n", 542 | "Mummy, The (1999) 0.376553\n", 543 | "Batman Begins (2005) 0.375067\n", 544 | "dtype: float64" 545 | ] 546 | }, 547 | "metadata": {}, 548 | "execution_count": 17 549 | } 550 | ], 551 | "source": [ 552 | "user_movie_df.corrwith(movie_name).sort_values(ascending=False).head(10)" 553 | ] 554 | }, 555 | { 556 | "cell_type": "code", 557 | "execution_count": null, 558 | "metadata": { 559 | "colab": { 560 | "base_uri": "https://localhost:8080/" 561 | }, 562 | "id": "9zPha2-uM6Zy", 563 | "outputId": "2d47ebd6-058f-4248-d9d2-4b1a50776a8c" 564 | }, 565 | "outputs": [ 566 | { 567 | "output_type": "execute_result", 568 | "data": { 569 | "text/plain": [ 570 | "title\n", 571 | "True Romance (1993) 1.000000\n", 572 | "Reservoir Dogs (1992) 0.339518\n", 573 | "Scarface (1983) 0.326247\n", 574 | "Pulp Fiction (1994) 0.323993\n", 575 | "Natural Born Killers (1994) 0.294575\n", 576 | "From Dusk Till Dawn (1996) 0.293865\n", 577 | "Kill Bill: Vol. 2 (2004) 0.286415\n", 578 | "Sin City (2005) 0.285757\n", 579 | "Desperado (1995) 0.284598\n", 580 | "Kill Bill: Vol. 
1 (2003) 0.275683\n", 581 | "dtype: float64" 582 | ] 583 | }, 584 | "metadata": {}, 585 | "execution_count": 18 586 | } 587 | ], 588 | "source": [ 589 | "movie_name = pd.Series(user_movie_df.columns).sample(1).values[0]\n", 590 | "movie_name = user_movie_df[movie_name]\n", 591 | "user_movie_df.corrwith(movie_name).sort_values(ascending=False).head(10)" 592 | ] 593 | }, 594 | { 595 | "cell_type": "code", 596 | "execution_count": null, 597 | "metadata": { 598 | "colab": { 599 | "base_uri": "https://localhost:8080/" 600 | }, 601 | "id": "rKiutOL0M-Fq", 602 | "outputId": "a25547cc-9452-4181-f7ef-c1d48aae8dc3" 603 | }, 604 | "outputs": [ 605 | { 606 | "output_type": "execute_result", 607 | "data": { 608 | "text/plain": [ 609 | "[]" 610 | ] 611 | }, 612 | "metadata": {}, 613 | "execution_count": 19 614 | } 615 | ], 616 | "source": [ 617 | "def check_film(keyword, user_movie_df):\n", 618 | " return [col for col in user_movie_df.columns if keyword in col]\n", 619 | "\n", 620 | "check_film(\"Insomnia\", user_movie_df)" 621 | ] 622 | }, 623 | { 624 | "cell_type": "markdown", 625 | "metadata": { 626 | "id": "O3sgSVSEM-4w" 627 | }, 628 | "source": [ 629 | "#### **Adım 4: Çalışma Scriptinin Hazırlanması**" 630 | ] 631 | }, 632 | { 633 | "cell_type": "code", 634 | "execution_count": null, 635 | "metadata": { 636 | "id": "y6Hf1RpVNCCA" 637 | }, 638 | "outputs": [], 639 | "source": [ 640 | "def create_user_movie_df():\n", 641 | " import pandas as pd\n", 642 | " movie = pd.read_csv('/content/drive/MyDrive/DSMLBC10/week_7 (10.11.22-16.11.22)/datasets/movie_lens_dataset/movie.csv')\n", 643 | " rating = pd.read_csv('/content/drive/MyDrive/DSMLBC10/week_7 (10.11.22-16.11.22)/datasets/movie_lens_dataset/rating.csv')\n", 644 | " df = movie.merge(rating, how=\"left\", on=\"movieId\")\n", 645 | " comment_counts = pd.DataFrame(df[\"title\"].value_counts())\n", 646 | " rare_movies = comment_counts[comment_counts[\"title\"] <= 10000].index\n", 647 | " common_movies = 
df[~df[\"title\"].isin(rare_movies)]\n", 648 | " user_movie_df = common_movies.pivot_table(index=[\"userId\"], columns=[\"title\"], values=\"rating\")\n", 649 | " return user_movie_df\n", 650 | "\n", 651 | "user_movie_df = create_user_movie_df()" 652 | ] 653 | }, 654 | { 655 | "cell_type": "code", 656 | "execution_count": null, 657 | "metadata": { 658 | "id": "_YWCLrQpNEXO" 659 | }, 660 | "outputs": [], 661 | "source": [ 662 | "def item_based_recommender(movie_name, user_movie_df):\n", 663 | " movie_name = user_movie_df[movie_name]\n", 664 | " return user_movie_df.corrwith(movie_name).sort_values(ascending=False).head(10)" 665 | ] 666 | }, 667 | { 668 | "cell_type": "code", 669 | "execution_count": null, 670 | "metadata": { 671 | "colab": { 672 | "base_uri": "https://localhost:8080/" 673 | }, 674 | "id": "LDdkZH2uNF1C", 675 | "outputId": "fc3cdf51-ad24-432c-f531-3af23e96f905" 676 | }, 677 | "outputs": [ 678 | { 679 | "output_type": "execute_result", 680 | "data": { 681 | "text/plain": [ 682 | "title\n", 683 | "Matrix, The (1999) 1.000000\n", 684 | "Matrix Reloaded, The (2003) 0.516906\n", 685 | "Matrix Revolutions, The (2003) 0.449588\n", 686 | "Blade (1998) 0.334493\n", 687 | "Terminator 2: Judgment Day (1991) 0.333882\n", 688 | "Minority Report (2002) 0.332434\n", 689 | "Mission: Impossible (1996) 0.320815\n", 690 | "Lord of the Rings: The Fellowship of the Ring, The (2001) 0.318726\n", 691 | "Lord of the Rings: The Two Towers, The (2002) 0.318086\n", 692 | "Lord of the Rings: The Return of the King, The (2003) 0.314241\n", 693 | "dtype: float64" 694 | ] 695 | }, 696 | "metadata": {}, 697 | "execution_count": 22 698 | } 699 | ], 700 | "source": [ 701 | "item_based_recommender(\"Matrix, The (1999)\", user_movie_df)" 702 | ] 703 | }, 704 | { 705 | "cell_type": "code", 706 | "execution_count": null, 707 | "metadata": { 708 | "id": "rmQCL6NwNHRQ" 709 | }, 710 | "outputs": [], 711 | "source": [ 712 | "movie_name = pd.Series(user_movie_df.columns).sample(1).values[0]" 
713 | ] 714 | }, 715 | { 716 | "cell_type": "code", 717 | "execution_count": null, 718 | "metadata": { 719 | "colab": { 720 | "base_uri": "https://localhost:8080/" 721 | }, 722 | "id": "v6Djsr_sNKB8", 723 | "outputId": "64854487-91c4-4f21-df24-40a225b2b03b" 724 | }, 725 | "outputs": [ 726 | { 727 | "output_type": "execute_result", 728 | "data": { 729 | "text/plain": [ 730 | "title\n", 731 | "Remains of the Day, The (1993) 1.000000\n", 732 | "Sense and Sensibility (1995) 0.398029\n", 733 | "Little Women (1994) 0.310405\n", 734 | "Talented Mr. Ripley, The (1999) 0.307695\n", 735 | "Postman, The (Postino, Il) (1994) 0.296924\n", 736 | "Piano, The (1993) 0.292561\n", 737 | "Crying Game, The (1992) 0.291792\n", 738 | "Gandhi (1982) 0.287449\n", 739 | "Much Ado About Nothing (1993) 0.286938\n", 740 | "Quiz Show (1994) 0.281122\n", 741 | "dtype: float64" 742 | ] 743 | }, 744 | "metadata": {}, 745 | "execution_count": 24 746 | } 747 | ], 748 | "source": [ 749 | "item_based_recommender(movie_name, user_movie_df)" 750 | ] 751 | } 752 | ], 753 | "metadata": { 754 | "colab": { 755 | "provenance": [] 756 | }, 757 | "kernelspec": { 758 | "display_name": "Python 3", 759 | "name": "python3" 760 | }, 761 | "language_info": { 762 | "name": "python" 763 | } 764 | }, 765 | "nbformat": 4, 766 | "nbformat_minor": 0 767 | } -------------------------------------------------------------------------------- /week_7 (10.11.22-16.11.22)/matrix_factorization_melisa.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | }, 12 | "language_info": { 13 | "name": "python" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "source": [ 20 | "## **Model-Based Collaborative Filtering: Matrix Factorization**\n", 21 | "\n", 22 | "### **Veri Seti Hikayesi**\n", 23 
| "\n", 24 | "* Veri seti MovieLens tarafından sağlanmıştır.\n", 25 | "\n", 26 | "* İçerisinde filmleri ve bu filmlere verilen puanları barındırmaktadır.\n", 27 | "\n", 28 | "* Veri Seti yaklaşık 27000 film için yaklaşık 20.000.000 derecelendirme içermektedir. \n", 29 | "\n", 30 | "* Veri seti: https://grouplens.org/datasets/movielens/" 31 | ], 32 | "metadata": { 33 | "id": "h31gKG8LJAy0" 34 | } 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 1, 39 | "metadata": { 40 | "colab": { 41 | "base_uri": "https://localhost:8080/" 42 | }, 43 | "id": "34EJEVxx4lCH", 44 | "outputId": "0c1e6268-5c08-44d4-bb89-c1ae3c63779a" 45 | }, 46 | "outputs": [ 47 | { 48 | "output_type": "stream", 49 | "name": "stdout", 50 | "text": [ 51 | "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", 52 | "Collecting surprise\n", 53 | " Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)\n", 54 | "Collecting scikit-surprise\n", 55 | " Downloading scikit-surprise-1.1.1.tar.gz (11.8 MB)\n", 56 | "\u001b[K |████████████████████████████████| 11.8 MB 6.5 MB/s \n", 57 | "\u001b[?25hRequirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.7/dist-packages (from scikit-surprise->surprise) (1.2.0)\n", 58 | "Requirement already satisfied: numpy>=1.11.2 in /usr/local/lib/python3.7/dist-packages (from scikit-surprise->surprise) (1.21.6)\n", 59 | "Requirement already satisfied: scipy>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from scikit-surprise->surprise) (1.7.3)\n", 60 | "Requirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.7/dist-packages (from scikit-surprise->surprise) (1.15.0)\n", 61 | "Building wheels for collected packages: scikit-surprise\n", 62 | " Building wheel for scikit-surprise (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", 63 | " Created wheel for scikit-surprise: filename=scikit_surprise-1.1.1-cp37-cp37m-linux_x86_64.whl size=1633955 sha256=9404659e66cfc0c518939bd4ac3a853958c8305c66e9512901de82e7fc3290d0\n", 64 | " Stored in directory: /root/.cache/pip/wheels/76/44/74/b498c42be47b2406bd27994e16c5188e337c657025ab400c1c\n", 65 | "Successfully built scikit-surprise\n", 66 | "Installing collected packages: scikit-surprise, surprise\n", 67 | "Successfully installed scikit-surprise-1.1.1 surprise-0.1\n" 68 | ] 69 | } 70 | ], 71 | "source": [ 72 | "!pip install surprise" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "source": [ 78 | "import pandas as pd\n", 79 | "from surprise import Reader, SVD, Dataset, accuracy\n", 80 | "from surprise.model_selection import GridSearchCV, train_test_split, cross_validate\n", 81 | "pd.set_option('display.max_columns', None)" 82 | ], 83 | "metadata": { 84 | "id": "RqkI4M4-4qLC" 85 | }, 86 | "execution_count": 2, 87 | "outputs": [] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "source": [ 92 | "#### **Adım 1: Veri Setinin Hazırlanması (Data Preparation)**" 93 | ], 94 | "metadata": { 95 | "id": "vaGnqb4oJTnq" 96 | } 97 | }, 98 | { 99 | "cell_type": "code", 100 | "source": [ 101 | "movie = pd.read_csv('/content/drive/MyDrive/DSMLBC10/week_7 (10.11.22-16.11.22)/datasets/movie_lens_dataset/movie.csv')\n", 102 | "rating = pd.read_csv('/content/drive/MyDrive/DSMLBC10/week_7 (10.11.22-16.11.22)/datasets/movie_lens_dataset/rating.csv')\n", 103 | "df = movie.merge(rating, how=\"left\", on=\"movieId\")\n", 104 | "df.head()" 105 | ], 106 | "metadata": { 107 | "id": "eKxUqmjGI5fs", 108 | "colab": { 109 | "base_uri": "https://localhost:8080/", 110 | "height": 206 111 | }, 112 | "outputId": "6b9efa15-a977-44ff-9ff8-2b0f85d859a5" 113 | }, 114 | "execution_count": 3, 115 | "outputs": [ 116 | { 117 | "output_type": "execute_result", 118 | "data": { 119 | "text/plain": [ 120 | " movieId title genres \\\n", 121 | "0 1 Toy Story (1995) 
Adventure|Animation|Children|Comedy|Fantasy \n", 122 | "1 1 Toy Story (1995) Adventure|Animation|Children|Comedy|Fantasy \n", 123 | "2 1 Toy Story (1995) Adventure|Animation|Children|Comedy|Fantasy \n", 124 | "3 1 Toy Story (1995) Adventure|Animation|Children|Comedy|Fantasy \n", 125 | "4 1 Toy Story (1995) Adventure|Animation|Children|Comedy|Fantasy \n", 126 | "\n", 127 | " userId rating timestamp \n", 128 | "0 3.0 4.0 1999-12-11 13:36:47 \n", 129 | "1 6.0 5.0 1997-03-13 17:50:52 \n", 130 | "2 8.0 4.0 1996-06-05 13:37:51 \n", 131 | "3 10.0 4.0 1999-11-25 02:44:47 \n", 132 | "4 11.0 4.5 2009-01-02 01:13:41 " 133 | ], 134 | "text/html": [ 135 | "\n", 136 | "
\n", 137 | "
\n", 138 | "
\n", 139 | "\n", 152 | "\n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | "
movieIdtitlegenresuserIdratingtimestamp
01Toy Story (1995)Adventure|Animation|Children|Comedy|Fantasy3.04.01999-12-11 13:36:47
11Toy Story (1995)Adventure|Animation|Children|Comedy|Fantasy6.05.01997-03-13 17:50:52
21Toy Story (1995)Adventure|Animation|Children|Comedy|Fantasy8.04.01996-06-05 13:37:51
31Toy Story (1995)Adventure|Animation|Children|Comedy|Fantasy10.04.01999-11-25 02:44:47
41Toy Story (1995)Adventure|Animation|Children|Comedy|Fantasy11.04.52009-01-02 01:13:41
\n", 212 | "
\n", 213 | " \n", 223 | " \n", 224 | " \n", 261 | "\n", 262 | " \n", 286 | "
\n", 287 | "
\n", 288 | " " 289 | ] 290 | }, 291 | "metadata": {}, 292 | "execution_count": 3 293 | } 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "source": [ 299 | "movie_ids = [130219, 356, 4422, 541]\n", 300 | "movies = [\"The Dark Knight (2011)\",\n", 301 | " \"Cries and Whispers (Viskningar och rop) (1972)\",\n", 302 | " \"Forrest Gump (1994)\",\n", 303 | " \"Blade Runner (1982)\"]\n", 304 | "sample_df = df[df.movieId.isin(movie_ids)]\n", 305 | "sample_df.head()" 306 | ], 307 | "metadata": { 308 | "id": "vBnIxOSyJXC3", 309 | "colab": { 310 | "base_uri": "https://localhost:8080/", 311 | "height": 206 312 | }, 313 | "outputId": "5c78b7a0-e04b-46dd-d30d-c5a047589ffe" 314 | }, 315 | "execution_count": 4, 316 | "outputs": [ 317 | { 318 | "output_type": "execute_result", 319 | "data": { 320 | "text/plain": [ 321 | " movieId title genres userId \\\n", 322 | "2457839 356 Forrest Gump (1994) Comedy|Drama|Romance|War 4.0 \n", 323 | "2457840 356 Forrest Gump (1994) Comedy|Drama|Romance|War 7.0 \n", 324 | "2457841 356 Forrest Gump (1994) Comedy|Drama|Romance|War 8.0 \n", 325 | "2457842 356 Forrest Gump (1994) Comedy|Drama|Romance|War 9.0 \n", 326 | "2457843 356 Forrest Gump (1994) Comedy|Drama|Romance|War 10.0 \n", 327 | "\n", 328 | " rating timestamp \n", 329 | "2457839 4.0 1996-08-24 09:28:42 \n", 330 | "2457840 4.0 2002-01-16 19:02:55 \n", 331 | "2457841 5.0 1996-06-05 13:44:19 \n", 332 | "2457842 4.0 2001-07-01 20:26:38 \n", 333 | "2457843 3.0 1999-11-25 02:32:02 " 334 | ], 335 | "text/html": [ 336 | "\n", 337 | "
\n", 338 | "
\n", 339 | "
\n", 340 | "\n", 353 | "\n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | "
movieIdtitlegenresuserIdratingtimestamp
2457839356Forrest Gump (1994)Comedy|Drama|Romance|War4.04.01996-08-24 09:28:42
2457840356Forrest Gump (1994)Comedy|Drama|Romance|War7.04.02002-01-16 19:02:55
2457841356Forrest Gump (1994)Comedy|Drama|Romance|War8.05.01996-06-05 13:44:19
2457842356Forrest Gump (1994)Comedy|Drama|Romance|War9.04.02001-07-01 20:26:38
2457843356Forrest Gump (1994)Comedy|Drama|Romance|War10.03.01999-11-25 02:32:02
\n", 413 | "
\n", 414 | " \n", 424 | " \n", 425 | " \n", 462 | "\n", 463 | " \n", 487 | "
\n", 488 | "
\n", 489 | " " 490 | ] 491 | }, 492 | "metadata": {}, 493 | "execution_count": 4 494 | } 495 | ] 496 | }, 497 | { 498 | "cell_type": "code", 499 | "source": [ 500 | "sample_df.shape" 501 | ], 502 | "metadata": { 503 | "id": "IzTp873uJaen", 504 | "colab": { 505 | "base_uri": "https://localhost:8080/" 506 | }, 507 | "outputId": "4d40d9c4-10f2-4872-c875-eeb3595e4d2d" 508 | }, 509 | "execution_count": 5, 510 | "outputs": [ 511 | { 512 | "output_type": "execute_result", 513 | "data": { 514 | "text/plain": [ 515 | "(97343, 6)" 516 | ] 517 | }, 518 | "metadata": {}, 519 | "execution_count": 5 520 | } 521 | ] 522 | }, 523 | { 524 | "cell_type": "code", 525 | "source": [ 526 | "user_movie_df = sample_df.pivot_table(index=[\"userId\"],\n", 527 | " columns=[\"title\"],\n", 528 | " values=\"rating\")\n", 529 | "user_movie_df.shape" 530 | ], 531 | "metadata": { 532 | "id": "fVfVZNCuJbwo", 533 | "colab": { 534 | "base_uri": "https://localhost:8080/" 535 | }, 536 | "outputId": "df5dc2da-c4fb-4612-a2a6-b89e249c2501" 537 | }, 538 | "execution_count": 6, 539 | "outputs": [ 540 | { 541 | "output_type": "execute_result", 542 | "data": { 543 | "text/plain": [ 544 | "(76918, 4)" 545 | ] 546 | }, 547 | "metadata": {}, 548 | "execution_count": 6 549 | } 550 | ] 551 | }, 552 | { 553 | "cell_type": "code", 554 | "source": [ 555 | "reader = Reader(rating_scale=(1, 5))\n", 556 | "\n", 557 | "data = Dataset.load_from_df(sample_df[['userId',\n", 558 | " 'movieId',\n", 559 | " 'rating']], reader)\n", 560 | "data" 561 | ], 562 | "metadata": { 563 | "id": "tKZ-g0l8Jesw", 564 | "colab": { 565 | "base_uri": "https://localhost:8080/" 566 | }, 567 | "outputId": "4d06bcb0-9eae-44ab-b746-c092c6391aa6" 568 | }, 569 | "execution_count": 7, 570 | "outputs": [ 571 | { 572 | "output_type": "execute_result", 573 | "data": { 574 | "text/plain": [ 575 | "" 576 | ] 577 | }, 578 | "metadata": {}, 579 | "execution_count": 7 580 | } 581 | ] 582 | }, 583 | { 584 | "cell_type": "markdown", 585 | "source": [ 586 | 
"#### **Adım 2: Modelleme**" 587 | ], 588 | "metadata": { 589 | "id": "J1ngJ7a7JjYE" 590 | } 591 | }, 592 | { 593 | "cell_type": "code", 594 | "source": [ 595 | "trainset, testset = train_test_split(data, test_size=.25)\n", 596 | "svd_model = SVD()\n", 597 | "svd_model.fit(trainset)\n", 598 | "predictions = svd_model.test(testset)" 599 | ], 600 | "metadata": { 601 | "id": "0K9oj4DCJh-f" 602 | }, 603 | "execution_count": 8, 604 | "outputs": [] 605 | }, 606 | { 607 | "cell_type": "code", 608 | "source": [ 609 | "accuracy.rmse(predictions)" 610 | ], 611 | "metadata": { 612 | "id": "ZuTnNftVJpXe", 613 | "colab": { 614 | "base_uri": "https://localhost:8080/" 615 | }, 616 | "outputId": "5878e6ac-a5b3-4231-d75c-2e94ed555b82" 617 | }, 618 | "execution_count": 9, 619 | "outputs": [ 620 | { 621 | "output_type": "stream", 622 | "name": "stdout", 623 | "text": [ 624 | "RMSE: 0.9354\n" 625 | ] 626 | }, 627 | { 628 | "output_type": "execute_result", 629 | "data": { 630 | "text/plain": [ 631 | "0.9353961798282115" 632 | ] 633 | }, 634 | "metadata": {}, 635 | "execution_count": 9 636 | } 637 | ] 638 | }, 639 | { 640 | "cell_type": "code", 641 | "source": [ 642 | "svd_model.predict(uid=1.0, iid=541, verbose=True)" 643 | ], 644 | "metadata": { 645 | "id": "EB8c5yxnJqxY", 646 | "colab": { 647 | "base_uri": "https://localhost:8080/" 648 | }, 649 | "outputId": "f6df30e6-abc7-413f-a156-6e845534f554" 650 | }, 651 | "execution_count": 10, 652 | "outputs": [ 653 | { 654 | "output_type": "stream", 655 | "name": "stdout", 656 | "text": [ 657 | "user: 1.0 item: 541 r_ui = None est = 3.87 {'was_impossible': False}\n" 658 | ] 659 | }, 660 | { 661 | "output_type": "execute_result", 662 | "data": { 663 | "text/plain": [ 664 | "Prediction(uid=1.0, iid=541, r_ui=None, est=3.86777281590454, details={'was_impossible': False})" 665 | ] 666 | }, 667 | "metadata": {}, 668 | "execution_count": 10 669 | } 670 | ] 671 | }, 672 | { 673 | "cell_type": "code", 674 | "source": [ 675 | 
"svd_model.predict(uid=1.0, iid=356, verbose=True)" 676 | ], 677 | "metadata": { 678 | "id": "Mn4fz5JoJsZC", 679 | "colab": { 680 | "base_uri": "https://localhost:8080/" 681 | }, 682 | "outputId": "a7d27c50-3f26-4377-cad8-656c3c3d530d" 683 | }, 684 | "execution_count": 11, 685 | "outputs": [ 686 | { 687 | "output_type": "stream", 688 | "name": "stdout", 689 | "text": [ 690 | "user: 1.0 item: 356 r_ui = None est = 4.23 {'was_impossible': False}\n" 691 | ] 692 | }, 693 | { 694 | "output_type": "execute_result", 695 | "data": { 696 | "text/plain": [ 697 | "Prediction(uid=1.0, iid=356, r_ui=None, est=4.226473402015522, details={'was_impossible': False})" 698 | ] 699 | }, 700 | "metadata": {}, 701 | "execution_count": 11 702 | } 703 | ] 704 | }, 705 | { 706 | "cell_type": "code", 707 | "source": [ 708 | "sample_df[sample_df[\"userId\"] == 1]" 709 | ], 710 | "metadata": { 711 | "id": "k4zKKIoXJtiP", 712 | "colab": { 713 | "base_uri": "https://localhost:8080/", 714 | "height": 81 715 | }, 716 | "outputId": "ead2eca6-ef5a-4b21-fa34-73186cdc00e9" 717 | }, 718 | "execution_count": 12, 719 | "outputs": [ 720 | { 721 | "output_type": "execute_result", 722 | "data": { 723 | "text/plain": [ 724 | " movieId title genres userId rating \\\n", 725 | "3612352 541 Blade Runner (1982) Action|Sci-Fi|Thriller 1.0 4.0 \n", 726 | "\n", 727 | " timestamp \n", 728 | "3612352 2005-04-02 23:30:03 " 729 | ], 730 | "text/html": [ 731 | "\n", 732 | "
\n", 733 | "
\n", 734 | "
\n", 735 | "\n", 748 | "\n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | "
movieIdtitlegenresuserIdratingtimestamp
3612352541Blade Runner (1982)Action|Sci-Fi|Thriller1.04.02005-04-02 23:30:03
\n", 772 | "
\n", 773 | " \n", 783 | " \n", 784 | " \n", 821 | "\n", 822 | " \n", 846 | "
\n", 847 | "
\n", 848 | " " 849 | ] 850 | }, 851 | "metadata": {}, 852 | "execution_count": 12 853 | } 854 | ] 855 | }, 856 | { 857 | "cell_type": "markdown", 858 | "source": [ 859 | "#### **Adım 3: Model Tuning**" 860 | ], 861 | "metadata": { 862 | "id": "35qHHS8AJvNw" 863 | } 864 | }, 865 | { 866 | "cell_type": "code", 867 | "source": [ 868 | "param_grid = {'n_epochs': [5, 10, 20],\n", 869 | " 'lr_all': [0.002, 0.005, 0.007]}" 870 | ], 871 | "metadata": { 872 | "id": "W6vpEE2_Jx6j" 873 | }, 874 | "execution_count": 13, 875 | "outputs": [] 876 | }, 877 | { 878 | "cell_type": "code", 879 | "source": [ 880 | "gs = GridSearchCV(SVD,\n", 881 | " param_grid,\n", 882 | " measures=['rmse', 'mae'],\n", 883 | " cv=3,\n", 884 | " n_jobs=-1,\n", 885 | " joblib_verbose=True)\n", 886 | "gs.fit(data)" 887 | ], 888 | "metadata": { 889 | "id": "vcaMnYh6JzMK", 890 | "colab": { 891 | "base_uri": "https://localhost:8080/" 892 | }, 893 | "outputId": "f9d1c626-094d-4e2d-8c59-21d9733951fc" 894 | }, 895 | "execution_count": 14, 896 | "outputs": [ 897 | { 898 | "output_type": "stream", 899 | "name": "stderr", 900 | "text": [ 901 | "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n", 902 | "[Parallel(n_jobs=-1)]: Done 27 out of 27 | elapsed: 2.1min finished\n" 903 | ] 904 | } 905 | ] 906 | }, 907 | { 908 | "cell_type": "code", 909 | "source": [ 910 | "gs.best_score['rmse']" 911 | ], 912 | "metadata": { 913 | "id": "m1DBLPdoJ3hb", 914 | "colab": { 915 | "base_uri": "https://localhost:8080/" 916 | }, 917 | "outputId": "de450696-5b67-43dc-a9da-bd4da843a127" 918 | }, 919 | "execution_count": 15, 920 | "outputs": [ 921 | { 922 | "output_type": "execute_result", 923 | "data": { 924 | "text/plain": [ 925 | "0.9307239424125795" 926 | ] 927 | }, 928 | "metadata": {}, 929 | "execution_count": 15 930 | } 931 | ] 932 | }, 933 | { 934 | "cell_type": "code", 935 | "source": [ 936 | "gs.best_params['rmse']" 937 | ], 938 | "metadata": { 939 | "id": "BQhyA4zAJ6E_", 940 | "colab": { 941 | 
"base_uri": "https://localhost:8080/" 942 | }, 943 | "outputId": "cf1918a7-c146-4499-8383-70f76a508609" 944 | }, 945 | "execution_count": 16, 946 | "outputs": [ 947 | { 948 | "output_type": "execute_result", 949 | "data": { 950 | "text/plain": [ 951 | "{'n_epochs': 5, 'lr_all': 0.002}" 952 | ] 953 | }, 954 | "metadata": {}, 955 | "execution_count": 16 956 | } 957 | ] 958 | }, 959 | { 960 | "cell_type": "markdown", 961 | "source": [ 962 | "#### **Adım 4: Final Model ve Tahmin**" 963 | ], 964 | "metadata": { 965 | "id": "0K56OaOgJ8LL" 966 | } 967 | }, 968 | { 969 | "cell_type": "code", 970 | "source": [ 971 | "dir(svd_model)\n", 972 | "svd_model.n_epochs" 973 | ], 974 | "metadata": { 975 | "id": "fYk9ofdrJ9tM", 976 | "colab": { 977 | "base_uri": "https://localhost:8080/" 978 | }, 979 | "outputId": "d06f1606-af1e-4e4c-fb08-1b62243b0e73" 980 | }, 981 | "execution_count": 17, 982 | "outputs": [ 983 | { 984 | "output_type": "execute_result", 985 | "data": { 986 | "text/plain": [ 987 | "20" 988 | ] 989 | }, 990 | "metadata": {}, 991 | "execution_count": 17 992 | } 993 | ] 994 | }, 995 | { 996 | "cell_type": "code", 997 | "source": [ 998 | "svd_model = SVD(**gs.best_params['rmse'])" 999 | ], 1000 | "metadata": { 1001 | "id": "bmvkuN5-KEL8" 1002 | }, 1003 | "execution_count": 18, 1004 | "outputs": [] 1005 | }, 1006 | { 1007 | "cell_type": "code", 1008 | "source": [ 1009 | "data = data.build_full_trainset()\n", 1010 | "svd_model.fit(data)" 1011 | ], 1012 | "metadata": { 1013 | "id": "7xTWqGr_KC0v", 1014 | "colab": { 1015 | "base_uri": "https://localhost:8080/" 1016 | }, 1017 | "outputId": "b6a0e1e8-632f-44d3-e19a-0df6615027fd" 1018 | }, 1019 | "execution_count": 19, 1020 | "outputs": [ 1021 | { 1022 | "output_type": "execute_result", 1023 | "data": { 1024 | "text/plain": [ 1025 | "" 1026 | ] 1027 | }, 1028 | "metadata": {}, 1029 | "execution_count": 19 1030 | } 1031 | ] 1032 | }, 1033 | { 1034 | "cell_type": "code", 1035 | "source": [ 1036 | "svd_model.predict(uid=1.0, 
iid=541, verbose=True)" 1037 | ], 1038 | "metadata": { 1039 | "id": "DSvapItXKBCE", 1040 | "colab": { 1041 | "base_uri": "https://localhost:8080/" 1042 | }, 1043 | "outputId": "dba2c483-e64e-4101-f1af-5ea22f3519f7" 1044 | }, 1045 | "execution_count": 20, 1046 | "outputs": [ 1047 | { 1048 | "output_type": "stream", 1049 | "name": "stdout", 1050 | "text": [ 1051 | "user: 1.0 item: 541 r_ui = None est = 4.21 {'was_impossible': False}\n" 1052 | ] 1053 | }, 1054 | { 1055 | "output_type": "execute_result", 1056 | "data": { 1057 | "text/plain": [ 1058 | "Prediction(uid=1.0, iid=541, r_ui=None, est=4.212630303654173, details={'was_impossible': False})" 1059 | ] 1060 | }, 1061 | "metadata": {}, 1062 | "execution_count": 20 1063 | } 1064 | ] 1065 | } 1066 | ] 1067 | } -------------------------------------------------------------------------------- /week_7 (10.11.22-16.11.22)/recommendation_systems_melisa.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/melisagozet/DataScienceMachineLearningBootcampMiuul/d9d571e6b27c33e09d121a3804ee231460ed55b3/week_7 (10.11.22-16.11.22)/recommendation_systems_melisa.pdf -------------------------------------------------------------------------------- /week_9 (24.11.22-30.11.22)/Soledad Galli_Feature Engineering Cookbook.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/melisagozet/DataScienceMachineLearningBootcampMiuul/d9d571e6b27c33e09d121a3804ee231460ed55b3/week_9 (24.11.22-30.11.22)/Soledad Galli_Feature Engineering Cookbook.pdf -------------------------------------------------------------------------------- /week__10 (01.12.22-07.12.22)/makine_öğrenmesi_doğrusal_regresyon_melisa.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/melisagozet/DataScienceMachineLearningBootcampMiuul/d9d571e6b27c33e09d121a3804ee231460ed55b3/week__10 
(01.12.22-07.12.22)/makine_öğrenmesi_doğrusal_regresyon_melisa.pdf -------------------------------------------------------------------------------- /week__10 (01.12.22-07.12.22)/makine_öğrenmesi_temel_kavramlar_melisa.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/melisagozet/DataScienceMachineLearningBootcampMiuul/d9d571e6b27c33e09d121a3804ee231460ed55b3/week__10 (01.12.22-07.12.22)/makine_öğrenmesi_temel_kavramlar_melisa.pdf -------------------------------------------------------------------------------- /week__12 (15.12.22-21.12.22)/cart_final.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/melisagozet/DataScienceMachineLearningBootcampMiuul/d9d571e6b27c33e09d121a3804ee231460ed55b3/week__12 (15.12.22-21.12.22)/cart_final.png -------------------------------------------------------------------------------- /week__13 (22.12.22-28.12.22)/makine_öğrenmesi_pipeline_II_III.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | }, 12 | "language_info": { 13 | "name": "python" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "source": [ 20 | "### **7. End-to-End Diabetes Machine Learning Pipeline II**\n", 21 | "\n", 22 | "\n", 23 | "* Projenin nihai amacı belirlendi. Çeşitli incelemeler yapıldı. Şimdi kişisel bir bilgisayarda işletim sistemi seviyesinden çalıştırılması için gerekli adımlar gösterilecektir." 
24 | ], 25 | "metadata": { 26 | "id": "S7Vk4nfiMLrf" 27 | } 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 1, 32 | "metadata": { 33 | "id": "AKIMt-9HLyqK" 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "import joblib\n", 38 | "import pandas as pd\n", 39 | "from lightgbm import LGBMClassifier\n", 40 | "from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier, AdaBoostClassifier\n", 41 | "from sklearn.linear_model import LogisticRegression\n", 42 | "from sklearn.model_selection import cross_validate, GridSearchCV\n", 43 | "from sklearn.neighbors import KNeighborsClassifier\n", 44 | "from sklearn.preprocessing import StandardScaler\n", 45 | "from sklearn.svm import SVC\n", 46 | "from sklearn.tree import DecisionTreeClassifier\n", 47 | "from xgboost import XGBClassifier" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "source": [ 53 | "### **Helper Functions**\n", 54 | "\n", 55 | "#### **# Data Preprocessing & Feature Engineering**" 56 | ], 57 | "metadata": { 58 | "id": "10qsLDVlMV6q" 59 | } 60 | }, 61 | { 62 | "cell_type": "code", 63 | "source": [ 64 | "def grab_col_names(dataframe, cat_th=10, car_th=20):\n", 65 | " \"\"\"\n", 66 | "\n", 67 | " Veri setindeki kategorik, numerik ve kategorik fakat kardinal değişkenlerin isimlerini verir.\n", 68 | " Not: Kategorik değişkenlerin içerisine numerik görünümlü kategorik değişkenler de dahildir.\n", 69 | "\n", 70 | " Parameters\n", 71 | " ------\n", 72 | " dataframe: dataframe\n", 73 | " Değişken isimleri alınmak istenilen dataframe\n", 74 | " cat_th: int, optional\n", 75 | " numerik fakat kategorik olan değişkenler için sınıf eşik değeri\n", 76 | " car_th: int, optinal\n", 77 | " kategorik fakat kardinal değişkenler için sınıf eşik değeri\n", 78 | "\n", 79 | " Returns\n", 80 | " ------\n", 81 | " cat_cols: list\n", 82 | " Kategorik değişken listesi\n", 83 | " num_cols: list\n", 84 | " Numerik değişken listesi\n", 85 | " cat_but_car: list\n", 86 | " 
Kategorik görünümlü kardinal değişken listesi\n", 87 | "\n", 88 | " Examples\n", 89 | " ------\n", 90 | " import seaborn as sns\n", 91 | " df = sns.load_dataset(\"iris\")\n", 92 | " print(grab_col_names(df))\n", 93 | "\n", 94 | "\n", 95 | " Notes\n", 96 | " ------\n", 97 | " cat_cols + num_cols + cat_but_car = toplam değişken sayısı\n", 98 | " num_but_cat cat_cols'un içerisinde.\n", 99 | " Return olan 3 liste toplamı toplam değişken sayısına eşittir: cat_cols + num_cols + cat_but_car = değişken sayısı\n", 100 | "\n", 101 | " \"\"\"\n", 102 | "\n", 103 | " # cat_cols, cat_but_car\n", 104 | " cat_cols = [col for col in dataframe.columns if dataframe[col].dtypes == \"O\"]\n", 105 | " num_but_cat = [col for col in dataframe.columns if dataframe[col].nunique() < cat_th and\n", 106 | " dataframe[col].dtypes != \"O\"]\n", 107 | " cat_but_car = [col for col in dataframe.columns if dataframe[col].nunique() > car_th and\n", 108 | " dataframe[col].dtypes == \"O\"]\n", 109 | " cat_cols = cat_cols + num_but_cat\n", 110 | " cat_cols = [col for col in cat_cols if col not in cat_but_car]\n", 111 | "\n", 112 | " # num_cols\n", 113 | " num_cols = [col for col in dataframe.columns if dataframe[col].dtypes != \"O\"]\n", 114 | " num_cols = [col for col in num_cols if col not in num_but_cat]\n", 115 | "\n", 116 | " # print(f\"Observations: {dataframe.shape[0]}\")\n", 117 | " # print(f\"Variables: {dataframe.shape[1]}\")\n", 118 | " # print(f'cat_cols: {len(cat_cols)}')\n", 119 | " # print(f'num_cols: {len(num_cols)}')\n", 120 | " # print(f'cat_but_car: {len(cat_but_car)}')\n", 121 | " # print(f'num_but_cat: {len(num_but_cat)}')\n", 122 | " return cat_cols, num_cols, cat_but_car\n", 123 | "\n", 124 | "def outlier_thresholds(dataframe, col_name, q1=0.25, q3=0.75):\n", 125 | " quartile1 = dataframe[col_name].quantile(q1)\n", 126 | " quartile3 = dataframe[col_name].quantile(q3)\n", 127 | " interquantile_range = quartile3 - quartile1\n", 128 | " up_limit = quartile3 + 1.5 * 
interquantile_range\n", 129 | " low_limit = quartile1 - 1.5 * interquantile_range\n", 130 | " return low_limit, up_limit\n", 131 | "\n", 132 | "def replace_with_thresholds(dataframe, variable):\n", 133 | " low_limit, up_limit = outlier_thresholds(dataframe, variable)\n", 134 | " dataframe.loc[(dataframe[variable] < low_limit), variable] = low_limit\n", 135 | " dataframe.loc[(dataframe[variable] > up_limit), variable] = up_limit\n", 136 | "\n", 137 | "def one_hot_encoder(dataframe, categorical_cols, drop_first=False):\n", 138 | " dataframe = pd.get_dummies(dataframe, columns=categorical_cols, drop_first=drop_first)\n", 139 | " return dataframe\n", 140 | "\n", 141 | "def diabetes_data_prep(dataframe):\n", 142 | " dataframe.columns = [col.upper() for col in dataframe.columns]\n", 143 | "\n", 144 | " # Glucose\n", 145 | " dataframe['NEW_GLUCOSE_CAT'] = pd.cut(x=dataframe['GLUCOSE'], bins=[-1, 139, 200], labels=[\"normal\", \"prediabetes\"])\n", 146 | "\n", 147 | " # Age\n", 148 | " dataframe.loc[(dataframe['AGE'] < 35), \"NEW_AGE_CAT\"] = 'young'\n", 149 | " dataframe.loc[(dataframe['AGE'] >= 35) & (dataframe['AGE'] <= 55), \"NEW_AGE_CAT\"] = 'middleage'\n", 150 | " dataframe.loc[(dataframe['AGE'] > 55), \"NEW_AGE_CAT\"] = 'old'\n", 151 | "\n", 152 | " # BMI\n", 153 | " dataframe['NEW_BMI_RANGE'] = pd.cut(x=dataframe['BMI'], bins=[-1, 18.5, 24.9, 29.9, 100],\n", 154 | " labels=[\"underweight\", \"healty\", \"overweight\", \"obese\"])\n", 155 | "\n", 156 | " # BloodPressure\n", 157 | " dataframe['NEW_BLOODPRESSURE'] = pd.cut(x=dataframe['BLOODPRESSURE'], bins=[-1, 79, 89, 123],\n", 158 | " labels=[\"normal\", \"hs1\", \"hs2\"])\n", 159 | "\n", 160 | " cat_cols, num_cols, cat_but_car = grab_col_names(dataframe, cat_th=5, car_th=20)\n", 161 | "\n", 162 | " cat_cols = [col for col in cat_cols if \"OUTCOME\" not in col]\n", 163 | "\n", 164 | " df = one_hot_encoder(dataframe, cat_cols, drop_first=True)\n", 165 | "\n", 166 | " cat_cols, num_cols, cat_but_car = 
grab_col_names(df, cat_th=5, car_th=20)\n", 167 | "\n", 168 | " replace_with_thresholds(df, \"INSULIN\")\n", 169 | "\n", 170 | " X_scaled = StandardScaler().fit_transform(df[num_cols])\n", 171 | " df[num_cols] = pd.DataFrame(X_scaled, columns=df[num_cols].columns)\n", 172 | "\n", 173 | " y = df[\"OUTCOME\"]\n", 174 | " X = df.drop([\"OUTCOME\"], axis=1)\n", 175 | "\n", 176 | " return X, y\n", 177 | "\n", 178 | "# Base Models\n", 179 | "def base_models(X, y, scoring=\"roc_auc\"):\n", 180 | " print(\"Base Models....\")\n", 181 | " classifiers = [('LR', LogisticRegression()),\n", 182 | " ('KNN', KNeighborsClassifier()),\n", 183 | " (\"SVC\", SVC()),\n", 184 | " (\"CART\", DecisionTreeClassifier()),\n", 185 | " (\"RF\", RandomForestClassifier()),\n", 186 | " ('Adaboost', AdaBoostClassifier()),\n", 187 | " ('GBM', GradientBoostingClassifier()),\n", 188 | " ('XGBoost', XGBClassifier(use_label_encoder=False, eval_metric='logloss')),\n", 189 | " ('LightGBM', LGBMClassifier()),\n", 190 | " # ('CatBoost', CatBoostClassifier(verbose=False))\n", 191 | " ]\n", 192 | "\n", 193 | " for name, classifier in classifiers:\n", 194 | " cv_results = cross_validate(classifier, X, y, cv=3, scoring=scoring)\n", 195 | " print(f\"{scoring}: {round(cv_results['test_score'].mean(), 4)} ({name}) \")\n", 196 | "\n", 197 | "# Hyperparameter Optimization\n", 198 | "\n", 199 | "# config.py : Dışarıdan ayarlanabilecek değişkenleri barındırır. Verinin okunma kaynağıdır. 
Path kaynağıdır.\n", 200 | "\n", 201 | "knn_params = {\"n_neighbors\": range(2, 50)}\n", 202 | "\n", 203 | "cart_params = {'max_depth': range(1, 20),\n", 204 | " \"min_samples_split\": range(2, 30)}\n", 205 | "\n", 206 | "rf_params = {\"max_depth\": [8, 15, None],\n", 207 | " \"max_features\": [5, 7, \"auto\"],\n", 208 | " \"min_samples_split\": [15, 20],\n", 209 | " \"n_estimators\": [200, 300]}\n", 210 | "\n", 211 | "xgboost_params = {\"learning_rate\": [0.1, 0.01],\n", 212 | " \"max_depth\": [5, 8],\n", 213 | " \"n_estimators\": [100, 200],\n", 214 | " \"colsample_bytree\": [0.5, 1]}\n", 215 | "\n", 216 | "lightgbm_params = {\"learning_rate\": [0.01, 0.1],\n", 217 | " \"n_estimators\": [300, 500],\n", 218 | " \"colsample_bytree\": [0.7, 1]}\n", 219 | "\n", 220 | "classifiers = [('KNN', KNeighborsClassifier(), knn_params),\n", 221 | " (\"CART\", DecisionTreeClassifier(), cart_params),\n", 222 | " (\"RF\", RandomForestClassifier(), rf_params),\n", 223 | " ('XGBoost', XGBClassifier(use_label_encoder=False, eval_metric='logloss'), xgboost_params),\n", 224 | " ('LightGBM', LGBMClassifier(), lightgbm_params)]\n", 225 | "\n", 226 | "def hyperparameter_optimization(X, y, cv=3, scoring=\"roc_auc\"):\n", 227 | " print(\"Hyperparameter Optimization....\")\n", 228 | " best_models = {}\n", 229 | " for name, classifier, params in classifiers:\n", 230 | " print(f\"########## {name} ##########\")\n", 231 | " cv_results = cross_validate(classifier, X, y, cv=cv, scoring=scoring)\n", 232 | " print(f\"{scoring} (Before): {round(cv_results['test_score'].mean(), 4)}\")\n", 233 | "\n", 234 | " gs_best = GridSearchCV(classifier, params, cv=cv, n_jobs=-1, verbose=False).fit(X, y)\n", 235 | " final_model = classifier.set_params(**gs_best.best_params_)\n", 236 | "\n", 237 | " cv_results = cross_validate(final_model, X, y, cv=cv, scoring=scoring)\n", 238 | " print(f\"{scoring} (After): {round(cv_results['test_score'].mean(), 4)}\")\n", 239 | " print(f\"{name} best params: 
{gs_best.best_params_}\", end=\"\\n\\n\")\n", 240 | " best_models[name] = final_model\n", 241 | " return best_models\n", 242 | "\n", 243 | "# Stacking & Ensemble Learning\n", 244 | "def voting_classifier(best_models, X, y):\n", 245 | " print(\"Voting Classifier...\")\n", 246 | " voting_clf = VotingClassifier(estimators=[('KNN', best_models[\"KNN\"]), ('RF', best_models[\"RF\"]),\n", 247 | " ('LightGBM', best_models[\"LightGBM\"])],\n", 248 | " voting='soft').fit(X, y)\n", 249 | " cv_results = cross_validate(voting_clf, X, y, cv=3, scoring=[\"accuracy\", \"f1\", \"roc_auc\"])\n", 250 | " print(f\"Accuracy: {cv_results['test_accuracy'].mean()}\")\n", 251 | " print(f\"F1Score: {cv_results['test_f1'].mean()}\")\n", 252 | " print(f\"ROC_AUC: {cv_results['test_roc_auc'].mean()}\")\n", 253 | " return voting_clf\n" 254 | ], 255 | "metadata": { 256 | "id": "kaw_1-HEMXA4" 257 | }, 258 | "execution_count": 2, 259 | "outputs": [] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "source": [ 264 | "from google.colab import drive\n", 265 | "drive.mount(\"/content/gdrive\")" 266 | ], 267 | "metadata": { 268 | "colab": { 269 | "base_uri": "https://localhost:8080/" 270 | }, 271 | "id": "T1axQKVeM7r7", 272 | "outputId": "773e5e04-00d7-4aa6-f65b-f1ee20e73499" 273 | }, 274 | "execution_count": 3, 275 | "outputs": [ 276 | { 277 | "output_type": "stream", 278 | "name": "stdout", 279 | "text": [ 280 | "Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount(\"/content/gdrive\", force_remount=True).\n" 281 | ] 282 | } 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "source": [ 288 | "# Pipeline Main Function\n", 289 | "################################################\n", 290 | "\n", 291 | "def main():\n", 292 | " df = pd.read_csv(\"/content/gdrive/MyDrive/DSMLBC10/week_11 (08.12.22-14.12.22) ml/dataset/diabetes.csv\")\n", 293 | " X, y = diabetes_data_prep(df) #yukarıdaki fonksiyonların tümü burada yapılır.\n", 294 | " base_models(X, y)\n", 295 
| " best_models = hyperparameter_optimization(X, y)\n", 296 | " voting_clf = voting_classifier(best_models, X, y)\n", 297 | " joblib.dump(voting_clf, \"/content/gdrive/MyDrive/DSMLBC10/week_13 (22.12.22-28.12.22) ml/voting_clf.pkl\")\n", 298 | " return voting_clf\n", 299 | "\n", 300 | "if __name__ == \"__main__\":\n", 301 | " print(\"İşlem başladı\")\n", 302 | " main()\n", 303 | "\n", 304 | "# git github entegrasyonu\n", 305 | "# makefile (kod otomasyon aracı - Linux)\n", 306 | "# veri tabanlarından veriyi okuma\n", 307 | "# log\n", 308 | "# fonksiyonları class'a çevirme\n", 309 | "# docker\n", 310 | "# requirements.txt" 311 | ], 312 | "metadata": { 313 | "colab": { 314 | "base_uri": "https://localhost:8080/" 315 | }, 316 | "id": "e3nS_SPMMyL8", 317 | "outputId": "5970ff69-2ca9-404a-fcb3-564db576a710" 318 | }, 319 | "execution_count": 4, 320 | "outputs": [ 321 | { 322 | "output_type": "stream", 323 | "name": "stdout", 324 | "text": [ 325 | "İşlem başladı\n", 326 | "Base Models....\n", 327 | "roc_auc: 0.8409 (LR) \n", 328 | "roc_auc: 0.791 (KNN) \n", 329 | "roc_auc: 0.8355 (SVC) \n", 330 | "roc_auc: 0.6691 (CART) \n", 331 | "roc_auc: 0.8275 (RF) \n", 332 | "roc_auc: 0.8196 (Adaboost) \n", 333 | "roc_auc: 0.8254 (GBM) \n", 334 | "roc_auc: 0.8309 (XGBoost) \n", 335 | "roc_auc: 0.8061 (LightGBM) \n", 336 | "Hyperparameter Optimization....\n", 337 | "########## KNN ##########\n", 338 | "roc_auc (Before): 0.791\n", 339 | "roc_auc (After): 0.8211\n", 340 | "KNN best params: {'n_neighbors': 20}\n", 341 | "\n", 342 | "########## CART ##########\n", 343 | "roc_auc (Before): 0.6491\n", 344 | "roc_auc (After): 0.7943\n", 345 | "CART best params: {'max_depth': 6, 'min_samples_split': 23}\n", 346 | "\n", 347 | "########## RF ##########\n", 348 | "roc_auc (Before): 0.8255\n", 349 | "roc_auc (After): 0.8368\n", 350 | "RF best params: {'max_depth': 8, 'max_features': 'auto', 'min_samples_split': 15, 'n_estimators': 300}\n", 351 | "\n", 352 | "########## XGBoost ##########\n", 353 | 
"roc_auc (Before): 0.8309\n", 354 | "roc_auc (After): 0.8241\n", 355 | "XGBoost best params: {'colsample_bytree': 0.5, 'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 100}\n", 356 | "\n", 357 | "########## LightGBM ##########\n", 358 | "roc_auc (Before): 0.8061\n", 359 | "roc_auc (After): 0.8259\n", 360 | "LightGBM best params: {'colsample_bytree': 0.7, 'learning_rate': 0.01, 'n_estimators': 300}\n", 361 | "\n", 362 | "Voting Classifier...\n", 363 | "Accuracy: 0.7682291666666666\n", 364 | "F1Score: 0.6263207896834774\n", 365 | "ROC_AUC: 0.8378718804437962\n" 366 | ] 367 | } 368 | ] 369 | }, 370 | { 371 | "cell_type": "markdown", 372 | "source": [ 373 | "**Pipeline Aşamaları**\n", 374 | "\n", 375 | "1. İlk önce veriyi veritabanından çek\n", 376 | "2. Veriyi veri ön işleme skriptinden geçir. (dışarıdan çağır ya da bir skriptin içinde tutabilirsin)\n", 377 | "3. Genel modele bir bak -base model- (tercih sana kalmış)\n", 378 | "4. Hiperparametre optimizasyonu\n", 379 | "5. best_models içindeki modelleri kullanarak bir voting classifier oluştur. \n", 380 | "6. Model nesnesini bir yere kaydet (.pkl dump işlemi). Böylece training sürecini bir pipeline ile tamamlamış bulunuyoruz.\n", 381 | "\n", 382 | "Komut satırından işletim sistemi seviyesinde kod çalıştırılmak istenirse aşağıdaki kod kullanılmalıdır (kontrol mekanizmasıdır).\n", 383 | "\n", 384 | "\n", 385 | "\n" 386 | ], 387 | "metadata": { 388 | "id": "bAM-k6RMNWLR" 389 | } 390 | }, 391 | { 392 | "cell_type": "code", 393 | "source": [ 394 | "#if __name__ == \"__main__\":\n", 395 | " # print(\"İşlem başladı\") \n", 396 | "#rapor, CLI ekleyebilirsin.\n", 397 | " # main() " 398 | ], 399 | "metadata": { 400 | "id": "gqszRycK0DE8" 401 | }, 402 | "execution_count": 5, 403 | "outputs": [] 404 | }, 405 | { 406 | "cell_type": "markdown", 407 | "source": [ 408 | "### **Machine Learning Pipeline III**\n", 409 | "\n", 410 | "#### **Prediction / Scoring**\n", 411 | "\n", 412 | "Model kuruldu, tahminler yapıldı ve işletim sistemine entegre edildi. 
Peki yeni kayıtlar geldiğinde süreç nasıl işleyecek? \n", 413 | "\n", 414 | "* **csv, excel, txt formatında yeni kayıtlar geldi.**\n", 415 | "\n", 416 | "* **joblib ile kaydedilen model yüklenir.**" 417 | ], 418 | "metadata": { 419 | "id": "RJk2bxjki7Pd" 420 | } 421 | }, 422 | { 423 | "cell_type": "code", 424 | "source": [ 425 | "import joblib\n", 426 | "import pandas as pd" 427 | ], 428 | "metadata": { 429 | "id": "uZEcUahdjHwj" 430 | }, 431 | "execution_count": 6, 432 | "outputs": [] 433 | }, 434 | { 435 | "cell_type": "code", 436 | "source": [ 437 | " df = pd.read_csv(\"/content/gdrive/MyDrive/DSMLBC10/week_11 (08.12.22-14.12.22) ml/dataset/diabetes.csv\")\n", 438 | "\n", 439 | "random_user = df.sample(1, random_state=45)\n", 440 | "random_user " 441 | ], 442 | "metadata": { 443 | "colab": { 444 | "base_uri": "https://localhost:8080/", 445 | "height": 81 446 | }, 447 | "id": "u9-Y16iekOJR", 448 | "outputId": "b2d7fbaa-18eb-405c-8c0a-d33155d5f7f1" 449 | }, 450 | "execution_count": 7, 451 | "outputs": [ 452 | { 453 | "output_type": "execute_result", 454 | "data": { 455 | "text/plain": [ 456 | " Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n", 457 | "195 5 158 84 41 210 39.4 \n", 458 | "\n", 459 | " DiabetesPedigreeFunction Age Outcome \n", 460 | "195 0.395 29 1 " 461 | ], 462 | "text/html": [ 463 | "\n", 464 | "
\n", 465 | "
\n", 466 | "
\n", 467 | "\n", 480 | "\n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | "
PregnanciesGlucoseBloodPressureSkinThicknessInsulinBMIDiabetesPedigreeFunctionAgeOutcome
1955158844121039.40.395291
\n", 510 | "
\n", 511 | " \n", 521 | " \n", 522 | " \n", 559 | "\n", 560 | " \n", 584 | "
\n", 585 | "
\n", 586 | " " 587 | ] 588 | }, 589 | "metadata": {}, 590 | "execution_count": 7 591 | } 592 | ] 593 | }, 594 | { 595 | "cell_type": "code", 596 | "source": [ 597 | "new_model = joblib.load(\"/content/gdrive/MyDrive/DSMLBC10/week_13 (22.12.22-28.12.22) ml/voting_clf.pkl\")\n", 598 | "\n", 599 | "new_model.predict(random_user)" 600 | ], 601 | "metadata": { 602 | "id": "8U-V5D1gkzIx" 603 | }, 604 | "execution_count": null, 605 | "outputs": [] 606 | }, 607 | { 608 | "cell_type": "markdown", 609 | "source": [ 610 | "### ⚡ **Hata alınması beklenir, çünkü modelin eğitildiği (ön işlemeden geçmiş) veri ile yeni gelen ham veri aynı yapıda değildir. Yeni kayıt ilk önce \"diabetes_data_prep\" fonksiyonundan geçirilmeli ve ardından yeni kullanıcı için tahmin işlemi yapılmalıdır. Burası çok kritik bir noktadır; dikkat edilmesi gerekmektedir.** " 611 | ], 612 | "metadata": { 613 | "id": "l2MChztjoLID" 614 | } 615 | }, 616 | { 617 | "cell_type": "code", 618 | "source": [ 619 | "X, y = diabetes_data_prep(df)\n", 620 | "\n", 621 | "random_user = X.sample(1, random_state=50)\n", 622 | "random_user " 623 | ], 624 | "metadata": { 625 | "colab": { 626 | "base_uri": "https://localhost:8080/", 627 | "height": 145 628 | }, 629 | "id": "TkkpyyJmk2Dk", 630 | "outputId": "4c79164c-25be-4d4a-c174-650337ffa1d1" 631 | }, 632 | "execution_count": 9, 633 | "outputs": [ 634 | { 635 | "output_type": "execute_result", 636 | "data": { 637 | "text/plain": [ 638 | " PREGNANCIES GLUCOSE BLOODPRESSURE SKINTHICKNESS INSULIN BMI \\\n", 639 | "575 -0.844885 -0.059293 -1.297896 1.660007 -0.113914 0.445159 \n", 640 | "\n", 641 | " DIABETESPEDIGREEFUNCTION AGE NEW_AGE_CAT_old NEW_AGE_CAT_young \\\n", 642 | "575 -0.579489 -0.701198 0 1 \n", 643 | "\n", 644 | " NEW_GLUCOSE_CAT_prediabetes NEW_BMI_RANGE_healty \\\n", 645 | "575 0 0 \n", 646 | "\n", 647 | " NEW_BMI_RANGE_overweight NEW_BMI_RANGE_obese NEW_BLOODPRESSURE_hs1 \\\n", 648 | "575 0 1 0 \n", 649 | "\n", 650 | " NEW_BLOODPRESSURE_hs2 \n", 651 | "575 0 " 652 | ], 653 | "text/html": [ 654 | "\n", 655 
| "
\n", 656 | "
\n", 657 | "
\n", 658 | "\n", 671 | "\n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | "
PREGNANCIESGLUCOSEBLOODPRESSURESKINTHICKNESSINSULINBMIDIABETESPEDIGREEFUNCTIONAGENEW_AGE_CAT_oldNEW_AGE_CAT_youngNEW_GLUCOSE_CAT_prediabetesNEW_BMI_RANGE_healtyNEW_BMI_RANGE_overweightNEW_BMI_RANGE_obeseNEW_BLOODPRESSURE_hs1NEW_BLOODPRESSURE_hs2
575-0.844885-0.059293-1.2978961.660007-0.1139140.445159-0.579489-0.70119801000100
\n", 715 | "
\n", 716 | " \n", 726 | " \n", 727 | " \n", 764 | "\n", 765 | " \n", 789 | "
\n", 790 | "
\n", 791 | " " 792 | ] 793 | }, 794 | "metadata": {}, 795 | "execution_count": 9 796 | } 797 | ] 798 | }, 799 | { 800 | "cell_type": "code", 801 | "source": [ 802 | "new_model = joblib.load(\"/content/gdrive/MyDrive/DSMLBC10/week_13 (22.12.22-28.12.22) ml/voting_clf.pkl\")\n", 803 | "\n", 804 | "new_model.predict(random_user)" 805 | ], 806 | "metadata": { 807 | "colab": { 808 | "base_uri": "https://localhost:8080/" 809 | }, 810 | "id": "rvTZPwfSk-gS", 811 | "outputId": "96054b15-5367-411b-caf1-cc6af57a443b" 812 | }, 813 | "execution_count": 10, 814 | "outputs": [ 815 | { 816 | "output_type": "execute_result", 817 | "data": { 818 | "text/plain": [ 819 | "array([0])" 820 | ] 821 | }, 822 | "metadata": {}, 823 | "execution_count": 10 824 | } 825 | ] 826 | } 827 | ] 828 | } --------------------------------------------------------------------------------