├── .gitignore ├── 13-PCA ├── 1.png └── 2.png ├── 4-SVM ├── SVM.png ├── 4-When_To_Use_SVM.ipynb └── final_touch.ipynb ├── 9-Adaboost ├── auc.png ├── 2-About-Ada-Boost-Classification.ipynb ├── final_touch.ipynb └── 4-About-Ada-Boost-Regression.ipynb ├── 7-Decision Tree ├── DT.jpg ├── 1-When_To_Use_DT-Classifier.ipynb ├── 2-When_To_Use_DT-Regressor.ipynb └── final_touch.ipynb ├── 17-Silhoute Clustering ├── 1.png └── image.png ├── 16-DBSCAN Clustering └── image.png ├── 18-Anomaly Detection ML ├── image.png ├── 2-About-DBSCAN-Anomaly-Detection.ipynb └── healthcare.csv ├── 3-Logistic Regression ├── ROC-AUC.png ├── Logistic-Regression.png └── 2-When_To_Use_Logistic_Regression.ipynb ├── 11-XgBoost ├── XGboost Classifier │ └── auc.png └── All-boostings.ipynb ├── 15-Hierarichal Clustering ├── image.png └── image1.png ├── 8-Random Forest ├── Classification │ ├── auc.png │ └── when_to_use_RF-Classifier.ipynb ├── 1-Ensemble-Techniques │ ├── 2-Bagging.ipynb │ ├── 3-Boosting.ipynb │ └── 1-ensemble.ipynb ├── Regression │ └── when_to_use_RF-Regressor.ipynb └── final_touch.ipynb ├── 10-Gradient Boosting ├── Classification │ ├── auc.png │ └── 2-About-Gradient-Boosting-classifier.ipynb └── Regression │ └── 2-About-gradient-boosting-regression.ipynb ├── 14-K Means Clutering Unsupervised ML └── WCSS.png ├── requirements.txt ├── 1-Complete Linear Regression ├── height-weight.csv ├── economic_index.csv ├── 2-When_To_Use_Simple_Linear_Regression.ipynb ├── 3-When_To_Use_Multiple_Linear_Regression.ipynb ├── 8-When_To_Use_Ridge_Lasso_ElasticNet.ipynb ├── 9-Final_touch.ipynb ├── 4-When_To_Use_Polynomial_Regression.ipynb ├── Algerian_forest_fires_dataset_UPDATE.csv └── Algerian_forest_fires_cleaned_dataset.csv ├── README.md ├── 5-Naive Baye's ├── 4-When_To_Use_Naive_Bayes.ipynb └── final_touch.ipynb ├── 6-K Nearest Neighbor ├── 3-When_To_Use_KNN_Classifier.ipynb ├── 4-When_To_Use_KNN_Regressor.ipynb └── final_touch.ipynb ├── 12-Unsupervised Machine Learning └── Intro.ipynb └── 2-Ridge Lasso And Elasticnet ├── 4-Cv_and_tuning.ipynb ├── 3-final_touch.ipynb ├── Algerian_forest_fires_dataset_UPDATE.csv └── Algerian_forest_fires_cleaned_dataset.csv /.gitignore: -------------------------------------------------------------------------------- 1 | venv/ 2 | .vscode/ 3 | __pycache__/ -------------------------------------------------------------------------------- /13-PCA/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Brahme27/All-Machine-Learning-Algorithms-Updated/HEAD/13-PCA/1.png -------------------------------------------------------------------------------- /13-PCA/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Brahme27/All-Machine-Learning-Algorithms-Updated/HEAD/13-PCA/2.png -------------------------------------------------------------------------------- /4-SVM/SVM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Brahme27/All-Machine-Learning-Algorithms-Updated/HEAD/4-SVM/SVM.png -------------------------------------------------------------------------------- /9-Adaboost/auc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Brahme27/All-Machine-Learning-Algorithms-Updated/HEAD/9-Adaboost/auc.png -------------------------------------------------------------------------------- /7-Decision Tree/DT.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Brahme27/All-Machine-Learning-Algorithms-Updated/HEAD/7-Decision Tree/DT.jpg -------------------------------------------------------------------------------- /17-Silhoute Clustering/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Brahme27/All-Machine-Learning-Algorithms-Updated/HEAD/17-Silhoute Clustering/1.png -------------------------------------------------------------------------------- /16-DBSCAN Clustering/image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Brahme27/All-Machine-Learning-Algorithms-Updated/HEAD/16-DBSCAN Clustering/image.png -------------------------------------------------------------------------------- /17-Silhoute Clustering/image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Brahme27/All-Machine-Learning-Algorithms-Updated/HEAD/17-Silhoute Clustering/image.png -------------------------------------------------------------------------------- /18-Anomaly Detection ML/image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Brahme27/All-Machine-Learning-Algorithms-Updated/HEAD/18-Anomaly Detection ML/image.png -------------------------------------------------------------------------------- /3-Logistic Regression/ROC-AUC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Brahme27/All-Machine-Learning-Algorithms-Updated/HEAD/3-Logistic Regression/ROC-AUC.png -------------------------------------------------------------------------------- /11-XgBoost/XGboost Classifier/auc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Brahme27/All-Machine-Learning-Algorithms-Updated/HEAD/11-XgBoost/XGboost Classifier/auc.png -------------------------------------------------------------------------------- /15-Hierarichal Clustering/image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Brahme27/All-Machine-Learning-Algorithms-Updated/HEAD/15-Hierarichal Clustering/image.png -------------------------------------------------------------------------------- /15-Hierarichal Clustering/image1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Brahme27/All-Machine-Learning-Algorithms-Updated/HEAD/15-Hierarichal Clustering/image1.png -------------------------------------------------------------------------------- /8-Random Forest/Classification/auc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Brahme27/All-Machine-Learning-Algorithms-Updated/HEAD/8-Random Forest/Classification/auc.png -------------------------------------------------------------------------------- /10-Gradient Boosting/Classification/auc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Brahme27/All-Machine-Learning-Algorithms-Updated/HEAD/10-Gradient Boosting/Classification/auc.png -------------------------------------------------------------------------------- /14-K Means Clutering Unsupervised ML/WCSS.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Brahme27/All-Machine-Learning-Algorithms-Updated/HEAD/14-K Means Clutering Unsupervised ML/WCSS.png -------------------------------------------------------------------------------- /3-Logistic Regression/Logistic-Regression.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Brahme27/All-Machine-Learning-Algorithms-Updated/HEAD/3-Logistic Regression/Logistic-Regression.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | scikit-learn 2 | ipykernel 3 | numpy 4 | pandas 5 | matplotlib 6 | seaborn 7 | flask 8 | streamlit 9 | ipython 10 | html5lib 11 | lxml 12 | beautifulsoup4 13 | imblearn 14 | openpyxl 15 | statsmodels 16 | plotly 17 | nbformat 18 | xgboost -------------------------------------------------------------------------------- /1-Complete Linear Regression/height-weight.csv: -------------------------------------------------------------------------------- 1 | Weight,Height 2 | 45,120 3 | 58,135 4 | 48,123 5 | 60,145 6 | 70,160 7 | 78,162 8 | 80,163 9 | 90,175 10 | 95,182 11 | 78,170 12 | 82,176 13 | 95,182 14 | 105,175 15 | 100,183 16 | 85,170 17 | 78,177 18 | 50,140 19 | 65,159 20 | 76,150 21 | 87,167 22 | 45,129 23 | 56,140 24 | 72,160 25 | -------------------------------------------------------------------------------- /1-Complete Linear Regression/economic_index.csv: -------------------------------------------------------------------------------- 1 | ,year,month,interest_rate,unemployment_rate,index_price 2 | 0,2017,12,2.75,5.3,1464 3 | 1,2017,11,2.5,5.3,1394 4 | 2,2017,10,2.5,5.3,1357 5 | 3,2017,9,2.5,5.3,1293 6 | 4,2017,8,2.5,5.4,1256 7 | 5,2017,7,2.5,5.6,1254 8 | 6,2017,6,2.5,5.5,1234 9 | 7,2017,5,2.25,5.5,1195 10 | 8,2017,4,2.25,5.5,1159 11 | 9,2017,3,2.25,5.6,1167 12 | 10,2017,2,2.0,5.7,1130 13 | 11,2017,1,2.0,5.9,1075 14 | 12,2016,12,2.0,6.0,1047 15 | 13,2016,11,1.75,5.9,965 16 | 14,2016,10,1.75,5.8,943 17 | 15,2016,9,1.75,6.1,958 18 | 16,2016,8,1.75,6.2,971 19 | 17,2016,7,1.75,6.1,949 20 | 18,2016,6,1.75,6.1,884 21 | 19,2016,5,1.75,6.1,866 22 | 20,2016,4,1.75,5.9,876 23 | 21,2016,3,1.75,6.2,822 24 | 22,2016,2,1.75,6.2,704 25 | 23,2016,1,1.75,6.1,719 26 | -------------------------------------------------------------------------------- /18-Anomaly Detection ML/2-About-DBSCAN-Anomaly-Detection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "b20354c1", 6 | "metadata": {}, 7 | "source": [ 8 | "## **DBSCAN for anomaly detection**:\n", 9 | "\n", 10 | "* **DBSCAN** is a clustering algorithm that groups points based on **density** — areas with many close points form clusters.\n", 11 | "* It has two key parameters:\n", 12 | "\n", 13 | " 1. **eps** – the maximum distance between points to be considered neighbors.\n", 14 | " 2. **min\\_samples** – the minimum number of points required to form a dense region (cluster).\n", 15 | "* **Anomalies (or outliers)** are the points that **do not belong to any cluster**. 
DBSCAN labels these points as **noise**.\n", 16 | "* So, for anomaly detection:\n", 17 | "\n", 18 | " * Points in clusters → normal points.\n", 19 | " * Points labeled as noise (-1) → anomalies.\n", 20 | "\n", 21 | "**Intuition:** If a point is far from all dense regions, it is unusual — which is exactly what we want when detecting anomalies." 22 | ] 23 | } 24 | ], 25 | "metadata": { 26 | "language_info": { 27 | "name": "python" 28 | } 29 | }, 30 | "nbformat": 4, 31 | "nbformat_minor": 5 32 | } 33 | -------------------------------------------------------------------------------- /1-Complete Linear Regression/2-When_To_Use_Simple_Linear_Regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "ae2357e8", 6 | "metadata": {}, 7 | "source": [ 8 | "# You use **simple linear regression** when:\n", 9 | "\n", 10 | "1. **You want to see how one thing affects another.**\n", 11 | "\n", 12 | " * Example: Does the number of hours you study affect your exam score?\n", 13 | "\n", 14 | "2. **There are only two variables involved.**\n", 15 | "\n", 16 | " * One is the cause (independent variable → hours studied).\n", 17 | " * One is the effect (dependent variable → exam score).\n", 18 | "\n", 19 | "3. **You expect a straight-line relationship.**\n", 20 | "\n", 21 | " * If you put the data on a graph, the points roughly form a line (not a curve).\n", 22 | "\n", 23 | "\n", 24 | "✅ **Good examples where it makes sense**:\n", 25 | "\n", 26 | "* Predicting height from age (for kids).\n", 27 | "* Predicting car fuel use from distance traveled.\n", 28 | "* Predicting house price from its size.\n", 29 | "\n", 30 | "❌ **Not good to use** if:\n", 31 | "\n", 32 | "* The relationship is curved or complicated.\n", 33 | "* Many factors affect the outcome (then you’d use multiple regression instead)." 34 | ] 35 | } 36 | ], 37 | "metadata": { 38 | "language_info": { 39 | "name": "python" 40 | } 41 | }, 42 | "nbformat": 4, 43 | "nbformat_minor": 5 44 | } 45 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Complete Machine Learning Algorithms (Updated) 2 | 3 | ### 100% credits to krish Naik 4 | 5 | This repository is a **revised and enhanced version** of the Complete Data Science with Machine Learning and NLP course by [Krish Naik](https://github.com/krishnaik06/Complete-Data-Science-With-Machine-Learning-And-NLP-2024). 6 | 7 | A year ago, I completed the course, implemented everything from the original repo, and uploaded it to GitHub. Now, after revising all the machine learning algorithms, I decided to create this **updated repository** with improvements and additional resources that are extremely helpful for AI/ML enthusiasts, beginners, and for interview preparation. 8 | 9 | ## What’s New in This Repository 10 | 11 | - **Comprehensive Theory:** Each algorithm includes detailed explanations: 12 | - How it works 13 | - When to use it 14 | - Key terminologies 15 | - Advantages and disadvantages 16 | - **Visual Aids:** Related images and diagrams to enhance understanding. 17 | - **Error-Free Code:** Updated code to work with the latest library versions. Previously minor warnings that were present a year ago have been corrected to prevent errors due to version changes. 18 | - **Beginner-Friendly:** Concepts are explained in a way that even a fresher can understand. 
19 | 20 | ## Why Use This Repository? 21 | 22 | - Run every notebook cell to see all algorithms in action. 23 | - Read the accompanying theory to strengthen your understanding. 24 | - Perfect for **revision, interview preparation, and hands-on learning**. 25 | - Aimed at providing **the best learning experience** for AI/ML enthusiasts. -------------------------------------------------------------------------------- /5-Naive Baye's/4-When_To_Use_Naive_Bayes.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "1b592129", 6 | "metadata": {}, 7 | "source": [ 8 | "# When to use Naive Bayes\n", 9 | "\n", 10 | "1. **When you want to classify things**\n", 11 | "\n", 12 | " * Example: Decide if an email is spam or not.\n", 13 | " * Example: Decide if a review is positive or negative.\n", 14 | "\n", 15 | "2. **When your data has clear clues (features)**\n", 16 | "\n", 17 | " * Example: Words in a message.\n", 18 | " * Example: Measurements of something (like height, weight).\n", 19 | "\n", 20 | "3. **When speed matters**\n", 21 | "\n", 22 | " * Naive Bayes is very fast compared to many other methods.\n", 23 | "\n", 24 | "4. **When you don’t need perfect accuracy but a good simple model**\n", 25 | "\n", 26 | " * It often works surprisingly well even with its “naive” assumption.\n", 27 | "\n", 28 | "---\n", 29 | "\n", 30 | "### Good fits for Naive Bayes\n", 31 | "\n", 32 | "* **Text problems** (spam filtering, sentiment analysis, topic classification).\n", 33 | "* **Simple number problems** (predicting categories based on a few measurements).\n", 34 | "* **Situations where data is large and you need results quickly.**\n", 35 | "\n", 36 | "---\n", 37 | "\n", 38 | "👉 In short:\n", 39 | "Use **Naive Bayes** when you need a **fast, simple, and reasonably good** way to sort things into categories, especially with text or when features are simple clues." 40 | ] 41 | } 42 | ], 43 | "metadata": { 44 | "language_info": { 45 | "name": "python" 46 | } 47 | }, 48 | "nbformat": 4, 49 | "nbformat_minor": 5 50 | } 51 | -------------------------------------------------------------------------------- /7-Decision Tree/1-When_To_Use_DT-Classifier.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "5f770e05", 6 | "metadata": {}, 7 | "source": [ 8 | "# Use Decision Tree Classifier When : \n", 9 | "\n", 10 | "1. **You want clear, simple rules for making decisions.**\n", 11 | "\n", 12 | " * For example:\n", 13 | "\n", 14 | " * If age < 18 → \"child\"\n", 15 | " * Else if age ≥ 18 and income > 50k → \"adult with high income\"\n", 16 | " * Otherwise → \"adult with low income\"\n", 17 | " * The tree is just a set of “if-then” rules.\n", 18 | "\n", 19 | "2. **You care about understanding the “why” behind predictions.**\n", 20 | "\n", 21 | " * Unlike some complex models (like neural networks), decision trees let you trace exactly *why* a decision was made.\n", 22 | "\n", 23 | "3. **Your data has categories or simple numbers.**\n", 24 | "\n", 25 | " * Works well with things like:\n", 26 | "\n", 27 | " * \"red / blue / green\" (categories)\n", 28 | " * \"temperature = 72\" (numbers)\n", 29 | "\n", 30 | "4. **You don’t have a ton of data.**\n", 31 | "\n", 32 | " * Decision trees don’t need millions of examples — they can work with smaller sets.\n", 33 | "\n", 34 | "5. 
**You’re okay with a quick, good-enough model.**\n", 35 | "\n", 36 | " * They may not always be the most accurate, but they’re fast to train and easy to use.\n", 37 | "\n", 38 | "👉 In short: **use a decision tree classifier when you want a model that is easy to understand, explains its decisions, and works well with straightforward data.**" 39 | ] 40 | } 41 | ], 42 | "metadata": { 43 | "language_info": { 44 | "name": "python" 45 | } 46 | }, 47 | "nbformat": 4, 48 | "nbformat_minor": 5 49 | } 50 | -------------------------------------------------------------------------------- /7-Decision Tree/2-When_To_Use_DT-Regressor.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "984e249a", 6 | "metadata": {}, 7 | "source": [ 8 | "\n", 9 | "A **decision tree regressor** is almost the same as a classifier, but instead of predicting a *category* (like “yes/no”), it predicts a *number* (like “price = \\$250”).\n", 10 | "\n", 11 | "**Use Decision Tree Regressor When** \n", 12 | "\n", 13 | "1. **You want to predict numbers, not labels.**\n", 14 | "\n", 15 | " * Example: Predicting the price of a house, the temperature tomorrow, or someone’s exam score.\n", 16 | "\n", 17 | "2. **You want simple rules that explain the prediction.**\n", 18 | "\n", 19 | " * For example:\n", 20 | "\n", 21 | " * If house size > 2000 sq ft → predicted price = \\$500,000\n", 22 | " * Else if size ≤ 2000 and neighborhood = “suburbs” → predicted price = \\$300,000\n", 23 | " * Otherwise → \\$200,000\n", 24 | "\n", 25 | "3. **You want an easy-to-interpret model.**\n", 26 | "\n", 27 | " * You can read the tree like a flowchart and see exactly how the number was decided.\n", 28 | "\n", 29 | "4. **Your data has both categories and numbers.**\n", 30 | "\n", 31 | " * Example: “city” (category) + “square footage” (number)\n", 32 | "\n", 33 | "5. **You don’t need extreme accuracy but want fast, clear predictions.**\n", 34 | "\n", 35 | " * Just like with classifiers, they’re not always the *best* model, but they’re quick and transparent.\n", 36 | "\n", 37 | "👉 In short: **use a decision tree regressor when you want to predict a number, using simple “if-then” rules that are easy to follow.**" 38 | ] 39 | } 40 | ], 41 | "metadata": { 42 | "language_info": { 43 | "name": "python" 44 | } 45 | }, 46 | "nbformat": 4, 47 | "nbformat_minor": 5 48 | } 49 | -------------------------------------------------------------------------------- /3-Logistic Regression/2-When_To_Use_Logistic_Regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "56c62dc4", 6 | "metadata": {}, 7 | "source": [ 8 | "# ✅ Use Logistic Regression when:\n", 9 | "\n", 10 | "1. **Your outcome is yes/no**\n", 11 | "\n", 12 | " * Example: Will a customer buy (Yes/No)?\n", 13 | " * Example: Is this email spam (Yes/No)?\n", 14 | "\n", 15 | "2. **You want probabilities, not just labels**\n", 16 | "\n", 17 | " * Logistic regression doesn’t just say *Yes* or *No*.\n", 18 | " * It gives a probability, like: “This email is **80% chance spam**.”\n", 19 | "\n", 20 | "3. **The relationship is roughly straight-line-ish**\n", 21 | "\n", 22 | " * If increasing “study hours” usually increases the chance of “pass exam,” logistic regression works well.\n", 23 | "\n", 24 | "4. 
**You want something simple and explainable**\n", 25 | "\n", 26 | " * You can look at the model and say:\n", 27 | " “If income goes up, the chance of buying goes up by this much.”\n", 28 | "\n", 29 | "---\n", 30 | "\n", 31 | "### ❌ Don’t use Logistic Regression when:\n", 32 | "\n", 33 | "* You need to predict numbers (like house prices).\n", 34 | "* The outcome has **many categories** (like “cat/dog/bird”) → then use multinomial logistic regression or OVR Regression Model.\n", 35 | "* Relationships are super complex and messy (then tree-based or neural nets may work better).\n", 36 | "\n", 37 | "---\n", 38 | "\n", 39 | "👉 In short:\n", 40 | "Use logistic regression when you want a **simple, explainable model** for a **yes/no problem** and you also want to know the **probabilities**.\n" 41 | ] 42 | } 43 | ], 44 | "metadata": { 45 | "language_info": { 46 | "name": "python" 47 | } 48 | }, 49 | "nbformat": 4, 50 | "nbformat_minor": 5 51 | } 52 | -------------------------------------------------------------------------------- /1-Complete Linear Regression/3-When_To_Use_Multiple_Linear_Regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "9c946db5", 6 | "metadata": {}, 7 | "source": [ 8 | "# You use **multiple linear regression** when:\n", 9 | "\n", 10 | "1. **You want to see how several things together affect one outcome.**\n", 11 | "\n", 12 | " * Example: Exam score might depend on **hours studied**, **hours slept**, and **number of classes attended**.\n", 13 | "\n", 14 | "2. **There’s one main result you’re interested in (the dependent variable).**\n", 15 | "\n", 16 | " * Example: *exam score, house price, weight, sales revenue*.\n", 17 | "\n", 18 | "3. **There are two or more possible influences (independent variables).**\n", 19 | "\n", 20 | " * Example: *house price* depends on **size**, **location**, and **age of the house**.\n", 21 | "\n", 22 | "4. **The relationship is roughly straight-line for each factor.**\n", 23 | "\n", 24 | " * Meaning, if you keep all other factors steady, changing one will still have a linear effect.\n", 25 | "\n", 26 | "\n", 27 | "✅ **Good examples where it makes sense**:\n", 28 | "\n", 29 | "* Predicting salary from **years of experience**, **education level**, and **skills**.\n", 30 | "* Predicting blood pressure from **age**, **weight**, and **exercise level**.\n", 31 | "* Predicting sales from **advertising spend**, **price of product**, and **season**.\n", 32 | "\n", 33 | "❌ **Not good to use** if:\n", 34 | "\n", 35 | "* The relationships are very curved/nonlinear.\n", 36 | "* The factors affect each other in complicated ways.\n", 37 | "* You don’t have enough data compared to the number of factors." 
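A minimal runnable sketch of the multiple-regression idea described above (the toy numbers for hours studied / hours slept and the scores are invented for illustration, not course data):

```python
# Multiple linear regression sketch: two inputs (hours studied, hours slept) -> exam score.
# The toy numbers below are illustrative assumptions, not data from the notebooks.
import numpy as np
from sklearn.linear_model import LinearRegression

X = np.array([[2, 6], [4, 7], [6, 5], [8, 8], [10, 6], [12, 7]])  # [hours_studied, hours_slept]
y = np.array([50, 60, 63, 80, 84, 90])                            # exam scores

model = LinearRegression().fit(X, y)

print("Coefficient per feature:", model.coef_)    # effect of each input, holding the other fixed
print("Intercept:", model.intercept_)
print("Predicted score for 7h study, 6h sleep:", model.predict([[7, 6]])[0])
print("R^2 on training data:", model.score(X, y))
```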
38 | ] 39 | } 40 | ], 41 | "metadata": { 42 | "language_info": { 43 | "name": "python" 44 | } 45 | }, 46 | "nbformat": 4, 47 | "nbformat_minor": 5 48 | } 49 | -------------------------------------------------------------------------------- /4-SVM/4-When_To_Use_SVM.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "9d0c0587", 6 | "metadata": {}, 7 | "source": [ 8 | "# Support Vector Machines\n", 9 | "\n", 10 | "### **SVC (Support Vector Classification)**\n", 11 | "\n", 12 | "* **Task type**: *Classification* → output is a **class label**.\n", 13 | "* **Goal**: Find a boundary (hyperplane) that best separates the different classes.\n", 14 | "* **Output**: Discrete categories (e.g., “cat” vs. “dog”).\n", 15 | "* **Loss function**: Uses **hinge loss** to maximize the margin between classes.\n", 16 | "\n", 17 | "📌 Example:\n", 18 | "\n", 19 | "* Input: height and weight of people.\n", 20 | "* Output: predict **male** or **female**.\n", 21 | "* The SVC draws a line (in 2D) or hyperplane (in higher dimensions) to separate the two groups.\n", 22 | "\n", 23 | "---\n", 24 | "\n", 25 | "### **SVR (Support Vector Regression)**\n", 26 | "\n", 27 | "* **Task type**: *Regression* → output is a **continuous value**.\n", 28 | "* **Goal**: Fit a function that predicts numbers, while ignoring small errors (within a tolerance “ε”).\n", 29 | "* **Output**: Continuous values (e.g., house price = 210k).\n", 30 | "* **Loss function**: Uses **epsilon-insensitive loss** (errors smaller than ε don’t matter).\n", 31 | "\n", 32 | "📌 Example:\n", 33 | "\n", 34 | "* Input: size of a house.\n", 35 | "* Output: predict **price** of the house.\n", 36 | "* SVR doesn’t just draw a line — it tries to keep most predictions within a “tube” around the true values.\n", 37 | "\n", 38 | "---\n", 39 | "\n", 40 | "👉 **Rule of thumb in ML terms**:\n", 41 | "\n", 42 | "* If your **target variable** is categorical → use **SVC**.\n", 43 | "* If your **target variable** is numeric/continuous → use **SVR**." 44 | ] 45 | } 46 | ], 47 | "metadata": { 48 | "language_info": { 49 | "name": "python" 50 | } 51 | }, 52 | "nbformat": 4, 53 | "nbformat_minor": 5 54 | } 55 | -------------------------------------------------------------------------------- /8-Random Forest/Classification/when_to_use_RF-Classifier.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "0f9f2cdf", 6 | "metadata": {}, 7 | "source": [ 8 | "**Random Forest (RF) classifier** is best used when:\n", 9 | "\n", 10 | "1. **Your data has many features** (columns)\n", 11 | "\n", 12 | " * Works well with mixed data: numbers, categories, or both.\n", 13 | "\n", 14 | "2. **You want good accuracy without much tuning**\n", 15 | "\n", 16 | " * Random Forest usually gives strong results “out of the box.”\n", 17 | "\n", 18 | "3. **You want to avoid overfitting**\n", 19 | "\n", 20 | " * A single decision tree might memorize the data too much.\n", 21 | " * Random Forest (many trees + averaging) makes predictions more stable.\n", 22 | "\n", 23 | "4. **Your dataset isn’t too huge**\n", 24 | "\n", 25 | " * It works well on medium-sized datasets (thousands to maybe hundreds of thousands of rows).\n", 26 | " * For very large datasets, it can get slow.\n", 27 | "\n", 28 | "5. **You want feature importance**\n", 29 | "\n", 30 | " * RF can tell you which features (like “age,” “income,” etc.) 
are most useful for classification.\n", 31 | "\n", 32 | "---\n", 33 | "\n", 34 | "### ✅ Examples where RF Classifier works well\n", 35 | "\n", 36 | "* Predicting if a customer will churn (leave a company).\n", 37 | "* Classifying whether an email is spam or not.\n", 38 | "* Predicting if a patient has a disease (based on lab results).\n", 39 | "* Identifying types of plants/animals based on their traits.\n", 40 | "\n", 41 | "---\n", 42 | "\n", 43 | "Use a **Random Forest classifier** when you want a **reliable, balanced, and fairly accurate model** that doesn’t require much fine-tuning, especially if you’re not sure which single algorithm to pick." 44 | ] 45 | } 46 | ], 47 | "metadata": { 48 | "language_info": { 49 | "name": "python" 50 | } 51 | }, 52 | "nbformat": 4, 53 | "nbformat_minor": 5 54 | } 55 | -------------------------------------------------------------------------------- /8-Random Forest/1-Ensemble-Techniques/2-Bagging.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "2ec71f93", 6 | "metadata": {}, 7 | "source": [ 8 | "### 🟦 Bagging (Bootstrap Aggregating)\n", 9 | "\n", 10 | "**Think of the process like this:**\n", 11 | "\n", 12 | "1. **Start with your dataset**\n", 13 | " Say you have 1000 rows of data about houses (features: size, location, number of rooms → target: price).\n", 14 | "\n", 15 | "2. **Create random samples**\n", 16 | " Bagging makes many new datasets by randomly picking rows from the original dataset (some rows may repeat, some may be left out).\n", 17 | "\n", 18 | "3. **Train multiple models**\n", 19 | " On each random dataset, train the same type of model — for example, decision trees.\n", 20 | "\n", 21 | " * Tree 1 learns one pattern.\n", 22 | " * Tree 2 learns slightly differently because it saw a different dataset.\n", 23 | " * Tree 3 learns something else.\n", 24 | "\n", 25 | "4. **Combine predictions**\n", 26 | " When you want to predict the price of a new house:\n", 27 | "\n", 28 | " * Each tree gives its own prediction.\n", 29 | " * Final result = **average of all trees’ predictions**.\n", 30 | "\n", 31 | "---\n", 32 | "\n", 33 | "### 🔑 In ML terms:\n", 34 | "\n", 35 | "* Bagging = **training models in parallel on different random subsets**.\n", 36 | "* Final prediction = **aggregate (average or vote)** of all models.\n", 37 | "* Benefit = reduces **variance** (less chance of overfitting).\n", 38 | "\n", 39 | "---\n", 40 | "\n", 41 | "✅ **Small Example:**\n", 42 | "Predicting house price for a new home:\n", 43 | "\n", 44 | "* Tree 1 → \\$200k\n", 45 | "* Tree 2 → \\$220k\n", 46 | "* Tree 3 → \\$210k\n", 47 | " **Final Bagging prediction = (200 + 220 + 210) / 3 = \\$210k**" 48 | ] 49 | } 50 | ], 51 | "metadata": { 52 | "language_info": { 53 | "name": "python" 54 | } 55 | }, 56 | "nbformat": 4, 57 | "nbformat_minor": 5 58 | } 59 | -------------------------------------------------------------------------------- /6-K Nearest Neighbor/3-When_To_Use_KNN_Classifier.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "57f80286", 6 | "metadata": {}, 7 | "source": [ 8 | "# When should you use KNN?\n", 9 | "\n", 10 | "You should use KNN when:\n", 11 | "\n", 12 | "1. 
**You have labeled examples already**\n", 13 | "\n", 14 | " * For example, you know which animals are cats 🐱 and which are dogs 🐶, and now you want to classify a new animal.\n", 15 | "\n", 16 | "2. **The data isn’t too huge**\n", 17 | "\n", 18 | " * KNN can get slow if you have millions of examples.\n", 19 | "\n", 20 | "3. **You want something simple**\n", 21 | "\n", 22 | " * KNN doesn’t do fancy math behind the scenes. It just looks at who’s closest and votes.\n", 23 | "\n", 24 | "4. **Your categories are clear**\n", 25 | "\n", 26 | " * Works best when the groups (like cats vs dogs) are distinct.\n", 27 | "\n", 28 | "5. **When patterns depend on similarity**\n", 29 | "\n", 30 | " * If “things that look alike usually belong to the same group,” KNN is a good choice.\n", 31 | "\n", 32 | "---\n", 33 | "\n", 34 | "### Example\n", 35 | "\n", 36 | "Say you want to predict whether a fruit is an **apple 🍎** or an **orange 🍊**.\n", 37 | "You already know:\n", 38 | "\n", 39 | "* Apples are usually **red** and **smaller**.\n", 40 | "* Oranges are usually **orange** and **bigger**.\n", 41 | "\n", 42 | "Now, you get a new fruit that is **medium-sized and orange-colored**.\n", 43 | "KNN will check which known fruits it’s most similar to. If most of its nearest neighbors are oranges, it says: *“This is an orange!”*\n", 44 | "\n", 45 | "---\n", 46 | "\n", 47 | "👉 In short:\n", 48 | "Use KNN when your problem is about **classifying new things based on their similarity to known things**, and your dataset is not too large." 49 | ] 50 | } 51 | ], 52 | "metadata": { 53 | "language_info": { 54 | "name": "python" 55 | } 56 | }, 57 | "nbformat": 4, 58 | "nbformat_minor": 5 59 | } 60 | -------------------------------------------------------------------------------- /6-K Nearest Neighbor/4-When_To_Use_KNN_Regressor.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "0e3c93f4", 6 | "metadata": {}, 7 | "source": [ 8 | "### What is KNN Regressor?\n", 9 | "\n", 10 | "Think of **KNN Classifier** as “choosing a category” (cat 🐱 or dog 🐶).\n", 11 | "But **KNN Regressor** is about “predicting a number” (like weight, price, or temperature 🌡️).\n", 12 | "\n", 13 | "It works the same way: look at the nearest neighbors, then instead of voting, it **averages their numbers**.\n", 14 | "\n", 15 | "---\n", 16 | "\n", 17 | "### When should you use KNN Regressor?\n", 18 | "\n", 19 | "Use KNN Regressor when:\n", 20 | "\n", 21 | "1. **You want to predict a number, not a label**\n", 22 | "\n", 23 | " * Example: Predicting the **price of a house** 🏠 instead of saying “cheap/expensive.”\n", 24 | "\n", 25 | "2. **Nearby things tend to have similar values**\n", 26 | "\n", 27 | " * Example: Houses in the same neighborhood often cost about the same.\n", 28 | "\n", 29 | "3. **You don’t need super fast predictions on huge data**\n", 30 | "\n", 31 | " * Because KNN checks neighbors every time, it can get slow on very big datasets.\n", 32 | "\n", 33 | "4. 
**You want something simple and easy to understand**\n", 34 | "\n", 35 | " * KNN is beginner-friendly because it doesn’t require complex math or training.\n", 36 | "\n", 37 | "---\n", 38 | "\n", 39 | "### Example\n", 40 | "\n", 41 | "You want to predict the **price of a new house**.\n", 42 | "You already know:\n", 43 | "\n", 44 | "* House A: 2 bedrooms → \\$200k\n", 45 | "* House B: 3 bedrooms → \\$250k\n", 46 | "* House C: 4 bedrooms → \\$300k\n", 47 | "\n", 48 | "Now, a new house has 3 bedrooms.\n", 49 | "KNN Regressor looks at the “nearest” houses (say, B and C) and averages their prices → around **\\$275k**.\n", 50 | "\n", 51 | "---\n", 52 | "\n", 53 | "👉 In short:\n", 54 | "Use **KNN Classifier** when the answer is a **category**.\n", 55 | "Use **KNN Regressor** when the answer is a **number**." 56 | ] 57 | } 58 | ], 59 | "metadata": { 60 | "language_info": { 61 | "name": "python" 62 | } 63 | }, 64 | "nbformat": 4, 65 | "nbformat_minor": 5 66 | } 67 | -------------------------------------------------------------------------------- /8-Random Forest/Regression/when_to_use_RF-Regressor.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "f8328134", 6 | "metadata": {}, 7 | "source": [ 8 | "### 🌳 Random Forest Regressor\n", 9 | "\n", 10 | "It’s just like the **Random Forest Classifier**, but instead of predicting categories (like *Yes/No, Spam/Not Spam*), it predicts **continuous values** (like house price, temperature, or sales numbers).\n", 11 | "\n", 12 | "---\n", 13 | "\n", 14 | "### ✅ When to use Random Forest Regressor\n", 15 | "\n", 16 | "1. **You want to predict numbers (not categories).**\n", 17 | "\n", 18 | " * Example: predicting house prices, car mileage, or sales revenue.\n", 19 | "\n", 20 | "2. **Your dataset has many features.**\n", 21 | "\n", 22 | " * It can handle both numerical and categorical features well.\n", 23 | "\n", 24 | "3. **You want accuracy without lots of parameter tuning.**\n", 25 | "\n", 26 | " * It usually gives good results straight away.\n", 27 | "\n", 28 | "4. **You’re worried about overfitting.**\n", 29 | "\n", 30 | " * A single decision tree might memorize the data (too specific).\n", 31 | " * Random Forest averages many trees → predictions are more stable.\n", 32 | "\n", 33 | "5. 
**You want to know feature importance.**\n", 34 | "\n", 35 | " * It tells you which features (e.g., “number of rooms” or “location”) matter most for the prediction.\n", 36 | "\n", 37 | "---\n", 38 | "\n", 39 | "### 📘 Small Example\n", 40 | "\n", 41 | "Say you want to predict house prices.\n", 42 | "\n", 43 | "* You train 10 decision trees on different random samples.\n", 44 | "* For a new house:\n", 45 | "\n", 46 | " * Tree 1 predicts **\\$200k**\n", 47 | " * Tree 2 predicts **\\$210k**\n", 48 | " * Tree 3 predicts **\\$220k**\n", 49 | " * …\n", 50 | "* Final Random Forest prediction = **average of all trees** (e.g., \\$210k).\n", 51 | "\n", 52 | "---\n", 53 | "\n", 54 | "👉 In ML terms (without jargon):\n", 55 | "The **Random Forest Regressor** is great when you need a **stable, accurate prediction for numbers** and don’t want to spend too much time fine-tuning.\n" 56 | ] 57 | } 58 | ], 59 | "metadata": { 60 | "language_info": { 61 | "name": "python" 62 | } 63 | }, 64 | "nbformat": 4, 65 | "nbformat_minor": 5 66 | } 67 | -------------------------------------------------------------------------------- /8-Random Forest/1-Ensemble-Techniques/3-Boosting.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "17028110", 6 | "metadata": {}, 7 | "source": [ 8 | "### 🟨 Boosting \n", 9 | "\n", 10 | "**Idea:** Instead of training all models separately (like in Bagging), Boosting trains models **one after another**, where each new model tries to **fix the mistakes** made by the previous ones. \n", 11 | "\n", 12 | "---\n", 13 | "\n", 14 | "### 📘 Step-by-step with a small example \n", 15 | "\n", 16 | "1. **Dataset** \n", 17 | "Say we want to predict whether a student will pass (Yes/No) based on study hours and attendance. \n", 18 | "\n", 19 | "2. **First model (weak learner)** \n", 20 | "- Train a simple decision tree. \n", 21 | "- It correctly predicts many students but misclassifies some (e.g., says \"Fail\" when they should \"Pass\"). \n", 22 | "\n", 23 | "3. **Focus on mistakes** \n", 24 | "- The algorithm gives more weight to the students that were predicted incorrectly. \n", 25 | "- These “hard” cases become more important for the next model. \n", 26 | "\n", 27 | "4. **Second model** \n", 28 | "- Train another tree, but this time it focuses more on the students the first model got wrong. \n", 29 | "\n", 30 | "5. **Repeat** \n", 31 | "- Add more trees, each one improving where the last ones failed. \n", 32 | "\n", 33 | "6. **Final prediction** \n", 34 | "- Combine all trees’ outputs with weights (better trees get more say). \n", 35 | "- Result: a stronger model that captures patterns much better than a single tree. \n", 36 | "\n", 37 | "---\n", 38 | "\n", 39 | "### ✅ Small Example (classification) \n", 40 | "Predict if a student passes: \n", 41 | "\n", 42 | "- Tree 1: Predicts 8 correct, 2 wrong. \n", 43 | "- Tree 2: Focuses on those 2 mistakes → fixes 1, still misses 1. \n", 44 | "- Tree 3: Focuses on the last mistake → fixes it. \n", 45 | "**Final Boosting model = combines all trees → 10 correct.** \n", 46 | "\n", 47 | "---\n", 48 | "\n", 49 | "### 🔑 In ML terms \n", 50 | "- Boosting = **sequential training of models**, each correcting errors of the previous. \n", 51 | "- Final prediction = **weighted sum of all models**. \n", 52 | "- Benefit = reduces **bias** (turns weak learners into a strong learner). 
\n", 53 | "\n", 54 | "---\n", 55 | "\n", 56 | "👉 So: \n", 57 | "- **Bagging = parallel teamwork (reduce variance).** \n", 58 | "- **Boosting = sequential teamwork (reduce bias).** " 59 | ] 60 | } 61 | ], 62 | "metadata": { 63 | "language_info": { 64 | "name": "python" 65 | } 66 | }, 67 | "nbformat": 4, 68 | "nbformat_minor": 5 69 | } 70 | -------------------------------------------------------------------------------- /11-XgBoost/All-boostings.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "5cdaa013", 6 | "metadata": {}, 7 | "source": [ 8 | "# 1. **AdaBoost (Adaptive Boosting)**\n", 9 | "\n", 10 | "* **Idea:** Each new weak learner gives more attention (higher weights) to the instances that were misclassified by earlier learners.\n", 11 | "* **Mechanism:**\n", 12 | "\n", 13 | " * Start with equal weights for all samples.\n", 14 | " * After each tree, increase weights of misclassified points and decrease weights of correctly classified ones.\n", 15 | " * Final prediction = weighted vote of all learners.\n", 16 | "* **Strengths:** Simple, works well for clean data.\n", 17 | "* **Weaknesses:** Sensitive to noisy data & outliers, because weights on hard points can become very large.\n", 18 | "\n", 19 | "---\n", 20 | "\n", 21 | "# 2. **Gradient Boosting**\n", 22 | "\n", 23 | "* **Idea:** Instead of adjusting sample weights, it **fits new learners to the residual errors** (the difference between predictions and true values).\n", 24 | "* **Mechanism:**\n", 25 | "\n", 26 | " * Train the first tree.\n", 27 | " * Compute residuals (errors).\n", 28 | " * Train the next tree to predict the residuals.\n", 29 | " * Repeat and combine them.\n", 30 | "* **Strengths:** More flexible (uses gradient descent concept, not just weights).\n", 31 | "* **Weaknesses:** Slower than AdaBoost, prone to overfitting without tuning (requires learning rate, tree depth, etc.).\n", 32 | "\n", 33 | "---\n", 34 | "\n", 35 | "# 3. **XGBoost (Extreme Gradient Boosting)**\n", 36 | "\n", 37 | "* **Idea:** An optimized, regularized version of gradient boosting.\n", 38 | "* **Key Improvements:**\n", 39 | "\n", 40 | " * Uses **second-order derivatives** (like Newton’s method) for more precise gradient updates.\n", 41 | " * Adds **regularization (L1 & L2)** to reduce overfitting.\n", 42 | " * Optimized for speed with parallelization and distributed computing.\n", 43 | " * Automatically handles missing values.\n", 44 | "* **Strengths:** Fast, accurate, and widely used in ML competitions.\n", 45 | "* **Weaknesses:** More complex, can be memory-intensive for large datasets.\n", 46 | "\n", 47 | "---\n", 48 | "\n", 49 | "✅ **In short:**\n", 50 | "\n", 51 | "* **AdaBoost**: reweights misclassified points → focuses on hard examples.\n", 52 | "* **Gradient Boosting**: fits learners to residual errors → uses gradient descent.\n", 53 | "* **XGBoost**: enhanced gradient boosting → faster, regularized, more robust." 
54 | ] 55 | } 56 | ], 57 | "metadata": { 58 | "language_info": { 59 | "name": "python" 60 | } 61 | }, 62 | "nbformat": 4, 63 | "nbformat_minor": 5 64 | } 65 | -------------------------------------------------------------------------------- /9-Adaboost/2-About-Ada-Boost-Classification.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "e8bc0d39", 6 | "metadata": {}, 7 | "source": [ 8 | "# AdaBoost Classification with two Weak Learners" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "691d98e4", 14 | "metadata": { 15 | "vscode": { 16 | "languageId": "plaintext" 17 | } 18 | }, 19 | "source": [ 20 | "### 🎯 Example: Classifying Spam with 2 Weak Learners\n", 21 | "\n", 22 | "We want to decide if an email is **Spam (1)** or **Not Spam (0).**\n", 23 | "\n", 24 | "We have 4 emails:\n", 25 | "\n", 26 | "```\n", 27 | "Email True Label\n", 28 | "A 0\n", 29 | "B 0\n", 30 | "C 1\n", 31 | "D 1\n", 32 | "```\n", 33 | "\n", 34 | "---\n", 35 | "\n", 36 | "#### Step 1: Start with Equal Weights\n", 37 | "\n", 38 | "Each email is equally important.\n", 39 | "So weight = 1/4 for A, B, C, D.\n", 40 | "\n", 41 | "---\n", 42 | "\n", 43 | "#### Step 2: Train Weak Learner 1\n", 44 | "\n", 45 | "Suppose Weak Learner 1 predicts:\n", 46 | "\n", 47 | "```\n", 48 | "A: 0 (correct)\n", 49 | "B: 1 (wrong)\n", 50 | "C: 1 (correct)\n", 51 | "D: 0 (wrong)\n", 52 | "```\n", 53 | "\n", 54 | "It got B and D wrong.\n", 55 | "👉 AdaBoost increases the weights of **B and D** (so the next learner pays more attention to them).\n", 56 | "\n", 57 | "---\n", 58 | "\n", 59 | "#### Step 3: Train Weak Learner 2\n", 60 | "\n", 61 | "Now, because B and D are \"heavier,\" Weak Learner 2 focuses on them.\n", 62 | "\n", 63 | "Predictions of Weak Learner 2:\n", 64 | "\n", 65 | "```\n", 66 | "A: 0 (correct)\n", 67 | "B: 0 (correct this time ✅)\n", 68 | "C: 1 (correct)\n", 69 | "D: 1 (correct this time ✅)\n", 70 | "```\n", 71 | "\n", 72 | "This one does much better.\n", 73 | "\n", 74 | "---\n", 75 | "\n", 76 | "#### Step 4: Combine the Two Learners\n", 77 | "\n", 78 | "* Weak Learner 1 wasn’t great → gets **lower weight** in voting.\n", 79 | "* Weak Learner 2 was better → gets **higher weight** in voting.\n", 80 | "\n", 81 | "Final decision for each email is made by **weighted majority vote**.\n", 82 | "\n", 83 | "👉 Result: All 4 emails are classified correctly 🎉.\n", 84 | "\n", 85 | "---\n", 86 | "\n", 87 | "### 🔑 Key Takeaway\n", 88 | "\n", 89 | "* Learner 1 made mistakes.\n", 90 | "* AdaBoost adjusted importance (B and D got more weight).\n", 91 | "* Learner 2 fixed those mistakes.\n", 92 | "* Final combination = strong classifier, even though each learner was weak." 
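The same idea can be run in code: an AdaBoost classifier restricted to exactly two depth-1 stumps, mirroring the two weak learners above. This is a sketch on a synthetic dataset, since the four-email example is only conceptual:

```python
# AdaBoost with exactly two weak learners (decision stumps), echoing the worked example.
# The synthetic dataset is an assumption for illustration only.
from sklearn.datasets import make_classification
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

X, y = make_classification(n_samples=200, n_features=5, random_state=0)

stump = DecisionTreeClassifier(max_depth=1)                       # one-split weak learner
ada = AdaBoostClassifier(estimator=stump, n_estimators=2, random_state=0)
# Note: scikit-learn versions before 1.2 use base_estimator= instead of estimator=.
ada.fit(X, y)

print("Per-learner voting weights (alpha):", ada.estimator_weights_)
print("Per-learner weighted error rates:", ada.estimator_errors_)
print("Training accuracy of the combined model:", ada.score(X, y))
```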
93 | ] 94 | } 95 | ], 96 | "metadata": { 97 | "kernelspec": { 98 | "display_name": "venv", 99 | "language": "python", 100 | "name": "python3" 101 | }, 102 | "language_info": { 103 | "name": "python", 104 | "version": "3.10.0" 105 | } 106 | }, 107 | "nbformat": 4, 108 | "nbformat_minor": 5 109 | } 110 | -------------------------------------------------------------------------------- /1-Complete Linear Regression/8-When_To_Use_Ridge_Lasso_ElasticNet.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "2b809aab", 6 | "metadata": {}, 7 | "source": [ 8 | "### 🔹 Ridge Regression\n", 9 | "\n", 10 | "Use it when:\n", 11 | "\n", 12 | "* You have **many features**.\n", 13 | "* Some features are not very useful, but you don’t want to throw them away.\n", 14 | "* Ridge **shrinks** their effect (makes their weights smaller) but **keeps all features** in the model.\n", 15 | "\n", 16 | "👉 Think of it as: “I’ll use everything, but I’ll calm down the noisy ones.”\n", 17 | "\n", 18 | "---\n", 19 | "\n", 20 | "### 🔹 Lasso Regression\n", 21 | "\n", 22 | "Use it when:\n", 23 | "\n", 24 | "* You want the model to be **simpler** and only keep the most important features.\n", 25 | "* Lasso can actually make some feature weights **zero** → which means those features get removed.\n", 26 | "\n", 27 | "👉 Think of it as: “I’ll pick only the features that matter and ignore the rest.”\n", 28 | "\n", 29 | "---\n", 30 | "\n", 31 | "### 🔹 Elastic Net\n", 32 | "\n", 33 | "Use it when:\n", 34 | "\n", 35 | "* You want a **balance** between Ridge and Lasso.\n", 36 | "* Useful if you have **lots of features**, some strongly related to each other, and some useless.\n", 37 | "* Elastic Net both **shrinks** and **selects** features.\n", 38 | "\n", 39 | "👉 Think of it as: “I’ll calm down the noisy ones, but also drop the truly useless ones.”\n", 40 | "\n", 41 | "\n", 42 | "✅ **Quick everyday analogy:**\n", 43 | "\n", 44 | "* Ridge = keep all your clothes, but fold them neatly so the closet isn’t messy.\n", 45 | "* Lasso = throw away the clothes you never wear.\n", 46 | "* Elastic Net = do both: fold some, throw some." 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "id": "ed6c7a50", 52 | "metadata": { 53 | "vscode": { 54 | "languageId": "plaintext" 55 | } 56 | }, 57 | "source": [ 58 | "# RidgeCV \n", 59 | "## Ridge + CV → finds best shrink strength.\n", 60 | "\n", 61 | "# LassoCV \n", 62 | "## Lasso + CV → finds best shrink strength while possibly dropping useless features.\n", 63 | "\n", 64 | "# ElasticNetCV \n", 65 | "## ElasticNet + CV → finds best shrink + best mix of Ridge/Lasso." 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "id": "642198f3", 71 | "metadata": { 72 | "vscode": { 73 | "languageId": "plaintext" 74 | } 75 | }, 76 | "source": [ 77 | "# Example with 3 folds and 2 parameter values\n", 78 | "\n", 79 | "Candidate params = [0.1, 1.0]\n", 80 | "\n", 81 | "For alpha = 0.1\n", 82 | "\n", 83 | "Fold 1 → accuracy = 80%\n", 84 | "\n", 85 | "Fold 2 → accuracy = 82%\n", 86 | "\n", 87 | "Fold 3 → accuracy = 79%\n", 88 | "\n", 89 | "Average = 80.3%\n", 90 | "\n", 91 | "For alpha = 1.0\n", 92 | "\n", 93 | "Fold 1 → accuracy = 85%\n", 94 | "\n", 95 | "Fold 2 → accuracy = 83%\n", 96 | "\n", 97 | "Fold 3 → accuracy = 84%\n", 98 | "\n", 99 | "Average = 84%\n", 100 | "\n", 101 | "👉 Cross-validation will select alpha = 1.0 because it had the best average across folds." 
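A small sketch of how that cross-validated alpha search looks with scikit-learn's CV estimators; the alpha grid, l1_ratio grid, and the diabetes dataset are illustrative assumptions, not the course's settings:

```python
# RidgeCV / LassoCV / ElasticNetCV: each tries a grid of alphas with cross-validation
# and keeps the value with the best average score across folds, as described above.
from sklearn.datasets import load_diabetes
from sklearn.linear_model import RidgeCV, LassoCV, ElasticNetCV

X, y = load_diabetes(return_X_y=True)
alphas = [0.01, 0.1, 1.0, 10.0]

ridge = RidgeCV(alphas=alphas, cv=3).fit(X, y)
lasso = LassoCV(alphas=alphas, cv=3, random_state=0).fit(X, y)
enet = ElasticNetCV(alphas=alphas, l1_ratio=[0.2, 0.5, 0.8], cv=3, random_state=0).fit(X, y)

print("Best alpha (Ridge):", ridge.alpha_)
print("Best alpha (Lasso):", lasso.alpha_)
print("Best alpha / l1_ratio (ElasticNet):", enet.alpha_, enet.l1_ratio_)
print("Features dropped by Lasso (coef == 0):", int((lasso.coef_ == 0).sum()))
```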
102 | ] 103 | } 104 | ], 105 | "metadata": { 106 | "language_info": { 107 | "name": "python" 108 | } 109 | }, 110 | "nbformat": 4, 111 | "nbformat_minor": 5 112 | } 113 | -------------------------------------------------------------------------------- /1-Complete Linear Regression/9-Final_touch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "4bfce82b", 6 | "metadata": {}, 7 | "source": [ 8 | "# Chadvandra meekosame Edanta\n", 9 | "\n", 10 | "### **Regression Types**\n", 11 | "\n", 12 | "Regression is about **predicting a number** based on some input. Think of it like drawing a line (or curve) that best fits your data points.\n", 13 | "\n", 14 | "1. **Simple Regression**\n", 15 | "\n", 16 | " * Only **one input variable** predicts one output.\n", 17 | " * Example: Predict a student’s score based on **hours studied**.\n", 18 | " * Visual: A straight line through a scatter of points.\n", 19 | "\n", 20 | "2. **Multiple Regression**\n", 21 | "\n", 22 | " * Uses **more than one input variable** to predict the output.\n", 23 | " * Example: Predict score based on **hours studied** and **hours slept**.\n", 24 | " * Visual: A plane or hyperplane through data points in higher dimensions.\n", 25 | "\n", 26 | "3. **Polynomial Regression**\n", 27 | "\n", 28 | " * The relationship between input and output is **curvy**, not straight.\n", 29 | " * Example: Predict growth of a plant where growth speeds up, then slows down.\n", 30 | " * Visual: A curve that bends to fit the data instead of a straight line.\n", 31 | "\n", 32 | "---\n", 33 | "\n", 34 | "### **Underfitting vs Overfitting**\n", 35 | "\n", 36 | "1. **Underfitting**\n", 37 | "\n", 38 | " * Model is too **simple** to capture the trend.\n", 39 | " * Result: Bad predictions both on old and new data.\n", 40 | " * Example: Fitting a straight line to a very wiggly dataset.\n", 41 | "\n", 42 | "2. **Overfitting**\n", 43 | "\n", 44 | " * Model is too **complex** and memorizes the training data.\n", 45 | " * Result: Works perfectly on old data but poorly on new data.\n", 46 | " * Example: Drawing a crazy squiggly curve that passes through every training point.\n", 47 | "\n", 48 | "**Goal:** Find the **balance** – complex enough to learn patterns but simple enough to generalize.\n", 49 | "\n", 50 | "---\n", 51 | "\n", 52 | "### **Error Metrics**\n", 53 | "\n", 54 | "These tell us **how good our model is**:\n", 55 | "\n", 56 | "1. **MAE (Mean Absolute Error)**\n", 57 | "\n", 58 | " * Average of the **absolute differences** between predicted and actual values.\n", 59 | " * Easy to understand: “On average, our prediction is off by X units.”\n", 60 | "\n", 61 | "2. **MSE (Mean Squared Error)**\n", 62 | "\n", 63 | " * Average of **squared differences**.\n", 64 | " * Penalizes **big mistakes more** than small ones.\n", 65 | "\n", 66 | "3. **RMSE (Root Mean Squared Error)**\n", 67 | "\n", 68 | " * Square root of MSE.\n", 69 | " * Back to the **same units** as the original data, easy to interpret.\n", 70 | "\n", 71 | "---\n", 72 | "\n", 73 | "### **R² (R-squared) & Adjusted R²**\n", 74 | "\n", 75 | "1. **R²**\n", 76 | "\n", 77 | " * Shows **how much of the variation** in the data your model explains.\n", 78 | " * 1 = perfect fit, 0 = model explains nothing.\n", 79 | "\n", 80 | "2. 
**Adjusted R²**\n", 81 | "\n", 82 | " * Like R², but **penalizes unnecessary variables**.\n", 83 | " * Useful in multiple regression to check if extra inputs really help.\n", 84 | "\n", 85 | "---\n", 86 | "\n", 87 | "✅ **Summary in One Sentence:**\n", 88 | "\n", 89 | "* Simple/Multiple/Polynomial Regression: How we draw a line/curve through data.\n", 90 | "* Underfitting/Overfitting: Too simple vs too complex.\n", 91 | "* MAE/MSE/RMSE: How wrong our predictions are.\n", 92 | "* R²/Adjusted R²: How well our model explains the data." 93 | ] 94 | } 95 | ], 96 | "metadata": { 97 | "language_info": { 98 | "name": "python" 99 | } 100 | }, 101 | "nbformat": 4, 102 | "nbformat_minor": 5 103 | } 104 | -------------------------------------------------------------------------------- /1-Complete Linear Regression/4-When_To_Use_Polynomial_Regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "91efeee9", 6 | "metadata": {}, 7 | "source": [ 8 | "# You use **polynomial regression** when:\n", 9 | "\n", 10 | "1. **The relationship between inputs and output isn’t a straight line.**\n", 11 | "\n", 12 | " * Example: As you study more hours, your exam score first goes up, but after a point, too much study makes you tired and your score goes down. That’s a curve, not a line.\n", 13 | "\n", 14 | "2. **The data looks like a curve when you plot it.**\n", 15 | "\n", 16 | " * Example shapes: U-shaped, upside-down U, or a wave-like curve.\n", 17 | "\n", 18 | "3. **A straight-line model (simple or multiple linear regression) isn’t good enough.**\n", 19 | "\n", 20 | " * Linear regression would miss the bends, while polynomial regression can “bend the line” to follow the curve.\n", 21 | "\n", 22 | "\n", 23 | "✅ **Good examples where it makes sense**:\n", 24 | "\n", 25 | "* Predicting plant growth over time (grows fast, slows down, then stops).\n", 26 | "* Predicting car speed vs. fuel efficiency (too slow → waste fuel, too fast → waste fuel, sweet spot in the middle).\n", 27 | "* Predicting learning rate vs. performance (improves, peaks, then declines).\n", 28 | "\n", 29 | "❌ **Not good to use** if:\n", 30 | "\n", 31 | "* The curve is too messy (you might need other models).\n", 32 | "* You make the polynomial degree too high → the curve wiggles too much and overfits the data." 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "id": "aea191db", 38 | "metadata": { 39 | "vscode": { 40 | "languageId": "plaintext" 41 | } 42 | }, 43 | "source": [ 44 | "# Think about it\n", 45 | "\n", 46 | "### **1. One Independent Feature ($x$)**\n", 47 | "\n", 48 | "**Degree 1 (Linear Regression):**\n", 49 | "\n", 50 | "$$\n", 51 | "y = \\beta_0 + \\beta_1 x\n", 52 | "$$\n", 53 | "\n", 54 | "**Degree 2 (Quadratic / Polynomial Regression):**\n", 55 | "\n", 56 | "$$\n", 57 | "y = \\beta_0 + \\beta_1 x + \\beta_2 x^2\n", 58 | "$$\n", 59 | "\n", 60 | "* $x$ = independent variable\n", 61 | "* $y$ = dependent variable\n", 62 | "* $\\beta_0, \\beta_1, \\beta_2$ = coefficients\n", 63 | "\n", 64 | "---\n", 65 | "\n", 66 | "### **2. 
Two Independent Features ($x_1, x_2$)**\n", 67 | "\n", 68 | "**Degree 1 (Linear Regression):**\n", 69 | "\n", 70 | "$$\n", 71 | "y = \\beta_0 + \\beta_1 x_1 + \\beta_2 x_2\n", 72 | "$$\n", 73 | "\n", 74 | "**Degree 2 (Polynomial Regression):**\n", 75 | "\n", 76 | "$$\n", 77 | "y = \\beta_0 + \\beta_1 x_1 + \\beta_2 x_2 + \\beta_3 x_1^2 + \\beta_4 x_2^2 + \\beta_5 x_1 x_2\n", 78 | "$$\n", 79 | "\n", 80 | "* Includes **squared terms** for each feature ($x_1^2, x_2^2$)\n", 81 | "* Includes **interaction term** ($x_1 x_2$)\n", 82 | "\n", 83 | "---\n", 84 | "\n", 85 | "✅ **Summary Table:**\n", 86 | "\n", 87 | "| Features | Degree | Equation |\n", 88 | "| -------- | ------ | ------------------------------------------------------------------------------------------- |\n", 89 | "| 1 | 1 | $y = \\beta_0 + \\beta_1 x$ |\n", 90 | "| 1 | 2 | $y = \\beta_0 + \\beta_1 x + \\beta_2 x^2$ |\n", 91 | "| 2 | 1 | $y = \\beta_0 + \\beta_1 x_1 + \\beta_2 x_2$ |\n", 92 | "| 2 | 2 | $y = \\beta_0 + \\beta_1 x_1 + \\beta_2 x_2 + \\beta_3 x_1^2 + \\beta_4 x_2^2 + \\beta_5 x_1 x_2$ |" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "id": "9241cdcb", 98 | "metadata": { 99 | "vscode": { 100 | "languageId": "plaintext" 101 | } 102 | }, 103 | "source": [ 104 | "# ✅ Intuition:\n", 105 | "\n", 106 | "Low degree → simple → underfits if data is curved.\n", 107 | "\n", 108 | "Higher degree → more curvy → fits better → risk of overfitting.\n", 109 | "\n", 110 | "The curve adapts to the shape of your data as you increase the polynomial degree." 111 | ] 112 | } 113 | ], 114 | "metadata": { 115 | "language_info": { 116 | "name": "python" 117 | } 118 | }, 119 | "nbformat": 4, 120 | "nbformat_minor": 5 121 | } 122 | -------------------------------------------------------------------------------- /9-Adaboost/final_touch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "491985e9", 6 | "metadata": {}, 7 | "source": [ 8 | "## 🌟 What AdaBoost Does\n", 9 | "\n", 10 | "AdaBoost is like a coach who trains a team of weak players (simple models) and makes them work together so they become strong.\n", 11 | "\n", 12 | "---\n", 13 | "\n", 14 | "## ⚙️ How It Works (Step by Step)\n", 15 | "\n", 16 | "1. **Start Equal**\n", 17 | "\n", 18 | " * Every training example (data point) is treated as equally important at the beginning.\n", 19 | "\n", 20 | "2. **Train a Weak Learner**\n", 21 | "\n", 22 | " * A very simple model is trained (like a tiny decision tree).\n", 23 | " * It makes predictions.\n", 24 | "\n", 25 | "3. **Check Mistakes**\n", 26 | "\n", 27 | " * See which data points it got wrong.\n", 28 | "\n", 29 | "4. **Increase Focus on Mistakes**\n", 30 | "\n", 31 | " * The wrongly predicted points get **more importance**.\n", 32 | " * Next learner will try harder on those tricky points.\n", 33 | "\n", 34 | "5. **Repeat**\n", 35 | "\n", 36 | " * Train another simple model, again focusing on the errors.\n", 37 | " * Do this many times.\n", 38 | "\n", 39 | "6. 
**Combine All Models**\n", 40 | "\n", 41 | " * At the end, all weak models are combined.\n", 42 | " * **For classification:** final result is based on a **weighted vote**.\n", 43 | " * **For regression:** final result is a **weighted average**.\n", 44 | "\n", 45 | "---\n", 46 | "\n", 47 | "## 🎯 Key Idea\n", 48 | "\n", 49 | "* Alone, each weak learner is poor.\n", 50 | "* But together, when each one fixes the previous mistakes, they become powerful.\n", 51 | "* That’s why it’s called **“Boosting”** → boosting weak models into a strong one." 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "id": "ee440a86", 57 | "metadata": { 58 | "vscode": { 59 | "languageId": "plaintext" 60 | } 61 | }, 62 | "source": [ 63 | "# Key terminologies" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "id": "faf56899", 69 | "metadata": { 70 | "vscode": { 71 | "languageId": "plaintext" 72 | } 73 | }, 74 | "source": [ 75 | "## 🔑 Important Terms in AdaBoost\n", 76 | "\n", 77 | "### 1. **Weak Learner**\n", 78 | "\n", 79 | "* A very simple model (like a decision stump).\n", 80 | "* On its own, it’s not very accurate.\n", 81 | "* But many weak learners together make a strong model.\n", 82 | "\n", 83 | "---\n", 84 | "\n", 85 | "### 2. **Stump**\n", 86 | "\n", 87 | "* The simplest weak learner: a decision tree with just **one split, one rule**.\n", 88 | "* Example: “Is age > 30?”\n", 89 | "\n", 90 | "---\n", 91 | "\n", 92 | "### 3. **Weights (for data points)**\n", 93 | "\n", 94 | "* Importance values given to each training example.\n", 95 | "* At the start: all points are equal.\n", 96 | "* After each round: wrong points get **higher weight**, so the next learner pays more attention to them.\n", 97 | "\n", 98 | "---\n", 99 | "\n", 100 | "### 4. **Error Rate**\n", 101 | "\n", 102 | "* How many mistakes a weak learner makes (considering the weights).\n", 103 | "* If it’s good (low error), it gets more say in the final decision.\n", 104 | "\n", 105 | "---\n", 106 | "\n", 107 | "### 5. **Alpha (α) = Learner’s Weight**\n", 108 | "\n", 109 | "* A number that shows **how important a weak learner is** in the final model.\n", 110 | "* If the learner is accurate → α is high.\n", 111 | "* If the learner is weak → α is small.\n", 112 | "\n", 113 | "---\n", 114 | "\n", 115 | "### 6. **Boosting**\n", 116 | "\n", 117 | "* The process of **combining many weak learners** into one strong model.\n", 118 | "* Each new learner “boosts” the performance by fixing mistakes.\n", 119 | "\n", 120 | "---\n", 121 | "\n", 122 | "### 7. 
**Final Model**\n", 123 | "\n", 124 | "* For **classification**: all learners vote, but stronger ones have bigger votes.\n", 125 | "* For **regression**: all learners’ predictions are averaged, but stronger ones count more.\n", 126 | "\n", 127 | "---\n", 128 | "\n", 129 | "✅ That’s really all you need:\n", 130 | "\n", 131 | "* **Weak learner (stump)**\n", 132 | "* **Weights (on data points)**\n", 133 | "* **Error rate**\n", 134 | "* **Alpha (learner importance)**\n", 135 | "* **Boosting (the combining process)**" 136 | ] 137 | } 138 | ], 139 | "metadata": { 140 | "language_info": { 141 | "name": "python" 142 | } 143 | }, 144 | "nbformat": 4, 145 | "nbformat_minor": 5 146 | } 147 | -------------------------------------------------------------------------------- /8-Random Forest/1-Ensemble-Techniques/1-ensemble.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "3e30075b", 6 | "metadata": {}, 7 | "source": [ 8 | "# 🌟 Ensembling in ML\n", 9 | "\n", 10 | "Ensembling means: instead of trusting **one model**, we train **many models** and then combine them to make a stronger final prediction.\n", 11 | "\n", 12 | "---\n", 13 | "\n", 14 | "### 🟦 Bagging (Bootstrap Aggregating)\n", 15 | "\n", 16 | "* **How it works:**\n", 17 | "\n", 18 | " 1. Take your dataset.\n", 19 | " 2. Randomly create different subsets (by sampling with replacement).\n", 20 | " 3. Train a separate model (often the same type, like decision trees) on each subset.\n", 21 | " 4. Combine their predictions:\n", 22 | "\n", 23 | " * For classification → majority vote.\n", 24 | " * For regression → average.\n", 25 | "\n", 26 | "* **ML effect:**\n", 27 | "\n", 28 | " * Reduces **variance** (models won’t overfit as much).\n", 29 | " * Works well with **unstable models** like decision trees.\n", 30 | "\n", 31 | "* **Example:** Random Forest = many decision trees trained with bagging.\n", 32 | "\n", 33 | "---\n", 34 | "\n", 35 | "### 🟨 Boosting\n", 36 | "\n", 37 | "* **How it works:**\n", 38 | "\n", 39 | " 1. Train the first model on the dataset.\n", 40 | " 2. Look at where it makes mistakes (misclassified points or large errors).\n", 41 | " 3. Train the next model, giving **more weight** to those mistakes.\n", 42 | " 4. Repeat this process so each new model focuses on fixing previous errors.\n", 43 | " 5. Combine all models’ predictions (weighted sum).\n", 44 | "\n", 45 | "* **ML effect:**\n", 46 | "\n", 47 | " * Reduces **bias** (turns weak learners into a strong learner).\n", 48 | " * Can capture complex patterns, but might risk overfitting if not controlled.\n", 49 | "\n", 50 | "* **Examples:** AdaBoost, Gradient Boosting, XGBoost, LightGBM.\n", 51 | "\n", 52 | "---\n", 53 | "\n", 54 | "### 🧠 Intuition\n", 55 | "\n", 56 | "* **Bagging** = “Train models **in parallel** on random subsets and combine them.”\n", 57 | "* **Boosting** = “Train models **in sequence**, each fixing the mistakes of the previous one.”\n", 58 | "\n", 59 | "---\n", 60 | "\n", 61 | "👉 In ML:\n", 62 | "\n", 63 | "* Bagging = lowers **variance**.\n", 64 | "* Boosting = lowers **bias**." 
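To make the bagging recipe above concrete, here is a minimal sketch (illustrative only, not code from this repository's notebooks): draw bootstrap samples with replacement, train one small tree per sample, and combine the trees by majority vote. The synthetic dataset and the `n_models` value are assumptions chosen just for the demo; in practice `sklearn.ensemble.BaggingClassifier` or `RandomForestClassifier` wraps the same idea.

```python
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Toy binary-classification data (assumed, not from the repo's datasets)
X, y = make_classification(n_samples=500, n_features=10, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

rng = np.random.default_rng(42)
n_models = 25                                   # number of bagged trees (arbitrary)
trees = []
for _ in range(n_models):
    # Step 2: random subset by sampling with replacement (a bootstrap sample)
    idx = rng.integers(0, len(X_train), size=len(X_train))
    # Step 3: train a separate model on that subset
    trees.append(DecisionTreeClassifier(random_state=0).fit(X_train[idx], y_train[idx]))

# Step 4: combine predictions by majority vote (classification)
all_preds = np.array([t.predict(X_test) for t in trees])     # shape (n_models, n_test)
majority = (all_preds.mean(axis=0) >= 0.5).astype(int)
print("bagged accuracy:", (majority == y_test).mean())
```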
65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "id": "030e5631", 70 | "metadata": { 71 | "vscode": { 72 | "languageId": "plaintext" 73 | } 74 | }, 75 | "source": [ 76 | "### 🔹 What is **Bias** in ML?\n", 77 | "\n", 78 | "* Bias = **error from wrong assumptions**.\n", 79 | "* If a model is too simple, it might **miss important patterns**.\n", 80 | "* Example: Using a straight line to fit data that actually curves.\n", 81 | "* This is called **underfitting**.\n", 82 | "\n", 83 | "---\n", 84 | "\n", 85 | "### 🔹 What is **Variance** in ML?\n", 86 | "\n", 87 | "* Variance = **error from being too sensitive to the training data**.\n", 88 | "* If a model is too complex, it might **memorize noise** in the data.\n", 89 | "* Example: A decision tree that splits too much and fits perfectly to training data but fails on new data.\n", 90 | "* This is called **overfitting**.\n", 91 | "\n", 92 | "---\n", 93 | "\n", 94 | "### 🟦 Bagging → reduces Variance\n", 95 | "\n", 96 | "* Bagging trains models on different **random subsets** and then averages their predictions.\n", 97 | "* Averaging cancels out the “noise” that an overfitted model might pick up.\n", 98 | "* ✅ So bagging makes unstable models (like deep decision trees) **more stable and less overfitted**.\n", 99 | "\n", 100 | "---\n", 101 | "\n", 102 | "### 🟨 Boosting → reduces Bias\n", 103 | "\n", 104 | "* Boosting builds models **sequentially**, each focusing on mistakes of the previous.\n", 105 | "* This forces the final model to learn patterns it missed earlier.\n", 106 | "* ✅ So boosting makes weak/simple models (like shallow decision trees) **more accurate and less underfitted**.\n", 107 | "\n", 108 | "---\n", 109 | "\n", 110 | "### 🧠 Easy analogy (with ML twist)\n", 111 | "\n", 112 | "* **Bagging** = You ask many decision trees separately, then take their average. Any one tree may overfit, but averaging smooths out the noise → less variance.\n", 113 | "* **Boosting** = You train one weak tree at a time, each trying to fix the errors of the last one. The team of weak learners becomes strong → less bias.\n", 114 | "\n", 115 | "---\n", 116 | "\n", 117 | "👉 In short:\n", 118 | "\n", 119 | "* Bagging fixes the problem of **models being too wiggly (high variance)**.\n", 120 | "* Boosting fixes the problem of **models being too simple (high bias)**." 
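One way to see the two failure modes side by side is to cross-validate four setups: a single deep tree, a bagged ensemble of deep trees, a single stump, and boosted stumps. The sketch below is only an illustration under assumed settings (synthetic data, 50 estimators, default seeds); on typical runs the bagged trees score higher and more consistently than one deep tree (variance tamed), and the boosted stumps clearly beat a lone stump (bias reduced).

```python
from sklearn.datasets import make_classification
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier

X, y = make_classification(n_samples=1000, n_features=20, random_state=0)

models = {
    "single deep tree": DecisionTreeClassifier(max_depth=None, random_state=0),
    "bagged deep trees": BaggingClassifier(DecisionTreeClassifier(random_state=0),
                                           n_estimators=50, random_state=0),
    "single stump": DecisionTreeClassifier(max_depth=1, random_state=0),
    "boosted stumps": AdaBoostClassifier(DecisionTreeClassifier(max_depth=1),
                                         n_estimators=50, random_state=0),
}

for name, model in models.items():
    scores = cross_val_score(model, X, y, cv=5)      # 5-fold cross-validation
    print(f"{name:18s} mean={scores.mean():.3f} std={scores.std():.3f}")
```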
121 | ] 122 | } 123 | ], 124 | "metadata": { 125 | "language_info": { 126 | "name": "python" 127 | } 128 | }, 129 | "nbformat": 4, 130 | "nbformat_minor": 5 131 | } 132 | -------------------------------------------------------------------------------- /10-Gradient Boosting/Regression/2-About-gradient-boosting-regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "7591e88a", 6 | "metadata": {}, 7 | "source": [ 8 | "# Gradient Boosting regressor with Three Weak learners" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "ed7d76d0", 14 | "metadata": {}, 15 | "source": [ 16 | "### Tiny Example (3 weak learners)\n", 17 | "\n", 18 | "**Data:**\n", 19 | "\n", 20 | "* (Size=1 → Price=100)\n", 21 | "* (Size=2 → Price=200)\n", 22 | "* (Size=3 → Price=300)\n", 23 | "\n", 24 | "---\n", 25 | "\n", 26 | "#### Step 1: First weak learner (Tree #1)\n", 27 | "\n", 28 | "Predicts the **average = 200** for everything.\n", 29 | "\n", 30 | "* Size=1 → Predict 200 (error = -100)\n", 31 | "* Size=2 → Predict 200 (error = 0)\n", 32 | "* Size=3 → Predict 200 (error = +100)\n", 33 | "\n", 34 | "---\n", 35 | "\n", 36 | "#### Step 2: Second weak learner (Tree #2)\n", 37 | "\n", 38 | "Learns to **predict the errors** from step 1:\n", 39 | "\n", 40 | "* If Size=1 → predict -100\n", 41 | "* If Size=2 → predict 0\n", 42 | "* If Size=3 → predict +100\n", 43 | "\n", 44 | "Add these corrections to Tree #1:\n", 45 | "\n", 46 | "* Size=1 → 200 + (-100) = 100 ✅\n", 47 | "* Size=2 → 200 + (0) = 200 ✅\n", 48 | "* Size=3 → 200 + (+100) = 300 ✅\n", 49 | "\n", 50 | "Now predictions are perfect.\n", 51 | "\n", 52 | "---\n", 53 | "\n", 54 | "#### Step 3: Third weak learner (Tree #3)\n", 55 | "\n", 56 | "No errors left (all 0), so Tree #3 does nothing.\n", 57 | "\n", 58 | "---\n", 59 | "\n", 60 | "✅ **Final Model = Tree #1 + Tree #2**\n", 61 | "Predictions exactly match the true prices." 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "id": "4bb66ed6", 67 | "metadata": { 68 | "vscode": { 69 | "languageId": "plaintext" 70 | } 71 | }, 72 | "source": [ 73 | "# 🌱 Gradient Boosting Regressor – Key Terms\n", 74 | "\n", 75 | "### 1. **Weak Learner**\n", 76 | "\n", 77 | "* A small, simple model (usually a shallow decision tree).\n", 78 | "* On its own, it’s not very accurate.\n", 79 | "\n", 80 | "---\n", 81 | "\n", 82 | "### 2. **Ensemble**\n", 83 | "\n", 84 | "* The final model is a **collection of many weak trees**.\n", 85 | "* Each new tree improves on the mistakes of the ones before it.\n", 86 | "\n", 87 | "---\n", 88 | "\n", 89 | "### 3. **Loss Function**\n", 90 | "\n", 91 | "* A way to measure prediction error.\n", 92 | "* Common ones for regression:\n", 93 | "\n", 94 | " * **Mean Squared Error (MSE):** penalizes big errors more.\n", 95 | " * **Mean Absolute Error (MAE):** looks at average distance between prediction and true value.\n", 96 | "\n", 97 | "---\n", 98 | "\n", 99 | "### 4. **Residuals (Errors)**\n", 100 | "\n", 101 | "* The difference between the true value and the model’s prediction:\n", 102 | "\n", 103 | " $$\n", 104 | " \\text{Residual} = y - \\hat{y}\n", 105 | " $$\n", 106 | "* Each new tree is trained to predict these residuals (the mistakes).\n", 107 | "\n", 108 | "---\n", 109 | "\n", 110 | "### 5. 
**Gradient**\n", 111 | "\n", 112 | "* A more general way of saying “direction of error” (especially when using other loss functions).\n", 113 | "* The new tree learns to follow this direction to reduce mistakes.\n", 114 | "\n", 115 | "---\n", 116 | "\n", 117 | "### 6. **Learning Rate**\n", 118 | "\n", 119 | "* A small multiplier that controls how much each new tree affects the model.\n", 120 | "* Small learning rate = slower progress, but usually more accurate in the long run.\n", 121 | "\n", 122 | "---\n", 123 | "\n", 124 | "### 7. **Number of Estimators**\n", 125 | "\n", 126 | "* The number of trees added.\n", 127 | "* More trees = better fit, but too many = risk of overfitting.\n", 128 | "\n", 129 | "---\n", 130 | "\n", 131 | "### 8. **Tree Depth**\n", 132 | "\n", 133 | "* Controls how complex each weak tree is.\n", 134 | "* Shallow trees (depth=1–3) are common, because they focus on small corrections.\n", 135 | "\n", 136 | "---\n", 137 | "\n", 138 | "### 9. **Subsampling**\n", 139 | "\n", 140 | "* Instead of using all data for every tree, we use only part of it.\n", 141 | "* This makes the model more robust and less likely to overfit.\n", 142 | "\n", 143 | "---\n", 144 | "\n", 145 | "### 10. **Regularization**\n", 146 | "\n", 147 | "* Ways to keep the model simpler and prevent overfitting:\n", 148 | "\n", 149 | " * Limit tree depth.\n", 150 | " * Use fewer features per tree.\n", 151 | " * Use a small learning rate.\n", 152 | "\n", 153 | "---\n", 154 | "\n", 155 | "### 11. **Additive Model**\n", 156 | "\n", 157 | "* Gradient boosting builds the model step by step:\n", 158 | "\n", 159 | " $$\n", 160 | " \\text{New prediction} = \\text{Old prediction} + \\text{Small correction}\n", 161 | " $$\n", 162 | "* Over many steps, predictions get closer to the true values." 
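All of these terms meet in one short additive loop. The sketch below is a simplified illustration, not the repository's code: it assumes squared-error loss, where the gradient step reduces to fitting each new shallow tree on the current residuals, and the toy data plus the `learning_rate`/`n_estimators` values are made up. scikit-learn's `GradientBoostingRegressor` exposes the same knobs (`n_estimators`, `learning_rate`, `max_depth`, `subsample`).

```python
import numpy as np
from sklearn.tree import DecisionTreeRegressor

X = np.array([[1.0], [2.0], [3.0]])            # house size (toy data)
y = np.array([100.0, 200.0, 300.0])            # true price

learning_rate = 0.5                            # how much each correction counts
n_estimators = 20                              # number of weak learners
pred = np.full_like(y, y.mean())               # start from the average, like Tree #1 above

trees = []
for _ in range(n_estimators):
    residual = y - pred                                        # what is still wrong
    tree = DecisionTreeRegressor(max_depth=1).fit(X, residual) # shallow weak learner
    pred = pred + learning_rate * tree.predict(X)              # new = old + small correction
    trees.append(tree)

print(pred)    # moves closer to [100, 200, 300] as trees are added
```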
163 | ] 164 | } 165 | ], 166 | "metadata": { 167 | "language_info": { 168 | "name": "python" 169 | } 170 | }, 171 | "nbformat": 4, 172 | "nbformat_minor": 5 173 | } 174 | -------------------------------------------------------------------------------- /10-Gradient Boosting/Classification/2-About-Gradient-Boosting-classifier.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "1d59ea08", 6 | "metadata": {}, 7 | "source": [ 8 | "# **Gradient Boosting classifier using 3 weak learners**\n", 9 | "\n", 10 | "We want to classify fruits as **Apple (0)** or **Orange (1)** based on **size**.\n", 11 | "\n", 12 | "**Training data:**\n", 13 | "\n", 14 | "| Size | Label |\n", 15 | "| ---- | ---------- |\n", 16 | "| 1 | Apple (0) |\n", 17 | "| 2 | Apple (0) |\n", 18 | "| 3 | Orange (1) |\n", 19 | "\n", 20 | "---\n", 21 | "\n", 22 | "### Step 1: First weak learner (Tree #1)\n", 23 | "\n", 24 | "* Predicts the **majority class** → Apple (0) for everything.\n", 25 | "\n", 26 | "| Size | True Label | Pred (Tree #1) | Error |\n", 27 | "| ---- | ---------- | -------------- | --------- |\n", 28 | "| 1 | 0 | 0 | ✅ correct |\n", 29 | "| 2 | 0 | 0 | ✅ correct |\n", 30 | "| 3 | 1 | 0 | ❌ mistake |\n", 31 | "\n", 32 | "So, only Size=3 is misclassified.\n", 33 | "\n", 34 | "---\n", 35 | "\n", 36 | "### Step 2: Second weak learner (Tree #2)\n", 37 | "\n", 38 | "This tree focuses on the **residual errors** (the misclassified points).\n", 39 | "\n", 40 | "* Learns: “If Size=3 → predict Orange (1).”\n", 41 | "\n", 42 | "Now combine Tree #1 and Tree #2 (weighted sum of their votes, then apply sigmoid/softmax):\n", 43 | "\n", 44 | "* Size=1 → Still Apple ✅\n", 45 | "* Size=2 → Still Apple ✅\n", 46 | "* Size=3 → Corrected to Orange ✅\n", 47 | "\n", 48 | "---\n", 49 | "\n", 50 | "### Step 3: Third weak learner (Tree #3)\n", 51 | "\n", 52 | "Since predictions are already perfect, this tree doesn’t change anything (errors are all zero).\n", 53 | "\n", 54 | "---\n", 55 | "\n", 56 | "✅ **Final Model = Tree #1 + Tree #2**\n", 57 | "\n", 58 | "* Size=1 → Apple\n", 59 | "* Size=2 → Apple\n", 60 | "* Size=3 → Orange\n", 61 | "\n", 62 | "---\n", 63 | "\n", 64 | "🔑 **Difference vs Regression case:**\n", 65 | "\n", 66 | "* In regression, weak learners predict **numerical residuals**.\n", 67 | "* In classification, weak learners predict **which class was misclassified**, and updates are done in terms of probabilities (log-odds)." 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "id": "b9f80b9b", 73 | "metadata": { 74 | "vscode": { 75 | "languageId": "plaintext" 76 | } 77 | }, 78 | "source": [ 79 | "# 🌱 Gradient Boosting Classifier – Key Terms\n", 80 | "\n", 81 | "### 1. **Weak Learner**\n", 82 | "\n", 83 | "* A very simple model, usually a small decision tree (called a *stump*).\n", 84 | "* On its own, it’s “weak” (not very accurate).\n", 85 | "\n", 86 | "---\n", 87 | "\n", 88 | "### 2. **Ensemble**\n", 89 | "\n", 90 | "* The final Gradient Boosting model is not just one tree.\n", 91 | "* It’s a **team of many weak trees**, each one fixing mistakes of the previous.\n", 92 | "\n", 93 | "---\n", 94 | "\n", 95 | "### 3. **Loss Function**\n", 96 | "\n", 97 | "* A way to measure how wrong the model is.\n", 98 | "* For classification, usually **log-loss** (penalizes wrong class predictions).\n", 99 | "\n", 100 | "---\n", 101 | "\n", 102 | "### 4. 
**Residuals (Errors)**\n", 103 | "\n", 104 | "* After making predictions, we check where the model went wrong.\n", 105 | "* These mistakes are turned into numbers (gradients) that the next tree tries to fix.\n", 106 | "\n", 107 | "---\n", 108 | "\n", 109 | "### 5. **Gradient**\n", 110 | "\n", 111 | "* Tells us the **direction to move** to reduce errors.\n", 112 | "* Each new tree learns from this direction.\n", 113 | "\n", 114 | "---\n", 115 | "\n", 116 | "### 6. **Learning Rate**\n", 117 | "\n", 118 | "* A small step size that controls how much each new tree contributes.\n", 119 | "* Small learning rate = slower learning, but usually better accuracy.\n", 120 | "\n", 121 | "---\n", 122 | "\n", 123 | "### 7. **Number of Estimators**\n", 124 | "\n", 125 | "* The number of trees we add to the model.\n", 126 | "* More trees = better fit, but too many = risk of overfitting.\n", 127 | "\n", 128 | "---\n", 129 | "\n", 130 | "### 8. **Tree Depth**\n", 131 | "\n", 132 | "* How deep each tree is (how many splits it can make).\n", 133 | "* Shallow trees (like depth=1 or 2) are common, because boosting works best with simple learners.\n", 134 | "\n", 135 | "---\n", 136 | "\n", 137 | "### 9. **Subsampling**\n", 138 | "\n", 139 | "* Instead of using all the data for each tree, we randomly take part of it.\n", 140 | "* Helps prevent overfitting and adds variety.\n", 141 | "\n", 142 | "---\n", 143 | "\n", 144 | "### 10. **Regularization**\n", 145 | "\n", 146 | "* Tricks to stop the model from memorizing the data (overfitting).\n", 147 | "* Examples:\n", 148 | "\n", 149 | " * Keep trees small.\n", 150 | " * Use fewer features per tree.\n", 151 | " * Use a smaller learning rate.\n", 152 | "\n", 153 | "---\n", 154 | "\n", 155 | "### 11. **Additive Model**\n", 156 | "\n", 157 | "* Gradient boosting adds trees **one by one**, each improving the previous result:\n", 158 | "\n", 159 | " $$\n", 160 | " \\text{New model} = \\text{Old model} + \\text{Small correction}\n", 161 | " $$" 162 | ] 163 | } 164 | ], 165 | "metadata": { 166 | "language_info": { 167 | "name": "python" 168 | } 169 | }, 170 | "nbformat": 4, 171 | "nbformat_minor": 5 172 | } 173 | -------------------------------------------------------------------------------- /12-Unsupervised Machine Learning/Intro.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "1f7ef3db", 6 | "metadata": {}, 7 | "source": [ 8 | "# Introduction to Unsupervised Machine Learning\n", 9 | "\n", 10 | "#### Supervised Machine Learning\n", 11 | "\n", 12 | "In **supervised learning**, we have a dataset with:\n", 13 | "\n", 14 | "* **Input features:** things we know, like age, experience, or salary\n", 15 | "* **Output feature:** the thing we want to predict\n", 16 | "\n", 17 | "The goal is to **predict the output** using the inputs.\n", 18 | "There are two main types of supervised learning:\n", 19 | "\n", 20 | "1. **Regression:** Predict numbers (e.g., salary)\n", 21 | "2. **Classification:** Predict categories (e.g., yes/no, red/blue)\n", 22 | "\n", 23 | "Examples of supervised learning algorithms are **Linear Regression, Logistic Regression, Decision Trees, Random Forest, and XGBoost**.\n", 24 | "\n", 25 | "---\n", 26 | "\n", 27 | "# Unsupervised Machine Learning\n", 28 | "\n", 29 | "In **unsupervised learning**, there is **no output to predict**. 
Instead, we try to **find patterns or group similar data together**.\n", 30 | "\n", 31 | "This is called **clustering**.\n", 32 | "\n", 33 | "**Example:**\n", 34 | "Imagine a dataset with **age, years of experience, and salary**.\n", 35 | "\n", 36 | "* We are **not trying to predict anything**.\n", 37 | "* Instead, we can **group people with similar age, experience, and salary**.\n", 38 | "* These groups are called **clusters**.\n", 39 | "\n", 40 | "---\n", 41 | "\n", 42 | "#### Real-World Example: Customer Segmentation\n", 43 | "\n", 44 | "Clustering is useful for **grouping customers**.\n", 45 | "\n", 46 | "* Suppose you know customers’ **salary** and **spending score**.\n", 47 | "* You can make clusters like:\n", 48 | "\n", 49 | " * Customers who buy regularly → offer a **15% discount**\n", 50 | " * Customers who buy occasionally → offer a **20% discount**\n", 51 | "\n", 52 | "This helps businesses **target the right customers** without any pre-labeled data.\n", 53 | "\n", 54 | "---\n", 55 | "\n", 56 | "#### Common Unsupervised Learning Algorithms\n", 57 | "\n", 58 | "Some popular clustering algorithms are:\n", 59 | "\n", 60 | "* **K-Means Clustering**\n", 61 | "* **Hierarchical Clustering**\n", 62 | "* **DBSCAN Clustering**\n", 63 | "\n", 64 | "We can also use **silhouette scoring** to check how good the clusters are.\n", 65 | "\n", 66 | "---\n", 67 | "\n", 68 | "### Key Points\n", 69 | "\n", 70 | "* **Supervised learning:** Predicts a known output.\n", 71 | "* **Unsupervised learning:** Finds patterns without an output.\n", 72 | "* **Clustering:** Groups similar data points.\n", 73 | "* **Algorithms to learn:** K-Means, Hierarchical, DBSCAN, and Silhouette scoring." 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "id": "6824bbcc", 79 | "metadata": { 80 | "vscode": { 81 | "languageId": "plaintext" 82 | } 83 | }, 84 | "source": [ 85 | "## **Unsupervised Learning**\n", 86 | "\n", 87 | "* **What it is:** You have data, but **no answers or labels**. The goal is to **find patterns, groups, or important features** in the data.\n", 88 | "* **Example:** You own a shop. You don’t tell the computer which customers are “VIP” or “regular,” but it can **group customers with similar buying habits**.\n", 89 | "\n", 90 | "---\n", 91 | "\n", 92 | "### **1. Clustering**\n", 93 | "\n", 94 | "**Goal:** Put similar items together. Think of it as **grouping friends who like the same things**.\n", 95 | "\n", 96 | "* **K-Means:**\n", 97 | "\n", 98 | " * Pick a number of groups (K).\n", 99 | " * Computer assigns each item to the nearest group.\n", 100 | " * Groups adjust until things are stable.\n", 101 | " * **Example:** 3 groups of fruits: sweet, sour, medium.\n", 102 | "\n", 103 | "* **Hierarchical Clustering:**\n", 104 | "\n", 105 | " * Start with each item alone.\n", 106 | " * Merge the closest ones step by step.\n", 107 | " * Makes a **tree (dendrogram)** showing relationships.\n", 108 | " * **Example:** Apple and Orange merge first, then Lemon joins later.\n", 109 | "\n", 110 | "* **DBSCAN:**\n", 111 | "\n", 112 | " * Looks for areas where items are **dense** (many close together).\n", 113 | " * Items far from any cluster are **outliers**.\n", 114 | " * **Example:** Most people live in 2 cities → 2 clusters. Someone alone in a village → outlier.\n", 115 | "\n", 116 | "---\n", 117 | "\n", 118 | "### **2. Dimensionality Reduction**\n", 119 | "\n", 120 | "**Goal:** Simplify data while keeping the important parts. 
Think of it as **summarizing a big book into a short story**.\n", 121 | "\n", 122 | "* **PCA (Principal Component Analysis):**\n", 123 | "\n", 124 | " * Combines features into a smaller number of “super-features.”\n", 125 | " * Keeps most of the variation in data.\n", 126 | " * **Example:** You have weight, height, age of people → PCA might combine into “body size factor.”\n", 127 | "\n", 128 | "* **Kernel PCA:**\n", 129 | "\n", 130 | " * Like PCA but can handle **curved/complex relationships**.\n", 131 | "\n", 132 | "* **t-SNE:**\n", 133 | "\n", 134 | " * Great for **visualizing** high-dimensional data in 2D or 3D.\n", 135 | " * Keeps similar items close together visually.\n", 136 | " * **Example:** Plot thousands of handwritten digits in 2D so clusters of 0s, 1s, 2s appear clearly.\n", 137 | "\n", 138 | "---\n", 139 | "\n", 140 | "✅ **Summary in one line:**\n", 141 | "\n", 142 | "* **Clustering →** group similar things\n", 143 | "* **Dimensionality reduction →** simplify data while keeping patterns\n", 144 | "* **DBSCAN →** find dense groups and outliers\n", 145 | "* **PCA/t-SNE →** make data smaller or visualizable" 146 | ] 147 | } 148 | ], 149 | "metadata": { 150 | "language_info": { 151 | "name": "python" 152 | } 153 | }, 154 | "nbformat": 4, 155 | "nbformat_minor": 5 156 | } 157 | -------------------------------------------------------------------------------- /6-K Nearest Neighbor/final_touch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "4e9a5378", 6 | "metadata": {}, 7 | "source": [ 8 | "# 🌟 KNN Algorithm – Simple Steps (with ML wording)\n", 9 | "\n", 10 | "### Step 1: **Pick K**\n", 11 | "\n", 12 | "* K = number of neighbors you will look at.\n", 13 | "* Example: K = 3 → we’ll check the 3 closest points.\n", 14 | "\n", 15 | "👉 In ML, **K is a hyperparameter** (something we choose before running the model).\n", 16 | "\n", 17 | "---\n", 18 | "\n", 19 | "### Step 2: **Measure Distance**\n", 20 | "\n", 21 | "* To know who is “closest,” we calculate **distance**.\n", 22 | "* Most common: **Euclidean distance** (like using a ruler on a graph).\n", 23 | "\n", 24 | "👉 In ML, this is the **distance metric**.\n", 25 | "\n", 26 | "---\n", 27 | "\n", 28 | "### Step 3: **Find K Nearest Neighbors**\n", 29 | "\n", 30 | "* From all the points in the dataset, select the **K closest ones** to the new point.\n", 31 | "\n", 32 | "👉 In ML, we call this **retrieving neighbors**.\n", 33 | "\n", 34 | "---\n", 35 | "\n", 36 | "### Step 4: **Make Prediction**\n", 37 | "\n", 38 | "Now comes the difference:\n", 39 | "\n", 40 | "#### 🔹 For **Classification**\n", 41 | "\n", 42 | "* Look at the labels of the K neighbors (e.g., Apple 🍎 or Orange 🍊).\n", 43 | "* The label with the **most votes** becomes the prediction.\n", 44 | " 👉 In ML: **majority vote rule**.\n", 45 | "\n", 46 | "#### 🔹 For **Regression**\n", 47 | "\n", 48 | "* Look at the values of the K neighbors (e.g., house prices 💰).\n", 49 | "* Take the **average (or weighted average)** of those values.\n", 50 | " 👉 In ML: **averaging rule**.\n", 51 | "\n", 52 | "---\n", 53 | "\n", 54 | "# 🎯 Putting it Together\n", 55 | "\n", 56 | "* **Classification with KNN** → Predict the **category** by majority vote.\n", 57 | "* **Regression with KNN** → Predict the **number** by averaging values.\n", 58 | "\n", 59 | "---\n", 60 | "\n", 61 | "# 🧠 Easy Analogy\n", 62 | "\n", 63 | "* Classification = Asking your neighbors: *“Do you drink tea or coffee?”* → You 
pick the drink most neighbors prefer.\n", 64 | "* Regression = Asking your neighbors: *“How much do you spend on coffee per week?”* → You take the average of their answers." 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "id": "abdc8a19", 70 | "metadata": { 71 | "vscode": { 72 | "languageId": "plaintext" 73 | } 74 | }, 75 | "source": [ 76 | "# Euclidean vs Manhattan Distance\n", 77 | "\n", 78 | "\n", 79 | "## 🔹 Euclidean Distance (straight-line distance)\n", 80 | "\n", 81 | "Think of it like:\n", 82 | "\n", 83 | "* You’re standing at one point in a field.\n", 84 | "* You want to get to another point.\n", 85 | "* If you could fly **like a bird**, you’d go in a **straight line**.\n", 86 | "\n", 87 | "👉 That straight line is **Euclidean distance**.\n", 88 | "It’s the “as-the-crow-flies” distance.\n", 89 | "\n", 90 | "---\n", 91 | "\n", 92 | "## 🔹 Manhattan Distance (grid distance)\n", 93 | "\n", 94 | "Now imagine you’re in a city with streets laid out like a grid (like Manhattan in New York 🏙️).\n", 95 | "\n", 96 | "* You’re at one corner.\n", 97 | "* You want to get to another corner.\n", 98 | "* You **can’t cut across buildings**—you must walk along the streets.\n", 99 | "\n", 100 | "👉 The total number of blocks you walk (up + across) is **Manhattan distance**.\n", 101 | "It’s the “city block” distance.\n", 102 | "\n", 103 | "---\n", 104 | "\n", 105 | "# 🎯 Simple Example\n", 106 | "\n", 107 | "Suppose you’re at point **(0, 0)** and want to reach **(3, 4)**:\n", 108 | "\n", 109 | "* **Euclidean distance** = straight line (like using Pythagoras’ theorem → 5).\n", 110 | "* **Manhattan distance** = walk 3 steps right + 4 steps up = 7.\n", 111 | "\n", 112 | "---\n", 113 | "\n", 114 | "✅ **In short:**\n", 115 | "\n", 116 | "* **Euclidean = straight line**\n", 117 | "* **Manhattan = grid path**" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "id": "a0ccf8a2", 123 | "metadata": { 124 | "vscode": { 125 | "languageId": "plaintext" 126 | } 127 | }, 128 | "source": [ 129 | "# KD-Tree & Ball-Tree \n", 130 | "## For optimizing search in KNN\n", 131 | "\n", 132 | "### 🌳 KD Tree (K-Dimensional Tree)\n", 133 | "\n", 134 | "* It’s a **data structure** that organizes points in space.\n", 135 | "* It works by **repeatedly splitting the dataset** along one feature at a time (e.g., first split by height, then by weight, etc.).\n", 136 | "* This creates a tree where each branch represents a smaller region of the data.\n", 137 | "* During KNN search, you don’t check all points — you only search the **relevant branches** of the tree.\n", 138 | "\n", 139 | "👉 In ML terms: **KD Tree speeds up nearest-neighbor search when data has fewer features (low dimensions).**\n", 140 | "\n", 141 | "---\n", 142 | "\n", 143 | "### ⚪ Ball Tree\n", 144 | "\n", 145 | "* Another **data structure** for organizing points.\n", 146 | "* Instead of splitting by features, it **groups points into hyperspheres** (think “balls” around clusters of points).\n", 147 | "* Each ball contains points that are close to each other.\n", 148 | "* During KNN search, you can quickly eliminate balls that are too far away, so you only check the useful ones.\n", 149 | "\n", 150 | "👉 In ML terms: **Ball Tree is more effective than KD Tree for higher-dimensional data.**\n", 151 | "\n", 152 | "---\n", 153 | "\n", 154 | "## 🔑 Why are they important in ML?\n", 155 | "\n", 156 | "* KNN requires finding **nearest neighbors**, which can be slow if you check every point.\n", 157 | "* **KD Tree and Ball Tree are efficient search structures** 
that reduce the number of comparisons.\n", 158 | "* They make KNN practical for larger datasets.\n", 159 | "\n", 160 | "---\n", 161 | "\n", 162 | "✅ **Summary in ML terms (simple)**\n", 163 | "\n", 164 | "* **KD Tree** → splits space by features (best for low dimensions).\n", 165 | "* **Ball Tree** → groups points into balls (better for high dimensions).\n", 166 | "* Both → **faster nearest-neighbor search** → makes KNN more efficient." 167 | ] 168 | } 169 | ], 170 | "metadata": { 171 | "language_info": { 172 | "name": "python" 173 | } 174 | }, 175 | "nbformat": 4, 176 | "nbformat_minor": 5 177 | } 178 | -------------------------------------------------------------------------------- /9-Adaboost/4-About-Ada-Boost-Regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "894b6dfd", 6 | "metadata": {}, 7 | "source": [ 8 | "# Ada-Boost Regression with two weak learners" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "e5950c0f", 14 | "metadata": { 15 | "vscode": { 16 | "languageId": "plaintext" 17 | } 18 | }, 19 | "source": [ 20 | "## 🏠 House Price Example (AdaBoost Regressor)\n", 21 | "\n", 22 | "We want to predict house prices. \n", 23 | "The real prices are:\n", 24 | "\n", 25 | "```\n", 26 | "House True Price\n", 27 | "A 200\n", 28 | "B 300\n", 29 | "C 400\n", 30 | "```\n", 31 | "\n", 32 | "---\n", 33 | "\n", 34 | "### 🔹 Step 1: Weak Learner 1 (the rough guess)\n", 35 | "\n", 36 | "Imagine our first weak learner is very simple: it just predicts the **average price** for all houses.\n", 37 | "\n", 38 | "- Average = (200 + 300 + 400) / 3 = **300** \n", 39 | "\n", 40 | "So predictions are:\n", 41 | "\n", 42 | "```\n", 43 | "A: 300 (but true is 200 → error = -100)\n", 44 | "B: 300 (true is 300 → error = 0)\n", 45 | "C: 300 (true is 400 → error = +100)\n", 46 | "```\n", 47 | "\n", 48 | "👉 Weak Learner 1 is too simple. It gets B right but misses A and C.\n", 49 | "\n", 50 | "---\n", 51 | "\n", 52 | "### 🔹 Step 2: Focus on the Errors\n", 53 | "\n", 54 | "AdaBoost says: \"Okay, Learner 1 made big mistakes for A and C. Let’s train another weak learner to correct those errors.\"\n", 55 | "\n", 56 | "So, **Weak Learner 2** tries to predict the **correction (residual)**:\n", 57 | "\n", 58 | "```\n", 59 | "A needs -100 correction\n", 60 | "B needs 0 correction\n", 61 | "C needs +100 correction\n", 62 | "```\n", 63 | "\n", 64 | "Learner 2 learns something close to these corrections.\n", 65 | "\n", 66 | "---\n", 67 | "\n", 68 | "### 🔹 Step 3: Combine Them\n", 69 | "\n", 70 | "Now we combine the two learners. \n", 71 | "The final prediction is:\n", 72 | "\n", 73 | "```\n", 74 | "Final Prediction = Learner 1’s guess + Learner 2’s correction\n", 75 | "```\n", 76 | "\n", 77 | "So:\n", 78 | "\n", 79 | "```\n", 80 | "A: 300 + (-100) = 200 ✅\n", 81 | "B: 300 + 0 = 300 ✅\n", 82 | "C: 300 + (+100) = 400 ✅\n", 83 | "```\n", 84 | "\n", 85 | "👉 Perfect results after just 2 weak learners.\n", 86 | "\n", 87 | "---\n", 88 | "\n", 89 | "## 🌟 Key Takeaway\n", 90 | "\n", 91 | "- **Learner 1**: makes a rough overall guess (average). \n", 92 | "- **Learner 2**: fixes the mistakes from Learner 1 (adds corrections). \n", 93 | "- **Together**: they become accurate. \n", 94 | "\n", 95 | "AdaBoost repeats this idea with many learners:\n", 96 | "👉 *first guess → find mistakes → correct them → combine everything*. 
" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "id": "6a04f525", 102 | "metadata": { 103 | "vscode": { 104 | "languageId": "plaintext" 105 | } 106 | }, 107 | "source": [ 108 | "# Ada-Boost Regression with three weak learners" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "id": "98d2b7e2", 114 | "metadata": { 115 | "vscode": { 116 | "languageId": "plaintext" 117 | } 118 | }, 119 | "source": [ 120 | "## 🏠 House Price Example with 3 Learners\n", 121 | "\n", 122 | "True prices:\n", 123 | "\n", 124 | "```\n", 125 | "House True Price\n", 126 | "A 200\n", 127 | "B 300\n", 128 | "C 400\n", 129 | "```\n", 130 | "\n", 131 | "---\n", 132 | "\n", 133 | "### 🔹 Step 1: Weak Learner 1 (the rough guess)\n", 134 | "\n", 135 | "Learner 1 predicts the **average of all houses**:\n", 136 | "\n", 137 | "* Average = (200 + 300 + 400) / 3 = **300**\n", 138 | "\n", 139 | "Predictions:\n", 140 | "\n", 141 | "```\n", 142 | "A: 300 (error = -100)\n", 143 | "B: 300 (error = 0)\n", 144 | "C: 300 (error = +100)\n", 145 | "```\n", 146 | "\n", 147 | "👉 Good for B, but bad for A and C.\n", 148 | "\n", 149 | "---\n", 150 | "\n", 151 | "### 🔹 Step 2: Weak Learner 2 (fixes mistakes)\n", 152 | "\n", 153 | "Learner 2 tries to predict the **corrections (residuals)** from Learner 1:\n", 154 | "\n", 155 | "```\n", 156 | "A needs -100\n", 157 | "B needs 0\n", 158 | "C needs +100\n", 159 | "```\n", 160 | "\n", 161 | "Suppose Learner 2 is weak and doesn’t perfectly predict — it only predicts **half of the needed correction**:\n", 162 | "\n", 163 | "```\n", 164 | "A: -50\n", 165 | "B: 0\n", 166 | "C: +50\n", 167 | "```\n", 168 | "\n", 169 | "---\n", 170 | "\n", 171 | "### 🔹 Step 3: Combine Learner 1 + Learner 2\n", 172 | "\n", 173 | "Final after 2 learners:\n", 174 | "\n", 175 | "```\n", 176 | "A: 300 + (-50) = 250 (still too high, error = -50)\n", 177 | "B: 300 + 0 = 300 (perfect ✅)\n", 178 | "C: 300 + 50 = 350 (still too low, error = +50)\n", 179 | "```\n", 180 | "\n", 181 | "👉 Better than before, but not perfect yet.\n", 182 | "\n", 183 | "---\n", 184 | "\n", 185 | "### 🔹 Step 4: Weak Learner 3 (fixes what’s still wrong)\n", 186 | "\n", 187 | "Now AdaBoost looks at the remaining errors:\n", 188 | "\n", 189 | "```\n", 190 | "A needs -50 more\n", 191 | "B needs 0\n", 192 | "C needs +50 more\n", 193 | "```\n", 194 | "\n", 195 | "Learner 3 again only predicts half the correction:\n", 196 | "\n", 197 | "```\n", 198 | "A: -25\n", 199 | "B: 0\n", 200 | "C: +25\n", 201 | "```\n", 202 | "\n", 203 | "---\n", 204 | "\n", 205 | "### 🔹 Step 5: Combine All 3 Learners\n", 206 | "\n", 207 | "Final predictions:\n", 208 | "\n", 209 | "```\n", 210 | "A: 300 + (-50) + (-25) = 225 (error = -25)\n", 211 | "B: 300 + 0 + 0 = 300 (perfect ✅)\n", 212 | "C: 300 + 50 + 25 = 375 (error = +25)\n", 213 | "```\n", 214 | "\n", 215 | "---\n", 216 | "\n", 217 | "## 🌟 Key Takeaway\n", 218 | "\n", 219 | "* **Learner 1**: gives a rough starting point (300 for all).\n", 220 | "* **Learner 2**: moves the predictions closer (fixes half the error).\n", 221 | "* **Learner 3**: fixes a bit more.\n", 222 | "* If we keep adding learners, we get closer and closer to the true values.\n", 223 | "\n", 224 | "👉 That’s how AdaBoost builds a **strong regressor** out of many weak learners:\n", 225 | "**guess → correct → correct more → … until accurate.**" 226 | ] 227 | } 228 | ], 229 | "metadata": { 230 | "language_info": { 231 | "name": "python" 232 | } 233 | }, 234 | "nbformat": 4, 235 | "nbformat_minor": 5 236 | } 237 | 
-------------------------------------------------------------------------------- /5-Naive Baye's/final_touch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "12c060db", 6 | "metadata": {}, 7 | "source": [ 8 | "# Little Math" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "0d539d54", 14 | "metadata": {}, 15 | "source": [ 16 | "### 1. Bayes’ Theorem\n", 17 | "\n", 18 | "Bayes’ theorem tells us how to calculate the probability of something (say $A$) happening when we already know something else (say $B$) happened:\n", 19 | "\n", 20 | "$$\n", 21 | "P(A \\mid B) = \\frac{P(B \\mid A) \\cdot P(A)}{P(B)}\n", 22 | "$$\n", 23 | "\n", 24 | "* $P(A \\mid B)$ = probability of $A$ given $B$\n", 25 | "* $P(B \\mid A)$ = probability of $B$ given $A$\n", 26 | "* $P(A)$ = overall probability of $A$\n", 27 | "* $P(B)$ = overall probability of $B$\n", 28 | "\n", 29 | "---\n", 30 | "\n", 31 | "### 2. Example (Bayes in daily life)\n", 32 | "\n", 33 | "Suppose:\n", 34 | "\n", 35 | "* 1% of emails are spam.\n", 36 | "* If an email is spam, there’s a 90% chance it contains the word *“offer”*.\n", 37 | "* If an email is not spam, there’s a 20% chance it contains the word *“offer”*.\n", 38 | "\n", 39 | "Now, if an email has the word *“offer”*, what’s the probability it’s spam?\n", 40 | "That’s exactly where Bayes’ theorem helps.\n", 41 | "\n", 42 | "---\n", 43 | "\n", 44 | "### 3. Extending to Many Features\n", 45 | "\n", 46 | "In real problems (like text classification), we don’t look at just one word, but many words (like *offer*, *win*, *money*, etc.).\n", 47 | "\n", 48 | "So, if our class is **Spam** or **Not Spam**, and we have features (words) $x_1, x_2, x_3, \\dots, x_n$, Bayes says:\n", 49 | "\n", 50 | "$$\n", 51 | "P(\\text{Class} \\mid x_1, x_2, \\dots, x_n) \n", 52 | "= \\frac{P(x_1, x_2, \\dots, x_n \\mid \\text{Class}) \\cdot P(\\text{Class})}{P(x_1, x_2, \\dots, x_n)}\n", 53 | "$$\n", 54 | "\n", 55 | "---\n", 56 | "\n", 57 | "### 4. The “Naive” Assumption\n", 58 | "\n", 59 | "Here’s the tricky part:\n", 60 | "Calculating $P(x_1, x_2, \\dots, x_n \\mid \\text{Class})$ is **very hard** because it needs the joint probability of all features together.\n", 61 | "\n", 62 | "So Naive Bayes makes a **simplifying assumption**:\n", 63 | "👉 All features are independent given the class.\n", 64 | "\n", 65 | "This means:\n", 66 | "\n", 67 | "$$\n", 68 | "P(x_1, x_2, \\dots, x_n \\mid \\text{Class}) \\approx P(x_1 \\mid \\text{Class}) \\cdot P(x_2 \\mid \\text{Class}) \\cdot \\dots \\cdot P(x_n \\mid \\text{Class})\n", 69 | "$$\n", 70 | "\n", 71 | "---\n", 72 | "\n", 73 | "### 5. Naive Bayes Formula\n", 74 | "\n", 75 | "Now, the formula becomes:\n", 76 | "\n", 77 | "$$\n", 78 | "P(\\text{Class} \\mid x_1, x_2, \\dots, x_n) \\propto P(\\text{Class}) \\cdot \\prod_{i=1}^{n} P(x_i \\mid \\text{Class})\n", 79 | "$$\n", 80 | "\n", 81 | "The denominator $P(x_1, x_2, \\dots, x_n)$ is the same for all classes, so we usually ignore it when comparing.\n", 82 | "\n", 83 | "So the rule is:\n", 84 | "👉 For each class, multiply its prior probability ($P(\\text{Class})$) by the likelihoods of each feature.\n", 85 | "👉 Pick the class with the highest score." 
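To see the formula in action, the email example above can be computed directly; the snippet below just plugs in the numbers already stated (1% spam, 90% and 20% for the word "offer") and is purely illustrative.

```python
# Worked Bayes' theorem example using the probabilities given above
p_spam = 0.01                 # P(Spam)
p_offer_given_spam = 0.90     # P("offer" | Spam)
p_offer_given_ham = 0.20      # P("offer" | Not Spam)

# P("offer") by the law of total probability (the denominator)
p_offer = p_offer_given_spam * p_spam + p_offer_given_ham * (1 - p_spam)

# P(Spam | "offer") by Bayes' theorem
p_spam_given_offer = p_offer_given_spam * p_spam / p_offer
print(round(p_spam_given_offer, 4))   # ~0.0435
```

Even though "offer" is much more common in spam, the small prior $P(\text{Spam}) = 0.01$ keeps the posterior near 4%, which is exactly why the prior term matters when comparing class scores.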
86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "id": "0849b218", 91 | "metadata": { 92 | "vscode": { 93 | "languageId": "plaintext" 94 | } 95 | }, 96 | "source": [ 97 | "# About Algorithm\n", 98 | "\n", 99 | "### What is Naive Bayes?\n", 100 | "\n", 101 | "* **Naive Bayes** is a **simple algorithm** used to **classify things into categories** (like spam vs not spam, positive review vs negative review, etc.).\n", 102 | "* It is based on **probability** — it tries to find which category is **most likely** for a given piece of data.\n", 103 | "* The word **“Naive”** comes from the fact that it assumes all features (clues) are independent from each other.\n", 104 | "* The word **“Bayes”** comes from **Bayes’ Theorem**, the math rule it uses.\n", 105 | "\n", 106 | "---\n", 107 | "\n", 108 | "### How it works (super short steps):\n", 109 | "\n", 110 | "1. Look at your data (features/clues).\n", 111 | "2. For each possible category, calculate a probability score.\n", 112 | "3. Choose the category with the **highest score**.\n", 113 | "\n", 114 | "---\n", 115 | "\n", 116 | "👉 In short:\n", 117 | "**Naive Bayes is a simple probability-based method that guesses the category of data by multiplying probabilities of clues and picking the most likely category.**" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "id": "ed376c53", 123 | "metadata": { 124 | "vscode": { 125 | "languageId": "plaintext" 126 | } 127 | }, 128 | "source": [ 129 | "# Variants of Naive Bayes\n", 130 | "\n", 131 | "### 1. **Bernoulli Naive Bayes**\n", 132 | "\n", 133 | "* **What it is:** Works with **binary features** (yes/no, 0/1, present/absent).\n", 134 | "* **When to use:** When data is about **whether a feature exists or not** (not how many times).\n", 135 | "* **Tiny Example:**\n", 136 | "\n", 137 | " * Email classification:\n", 138 | "\n", 139 | " * Feature = “Does the email contain the word *offer*?”\n", 140 | " * Answer = Yes (1) or No (0).\n", 141 | " * Bernoulli NB looks at the **presence/absence** of words, not counts.\n", 142 | "\n", 143 | "---\n", 144 | "\n", 145 | "### 2. **Multinomial Naive Bayes**\n", 146 | "\n", 147 | "* **What it is:** Works with **counts of features** (how many times something appears).\n", 148 | "* **When to use:** For **text data** where word frequency matters.\n", 149 | "* **Tiny Example:**\n", 150 | "\n", 151 | " * Email classification:\n", 152 | "\n", 153 | " * Word “offer” appears **3 times**.\n", 154 | " * Word “win” appears **1 time**.\n", 155 | " * Multinomial NB uses these **counts** to calculate probabilities.\n", 156 | "\n", 157 | "---\n", 158 | "\n", 159 | "### 3. 
**Gaussian Naive Bayes**\n", 160 | "\n", 161 | "* **What it is:** Works with **continuous (numeric) features**, assuming values follow a **bell curve (Gaussian distribution)**.\n", 162 | "* **When to use:** When features are **real numbers** like height, weight, temperature, exam scores, etc.\n", 163 | "* **Tiny Example:**\n", 164 | "\n", 165 | " * Predict if a fruit is “apple” or “orange” using **weight** and **diameter** (continuous values).\n", 166 | " * Gaussian NB assumes these numbers follow a normal distribution for each class.\n", 167 | "\n", 168 | "---\n", 169 | "\n", 170 | "### Quick Summary\n", 171 | "\n", 172 | "* **Bernoulli:** Features are **yes/no** → good for binary text data.\n", 173 | "* **Multinomial:** Features are **counts** → good for word frequency in documents.\n", 174 | "* **Gaussian:** Features are **continuous numbers** → good for measurements.\n" 175 | ] 176 | } 177 | ], 178 | "metadata": { 179 | "language_info": { 180 | "name": "python" 181 | } 182 | }, 183 | "nbformat": 4, 184 | "nbformat_minor": 5 185 | } 186 | -------------------------------------------------------------------------------- /4-SVM/final_touch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "925f5d6e", 6 | "metadata": {}, 7 | "source": [ 8 | "### **SVC (Support Vector Classification)**\n", 9 | "\n", 10 | "* **Task**: Sort things into groups (like “spam” vs. “not spam”).\n", 11 | "* **Loss function**: **Hinge loss**\n", 12 | "\n", 13 | " * Punishes the model if it puts something in the **wrong group** or if it’s **too close to the boundary**.\n", 14 | " * Encourages the model to draw a **clear gap (margin)** between groups.\n", 15 | "\n", 16 | "---\n", 17 | "\n", 18 | "### **SVR (Support Vector Regression)**\n", 19 | "\n", 20 | "* **Task**: Predict a number (like house price or temperature).\n", 21 | "* **Loss function**: **Epsilon (ε) loss**\n", 22 | "\n", 23 | " * Small mistakes are **ignored** (if prediction is “close enough”).\n", 24 | " * Only **big mistakes** get punished.\n", 25 | " * Lets the model be a little flexible.\n", 26 | "\n", 27 | "---\n", 28 | "\n", 29 | "👉 Think of it like this:\n", 30 | "\n", 31 | "* **SVC hinge loss** = “Be strict, separate groups clearly.”\n", 32 | "* **SVR epsilon loss** = “Small errors are okay, big errors are not.”" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "id": "72c347a3", 38 | "metadata": { 39 | "vscode": { 40 | "languageId": "plaintext" 41 | } 42 | }, 43 | "source": [ 44 | "# Key terminologies \n", 45 | "\n", 46 | "### 🔹 **Hyperplane**\n", 47 | "\n", 48 | "Think of it as an **imaginary line (in 2D)** or a **flat sheet (in higher dimensions)** that separates two groups of data.\n", 49 | "\n", 50 | "* Example: Draw a line on paper that divides **red dots** from **blue dots**. 
That line is the **hyperplane**.\n", 51 | "\n", 52 | "---\n", 53 | "\n", 54 | "### 🔹 **Margin (and Marginal Planes)**\n", 55 | "\n", 56 | "The **margin** is the **space or gap** between the hyperplane and the closest data points from each group.\n", 57 | "\n", 58 | "* Imagine two fences on either side of the line (hyperplane).\n", 59 | "* The distance between these fences = **margin**.\n", 60 | "* The fences themselves = **marginal planes**.\n", 61 | "\n", 62 | "---\n", 63 | "\n", 64 | "### 🔹 **Support Vectors**\n", 65 | "\n", 66 | "These are the **data points that touch the fences (marginal planes)**.\n", 67 | "\n", 68 | "* They are the “critical” points that decide **where the line (hyperplane) will be drawn**.\n", 69 | "* If you remove them, the boundary could shift.\n", 70 | "* Think of them as the **guardians of the margin**.\n", 71 | "\n", 72 | "---\n", 73 | "\n", 74 | "### 🔹 **Hard Margin**\n", 75 | "\n", 76 | "* Very **strict rule**: all points must be perfectly separated, with no mistakes allowed.\n", 77 | "* Works only if the data is **perfectly clean and separable** (rare in real life).\n", 78 | "* Analogy: “No one is allowed to cross the fence, not even a toe.”\n", 79 | "\n", 80 | "---\n", 81 | "\n", 82 | "### 🔹 **Soft Margin**\n", 83 | "\n", 84 | "* More **flexible rule**: allows some points to be on the wrong side or inside the margin.\n", 85 | "* Useful when data is **messy or overlapping**.\n", 86 | "* Analogy: “A few people can cross the fence — it’s okay, as long as most are on the right side.”\n", 87 | "\n", 88 | "---\n", 89 | "\n", 90 | "👉 In short:\n", 91 | "\n", 92 | "* **Hyperplane** = dividing line.\n", 93 | "* **Margins** = safe zones on each side of the line.\n", 94 | "* **Support vectors** = the data points touching the margin.\n", 95 | "* **Hard margin** = no mistakes allowed.\n", 96 | "* **Soft margin** = some mistakes allowed." 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "id": "7da9f6a5", 102 | "metadata": { 103 | "vscode": { 104 | "languageId": "plaintext" 105 | } 106 | }, 107 | "source": [ 108 | "# 🔹 What are Kernels?\n", 109 | "\n", 110 | "A **kernel** is like a **clever trick** that lets SVM draw better boundaries.\n", 111 | "\n", 112 | "* Sometimes data is not separable with a simple straight line.\n", 113 | "* A kernel **transforms the data into a new space** where it becomes easier to separate.\n", 114 | "\n", 115 | "---\n", 116 | "\n", 117 | "### 🔹 Why are they important?\n", 118 | "\n", 119 | "* Without kernels: SVM can only draw **straight lines (or flat sheets)**.\n", 120 | "* With kernels: SVM can draw **curved, complex boundaries** that separate tricky data.\n", 121 | "\n", 122 | "---\n", 123 | "\n", 124 | "### 🔹 Example\n", 125 | "\n", 126 | "Imagine you have data shaped like a **circle**:\n", 127 | "\n", 128 | "* Red points inside the circle, blue points outside.\n", 129 | "* A straight line cannot separate them.\n", 130 | "* A kernel can transform the data so that SVM finds a nice **circular boundary**.\n", 131 | "\n", 132 | "---\n", 133 | "\n", 134 | "### 🔹 Common Kernels (in simple terms)\n", 135 | "\n", 136 | "* **Linear kernel** → draws straight lines.\n", 137 | "* **Polynomial kernel** → draws curved lines like circles or parabolas.\n", 138 | "* **RBF (Radial Basis Function) kernel** → very flexible, can make complex boundaries (most popular).\n", 139 | "\n", 140 | "---\n", 141 | "\n", 142 | "👉 **In short**:\n", 143 | "Kernels are like **special lenses** you put on your model. 
They change how the data looks so SVM can separate it better.\n" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "id": "57e9ec2e", 149 | "metadata": { 150 | "vscode": { 151 | "languageId": "plaintext" 152 | } 153 | }, 154 | "source": [ 155 | "# About loss functions \n", 156 | "\n", 157 | "### **1. Hinge Loss (used in SVC)**\n", 158 | "\n", 159 | "* **Purpose:** Make the model **classify correctly** and **keep points away from the boundary**.\n", 160 | "* **How it works:**\n", 161 | "\n", 162 | " * If a point is **correctly classified and far enough** from the boundary → **loss = 0** (no penalty).\n", 163 | " * If a point is **too close to the boundary** or **misclassified** → **loss > 0** (penalty applied).\n", 164 | "* **Effect:** The model is “encouraged” to **draw a line that separates classes clearly**.\n", 165 | "\n", 166 | "**Analogy:** Imagine a rope dividing red and blue balls.\n", 167 | "\n", 168 | "* Balls on the right side → no problem.\n", 169 | "* Balls touching or crossing the rope → you get a penalty.\n", 170 | "\n", 171 | "---\n", 172 | "\n", 173 | "### **2. Epsilon-Insensitive Loss (ε-loss, used in SVR)**\n", 174 | "\n", 175 | "* **Purpose:** Make the model **predict numbers close to the true value**, but **ignore tiny errors**.\n", 176 | "* **How it works:**\n", 177 | "\n", 178 | " * If the predicted value is **within ε (tolerance) of the true value** → **loss = 0** (no penalty).\n", 179 | " * If the predicted value is **outside ε** → **loss > 0** (penalty increases as error grows).\n", 180 | "* **Effect:** The model focuses on **big mistakes**, not tiny differences.\n", 181 | "\n", 182 | "**Analogy:** Predicting a house price:\n", 183 | "\n", 184 | "* Off by \\$500 → okay, no penalty.\n", 185 | "* Off by \\$20,000 → you get a penalty.\n", 186 | "\n", 187 | "---\n", 188 | "\n", 189 | "**In short:**\n", 190 | "\n", 191 | "* **Hinge loss (SVC)** → punish wrong or borderline classifications.\n", 192 | "* **Epsilon loss (SVR)** → punish predictions that are too far off, ignore small errors.\n", 193 | "\n", 194 | "Key idea:\n", 195 | "\n", 196 | "* Yes, penalties guide the model.\n", 197 | "\n", 198 | "* The more the penalty, the more the line/function will move or change shape to satisfy the loss function.\n" 199 | ] 200 | } 201 | ], 202 | "metadata": { 203 | "language_info": { 204 | "name": "python" 205 | } 206 | }, 207 | "nbformat": 4, 208 | "nbformat_minor": 5 209 | } 210 | -------------------------------------------------------------------------------- /2-Ridge Lasso And Elasticnet/4-Cv_and_tuning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "17ca8e3e", 6 | "metadata": {}, 7 | "source": [ 8 | "# Types of Cross validations\n", 9 | "\n", 10 | "## 🔹 1. Leave-One-Out Cross Validation (LOOCV)\n", 11 | "\n", 12 | "👉 Imagine you have **10 questions**.\n", 13 | "\n", 14 | "* Train the model on 9 questions, test it on the **1 question left out**.\n", 15 | "* Repeat this 10 times, each time leaving out a different question.\n", 16 | "* Average the results.\n", 17 | "\n", 18 | "✅ Very thorough (uses almost all data for training).\n", 19 | "❌ But can be slow if you have a lot of data, because it repeats many times.\n", 20 | "\n", 21 | "---\n", 22 | "\n", 23 | "## 🔹 2. 
Leave-P-Out Cross Validation\n", 24 | "\n", 25 | "👉 Similar to LOOCV, but instead of leaving **1 question out**, you leave out **P questions** at a time.\n", 26 | "\n", 27 | "* Example: With 10 questions, leave out 2 each time.\n", 28 | "* Train on 8, test on 2 → repeat for all possible combinations.\n", 29 | "\n", 30 | "✅ More flexible than LOOCV.\n", 31 | "❌ Gets very expensive if P is big or dataset is large.\n", 32 | "\n", 33 | "---\n", 34 | "\n", 35 | "## 🔹 3. K-Fold Cross Validation\n", 36 | "\n", 37 | "👉 The most common one.\n", 38 | "\n", 39 | "* Split your data into **k equal groups** (say 5 groups).\n", 40 | "* Train on 4 groups, test on the 1 left out.\n", 41 | "* Repeat until every group has been tested once.\n", 42 | "* Average the results.\n", 43 | "\n", 44 | "✅ Balanced, efficient, widely used.\n", 45 | "\n", 46 | "---\n", 47 | "\n", 48 | "## 🔹 4. Stratified K-Fold Cross Validation\n", 49 | "\n", 50 | "👉 Same as K-Fold, but with a twist:\n", 51 | "\n", 52 | "* Useful when your data has **categories** (like 70% cats, 30% dogs).\n", 53 | "* It makes sure **each fold keeps the same ratio** of cats and dogs.\n", 54 | "* This prevents some folds from being “unbalanced” (e.g., one fold with mostly cats).\n", 55 | "\n", 56 | "✅ Best for classification problems.\n", 57 | "\n", 58 | "---\n", 59 | "\n", 60 | "## 🔹 5. Time Series Cross Validation\n", 61 | "\n", 62 | "👉 Special for **time-based data** (like stock prices, weather, sales).\n", 63 | "\n", 64 | "* You can’t shuffle time, because the past always comes before the future.\n", 65 | "* Instead, you train on earlier data and test on later data.\n", 66 | "* Example:\n", 67 | "\n", 68 | " * Train on Jan–Mar, test on Apr\n", 69 | " * Train on Jan–Apr, test on May\n", 70 | " * Train on Jan–May, test on Jun\n", 71 | "\n", 72 | "✅ Respects the order of time.\n", 73 | "❌ Can’t randomly split like K-Fold.\n", 74 | "\n", 75 | "---\n", 76 | "\n", 77 | "✅ In short:\n", 78 | "\n", 79 | "* **LOOCV** → leave 1 out each time.\n", 80 | "* **Leave-P-Out** → leave P out each time.\n", 81 | "* **K-Fold** → split into k groups.\n", 82 | "* **Stratified K-Fold** → like K-Fold but keeps category balance.\n", 83 | "* **Time Series CV** → train on past, test on future." 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "id": "70f7660e", 89 | "metadata": { 90 | "vscode": { 91 | "languageId": "plaintext" 92 | } 93 | }, 94 | "source": [ 95 | "# Imagine you’re training a model\n", 96 | "\n", 97 | "## You have 100 practice questions/samples (your training data).\n", 98 | "\n", 99 | "## You want to know if the model is really learning, not just memorizing." 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "id": "4bb8314c", 105 | "metadata": {}, 106 | "source": [ 107 | "### Option 1: One Train-Test Split\n", 108 | "\n", 109 | "* **Training data (80 questions)** → like giving the model **80 samples** to learn from.\n", 110 | "* **Test data (20 questions)** → like holding out **20 samples** to check performance.\n", 111 | "\n", 112 | "**Problem**: The model is only evaluated on that **one specific test set**.\n", 113 | "\n", 114 | "* If those 20 samples are unusually easy/hard, the performance estimate may be **biased** or **lucky**.\n", 115 | "\n", 116 | "---\n", 117 | "\n", 118 | "### Option 2: Cross-Validation (CV)\n", 119 | "\n", 120 | "1. **Split the dataset into 5 folds** (20 samples per fold).\n", 121 | "2. **Round 1:** Train on 4 folds (80 samples), test on 1 fold (20 samples).\n", 122 | "3. 
**Round 2:** Train on a different 4 folds, test on the next fold.\n", 123 | "4. Repeat until **every fold has been the test set once**.\n", 124 | "\n", 125 | "Now you have **5 evaluation scores**, one per fold.\n", 126 | "👉 Take the **average** to estimate performance.\n", 127 | "\n", 128 | "---\n", 129 | "\n", 130 | "### Why CV is Better (in ML terms)\n", 131 | "\n", 132 | "✅ **Every data point** is used for **training** and for **testing** → no wasted data.\n", 133 | "✅ The model is tested on **multiple splits**, reducing bias from a single lucky/unlucky test set.\n", 134 | "✅ Produces a **more stable and reliable estimate** of model performance (generalization ability).\n", 135 | "\n", 136 | "---\n", 137 | "\n", 138 | "👉 That’s why in ML, cross-validation is the standard way to check if a model is **learning true patterns** in the data rather than just **memorizing a single train-test split**." 139 | ] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "id": "85b4be7e", 144 | "metadata": { 145 | "vscode": { 146 | "languageId": "plaintext" 147 | } 148 | }, 149 | "source": [ 150 | "# hyperparameter tuning\n", 151 | "\n", 152 | "### 🔹 How Tuning Works in Machine Learning\n", 153 | "\n", 154 | "1. You pick some **values to try** for each hyperparameter.\n", 155 | "\n", 156 | " * Example: alpha = [0.1, 1, 10]\n", 157 | "2. For each combination:\n", 158 | "\n", 159 | " * Train the model on part of your data.\n", 160 | " * Test it on another part (cross-validation).\n", 161 | " * Measure performance.\n", 162 | "3. Choose the hyperparameter values that **give the best results**.\n", 163 | "4. Retrain the model on all training data using these best values.\n", 164 | "\n", 165 | "\n", 166 | "### 🔹 Key Point\n", 167 | "\n", 168 | "* **Hyperparameters are not learned** from data; you set them.\n", 169 | "* Tuning is **testing different settings** to find the ones that work best.\n", 170 | "* Cross-validation is often used to **check each setting fairly**.\n", 171 | "\n", 172 | "\n", 173 | "✅ In one line:\n", 174 | "Hyperparameter tuning = trying different “settings” for your model to make it perform as well as possible on new data." 175 | ] 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "id": "9ac1953c", 180 | "metadata": { 181 | "vscode": { 182 | "languageId": "plaintext" 183 | } 184 | }, 185 | "source": [ 186 | "# Most Commonly used hyperparameter tuning techniques\n", 187 | "\n", 188 | "### 🔹 1. Grid Search\n", 189 | "\n", 190 | "* **How it works:** You make a **list of possible values** for each hyperparameter.\n", 191 | "* The algorithm tries **every possible combination** and checks which one works best.\n", 192 | "* **Pros:** Simple, guarantees you test all combinations.\n", 193 | "* **Cons:** Can be very slow if you have many hyperparameters or many values.\n", 194 | "\n", 195 | "**Example:**\n", 196 | "\n", 197 | "```python\n", 198 | "alpha = [0.1, 1, 10]\n", 199 | "max_depth = [3, 5, 7]\n", 200 | "# Grid Search will try all 3 x 3 = 9 combinations\n", 201 | "```\n", 202 | "\n", 203 | "---\n", 204 | "\n", 205 | "### 🔹 2. 
Random Search\n", 206 | "\n", 207 | "* **How it works:** Instead of trying all combinations, it **randomly picks some combinations** to test.\n", 208 | "* **Pros:** Much faster than grid search for large search spaces.\n", 209 | "* **Cons:** Might miss the best combination.\n", 210 | "\n", 211 | "**Example:**\n", 212 | "\n", 213 | "```python\n", 214 | "alpha = [0.1, 1, 10, 100]\n", 215 | "Randomly pick 2 or 3 combinations to try instead of all 4.\n", 216 | "```\n", 217 | "\n", 218 | "\n", 219 | "### 🔹3. LassoCV / RidgeCV / Tree-based CV\n", 220 | "\n", 221 | "* Some models, like **LassoCV**, automatically **tune certain hyperparameters** (like `alpha`) using cross-validation.\n", 222 | "* **Pros:** Easy to use, no manual search needed.\n", 223 | "* **Cons:** Only works for specific parameters/models.\n", 224 | "\n", 225 | "---\n", 226 | "\n", 227 | "✅ **Summary Table (Simple)**\n", 228 | "\n", 229 | "| Technique | Pros | Cons |\n", 230 | "| --------------------- | ------------------------ | ------------------------- |\n", 231 | "| Grid Search | Tries all combos, simple | Slow for big search space |\n", 232 | "| Random Search | Faster | Might miss best values |\n", 233 | "| CV-based tuning | Automatic, easy | Limited to some models |" 234 | ] 235 | } 236 | ], 237 | "metadata": { 238 | "language_info": { 239 | "name": "python" 240 | } 241 | }, 242 | "nbformat": 4, 243 | "nbformat_minor": 5 244 | } 245 | -------------------------------------------------------------------------------- /18-Anomaly Detection ML/healthcare.csv: -------------------------------------------------------------------------------- 1 | 0,1 2 | 1.6166714467607703,1.9445220058244743 3 | 1.2564614954716933,1.609444278372786 4 | -2.343918805514341,4.392961298452939 5 | 1.1953925137948538,2.794484609935539 6 | -3.32958611899837,5.30315976217969 7 | 1.624198299577627,1.2187533446542167 8 | 0.4561196851060762,1.807574190885176 9 | 0.6230432479650517,2.044243249082337 10 | -2.0590820225642847,4.659910898001574 11 | 2.1630763774416235,1.391492934836416 12 | -2.133866768446983,4.435994334264997 13 | -1.6531134236549336,4.920213280926866 14 | 0.7504731649426203,2.238116208851796 15 | -1.7107392511355608,5.174827228496587 16 | 0.861024442189089,2.281911401667097 17 | -1.8627418211380304,4.554542458502236 18 | 1.4802724401629417,1.7998323093712925 19 | 1.452230876272758,2.699339905154557 20 | -1.655090917732606,5.650923114782499 21 | 1.2288712278144334,1.2883880745972651 22 | 9.39915671907378,-1.9714832667941733 23 | 2.0402533858495677,2.076452907774612 24 | 0.8904419955726828,1.0948052346649093 25 | -2.352350137928117,5.471630362484747 26 | 0.5661267685700757,6.088818216688779 27 | -2.3820719619532214,4.281104263099211 28 | -2.5780912159109564,5.390599050854997 29 | -1.9290234183396104,4.840335791427452 30 | -2.406682129602101,4.266787836098743 31 | 0.370271506709434,1.351081803850222 32 | -1.626405832897684,4.405527522398132 33 | 0.3056108665624912,2.8655427766622528 34 | -2.1459186813739315,4.6192538940941885 35 | -2.1545064845235613,4.161998096835012 36 | 2.57294545005092,2.5285511389302 37 | 1.2177761371187743,2.355711549267699 38 | -2.065053477096852,5.046976614692785 39 | 1.763497871808941,0.730419446591064 40 | 0.3588611571237687,1.823947578868042 41 | -1.921746731017312,5.116090518100138 42 | -2.3420054488686084,5.829775398094936 43 | 1.645792484653802,2.183647325538599 44 | -1.7394675617736208,4.712106015093467 45 | 2.363725270111905,0.8406562937367941 46 | 1.9036135716304583,1.3541908710954036 47 | 
6.858753230385831,-3.0148790578234266 48 | 1.2104667134681768,0.9767816319914696 49 | 1.0292776865568345,1.248312786921621 50 | 4.134460104166776,7.943369443003483 51 | 2.1851165888740174,3.0686252394410203 52 | 1.0279671375398367,1.5222813907920458 53 | 2.099781428331884,1.0442820846898972 54 | 0.8785499766589211,0.1134424722432512 55 | 8.521063582817488,-2.011324333618001 56 | -2.4779725002463886,4.827009112150307 57 | -1.3639767533797116,-3.4204594039359293 58 | -1.8145874993594489,5.071030902593618 59 | 0.9610587478882786,1.877474433904272 60 | 1.5320309621110122,1.3689713049819163 61 | 1.9376620157190312,0.3785596410752767 62 | -1.2527577277543156,3.9650074874932337 63 | -3.1277821147010947,4.488746578182198 64 | 3.014265164496483,2.4355695646054745 65 | 2.3006499478938576,5.248303342322846 66 | -2.030813201047824,4.9463473618544125 67 | 1.810704262921798,1.7335720291619867 68 | -1.5791843679631818,4.875270709919525 69 | -2.078833508081908,6.128361748649104 70 | -2.884269225338516,5.177740896371884 71 | 0.7269316624000847,1.536515555482682 72 | -2.3460249238921955,5.7681885271228985 73 | -2.87794529171886,5.225467230902957 74 | 9.086555262600308,-0.8287530876679163 75 | 0.6106605010599866,2.178577695710608 76 | 7.809654814009496,3.972770029240927 77 | -2.0664402887934776,4.851104560299136 78 | -1.5710380378538318,5.570550933328787 79 | -2.9681399029232534,5.094389298396914 80 | 6.172560777870359,2.98043516111194 81 | -2.1034518380819858,5.440089456040391 82 | -3.047301535603072,5.061860957116753 83 | -2.3187185127761145,4.801364092835601 84 | 1.6008304998126146,2.5179914548740827 85 | 0.6785760503804639,-2.446275032468316 86 | 1.2522042621225613,0.5375340946073869 87 | -0.1819263619312963,9.54851332353537 88 | 1.5465620556682176,1.711730328417847 89 | -5.593293972094477,-4.273737168107129 90 | 0.6421951368188824,2.091054083481083 91 | -2.6532634258676584,5.829065339809094 92 | 0.7660129244530353,0.5059874439747174 93 | 0.5950001631935861,1.686935608544428 94 | -4.082154122661075,-0.5981572575419527 95 | 1.7087667219442393,2.4284701276809213 96 | 2.3316208546466117,1.7218598283473947 97 | -2.1997245146314377,5.185027943923759 98 | -1.535247444260236,5.291112295698962 99 | 6.335474879272976,1.900729541830252 100 | -1.4657453003419951,4.773307098074306 101 | 1.9706063299087049,1.7852232853687773 102 | -2.0679748503391604,5.568445681301347 103 | 1.929855559242256,2.145544670096569 104 | -2.472223127959125,4.794975153398726 105 | -2.220461316146296,4.859822252407746 106 | -1.2400025696171135,5.859794653708097 107 | -2.5944296288920143,4.7465918228506565 108 | 2.3251456998117552,1.3740526149313543 109 | -2.299326968461493,4.442051507019803 110 | -5.888765911500949,2.171956841241052 111 | 2.5458553512103244,1.3563892153639394 112 | -2.055270328616236,5.5100863558579 113 | -1.85682815555386,5.304421917237725 114 | 1.719147391155631,0.9021329824887928 115 | 2.1645945648480325,1.5612858689696336 116 | -2.353037399328933,0.833724618020101 117 | 1.0559745344233256,1.2460811838090031 118 | -2.021085725645289,4.856556403805047 119 | -0.8703455246545739,4.978871424169679 120 | -1.42383421760844,5.539809296018411 121 | 8.875162437481169,6.929926073030671 122 | -2.8490529097161272,5.193640237697531 123 | -2.3627986892317923,4.308318022302473 124 | 1.4277468037445096,1.7874189513568606 125 | -2.0925268355046707,4.596175756191823 126 | -2.3598021942758964,4.593503505722961 127 | -1.980684724079906,4.171642448839023 128 | -0.9703043027787732,2.1371310586352443 129 | 1.5073500145045744,2.7501093457340846 
130 | -2.3140437798207896,4.759486440769607 131 | -1.6668084589840428,4.769640106305723 132 | 5.309717501561874,5.664114688655797 133 | -1.266710642212911,5.426275969730616 134 | 1.0494671180169766,-0.0563822065570984 135 | 6.340325546975132,-4.815285572254554 136 | 0.6788136164200881,2.8605348299545046 137 | -1.7380544880828972,5.04421104352233 138 | 1.8662936623722275,1.3799175681444262 139 | 2.2975804791803105,0.6356219257524433 140 | -2.563412904378372,4.634661123567588 141 | -1.951137516142572,5.291476839876647 142 | -1.9752525091749549,5.246918388140478 143 | -5.497133029012252,4.182566580220486 144 | 2.137125235664814,1.7220527506339114 145 | -2.32620429119351,4.804523312406199 146 | 0.5788648126419352,1.176890776629704 147 | -2.298658034482681,4.881039135131997 148 | -1.592740088756067,5.029462794590815 149 | 0.6843951114516506,2.1305785408679307 150 | -2.231797987323047,5.240740736886731 151 | -2.0078410558012774,5.080464084149112 152 | 0.8648911449317305,1.5363617770572973 153 | 7.08823625475989,7.771689332101495 154 | -2.155443085849236,5.048700083134392 155 | -2.6671292357013767,4.326641247101223 156 | 2.144601176646573,1.7231093570601148 157 | 2.288645145474555,0.7440479441456037 158 | -2.712030454491266,4.753340058318903 159 | 1.6959472193302667,1.4312947272499297 160 | -2.0668507798342195,5.538871902988132 161 | 0.978671624566092,0.92149302293869 162 | -3.015342233890747,6.03224643067966 163 | -2.1924399045906378,5.047175794658537 164 | 2.105105339201654,0.9804844857154906 165 | -2.1767158743785995,4.191762905674484 166 | -1.022043845874653,5.195046661343963 167 | -1.6133735112987002,4.408059679903341 168 | 3.1682013424047595,2.16113564089329 169 | 5.673698853409025,4.20091954168341 170 | 2.3612034765094045,2.341665894149088 171 | 1.4829717129207451,0.983378363556018 172 | -1.17593253389622,5.082113877743667 173 | 0.9823059025260809,2.580110216784715 174 | -1.6542306244649068,5.347374571828003 175 | 0.9307976025688822,0.2916021783678262 176 | 1.2819132275108391,1.539315739560822 177 | -2.1545572223858542,4.883266669228154 178 | 1.1513772845153865,2.85067243767189 179 | -1.800476827179935,3.6137036217866743 180 | 2.0327264076028952,1.58517251154498 181 | 6.9190424826250005,8.337460798775892 182 | -2.4927553688420754,4.264082496268206 183 | -0.8480416511580291,4.469992088639226 184 | 3.0888282367913256,0.4819440277808646 185 | 1.7816391488242842,1.0206329363417808 186 | -1.678342767468536,4.214688295683274 187 | 2.050429573476283,1.1735066322848398 188 | -2.271430738008359,5.208025023130713 189 | -1.0242828525494048,-0.7970668475720473 190 | 0.6967719433122284,1.1935259686788962 191 | -1.7868706346109495,5.338454017515123 192 | 2.846059418536269,2.536360354004097 193 | 1.6084631979878414,1.7647137637215216 194 | -2.723267349781694,5.400148974670014 195 | 1.0549077451347832,1.9735033064625496 196 | -2.5226266830734776,5.605572644841351 197 | -1.5340758129428482,5.169982491900631 198 | -2.1823467721958427,5.078351927636182 199 | -0.2870928710838549,1.9575330168082523 200 | -1.1336394065404334,5.342250553429595 201 | 1.9559760942781672,-1.1859470429316854 202 | -1.3177340759487643,4.655275407725031 203 | -1.650771425446332,5.001885444543134 204 | 1.3123976403340338,2.0617194770574767 205 | -2.578677629595427,4.843853874437153 206 | 3.7530293436783495,2.042864371661784 207 | 1.4725020272407532,0.6823345515811619 208 | -1.7163548610736652,4.888662449742423 209 | -2.2398279070039737,5.3101791491717565 210 | 1.2415727134402896,1.3324345756968516 211 | 
2.818205487939378,0.5565686572002876 212 | -2.0085102069307204,5.189575867777541 213 | -1.859779147341852,4.503438194535209 214 | -1.4425520899605182,-5.409808842327475 215 | 0.8971420204617082,2.8370454671693235 216 | 2.807290593104977,0.8159054840865123 217 | 1.1811272475378598,1.5122354113175398 218 | 0.9376132350783112,1.0173151555748596 219 | -0.911944400450178,-4.239169207557172 220 | -1.616668409177457,5.178146408736144 221 | 0.7998492568273047,0.418660232255672 222 | 8.280943975839644,2.629475870650412 223 | -2.3392293521340406,-4.768321442739312 224 | 2.010363393524276,1.5902880375301875 225 | 1.4310832329021974,1.035565199546525 226 | -2.9215347750783245,4.761012997979757 227 | 0.7760300107508747,1.977116162797139 228 | 1.588838464892534,1.7813925544112912 229 | -0.2645483432916382,-4.146095047597925 230 | 1.9480920682683145,0.368130768982416 231 | 1.7494564780220814,1.994601217734364 232 | 0.8731734071644272,1.7708317485014833 233 | 2.165061892267913,1.3940499541916114 234 | -2.298157019225254,4.973716351865227 235 | -2.3261467999675096,4.739405343849445 236 | 1.1432434727738423,1.1933479888721696 237 | 1.7772046988631516,0.734856943888646 238 | -2.0953267467906995,4.802575242983274 239 | -2.791469198667541,5.305189689553602 240 | 2.807291272298599,2.13423126079277 241 | 1.8259637078113216,0.4246294196059434 242 | 1.0138024981880442,1.3952558217706306 243 | -2.7703985072223123,5.031630997100166 244 | -0.705631602357613,-4.983066395423622 245 | 2.7348366421773647,1.7801100458570562 246 | 2.54177653565692,2.8271224232214083 247 | 2.2697617171209794,1.9586846115058183 248 | 1.142436403701789,0.6735574711143115 249 | 8.195403881221225,1.5554388025911887 250 | -4.086492064987173,5.41191659556792 251 | -1.7531291113254055,4.941948030482817 252 | -------------------------------------------------------------------------------- /2-Ridge Lasso And Elasticnet/3-final_touch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "edd51733", 6 | "metadata": { 7 | "vscode": { 8 | "languageId": "plaintext" 9 | } 10 | }, 11 | "source": [ 12 | "\n", 13 | "## Ridge regression\n", 14 | "Linear Regression + penalty on large coefficients (L2 norm) → gives a more stable, less overfit model." 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "id": "599e884f", 20 | "metadata": {}, 21 | "source": [ 22 | "### ✅ Use Ridge Regression when:\n", 23 | "\n", 24 | "1. **You have multicollinearity** (features are correlated).\n", 25 | "2. **Your model is overfitting** (good training accuracy, poor test accuracy).\n", 26 | "3. **You want to keep all features** but just reduce their influence (shrink, not remove).\n", 27 | "4. **You expect coefficients to be small but not exactly zero** (like in polynomial regression or high-dimensional data)." 
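"\n",
"A minimal sketch of this in code (assuming scikit-learn's `Ridge`; the tiny dataset mirrors the y = 2x example below, and `alpha=1.0` is just an illustrative value):\n",
"\n",
"```python\n",
"from sklearn.linear_model import Ridge\n",
"\n",
"# Toy data: y = 2x for the first three points, plus one noisy point (4, 9)\n",
"X = [[1], [2], [3], [4]]\n",
"y = [2, 4, 6, 9]\n",
"\n",
"# alpha sets the strength of the L2 penalty (larger alpha = more shrinkage)\n",
"model = Ridge(alpha=1.0)\n",
"model.fit(X, y)\n",
"print(model.coef_, model.intercept_)  # slope is shrunk a little versus plain linear regression\n",
"```\n",
"\n",
"Increasing `alpha` shrinks the coefficient further, while `alpha=0` would behave like plain linear regression."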
28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "id": "4a4f7448", 33 | "metadata": { 34 | "vscode": { 35 | "languageId": "plaintext" 36 | } 37 | }, 38 | "source": [ 39 | "# small example\n", 40 | "\n", 41 | "### 📊 Data\n", 42 | "\n", 43 | "Suppose we have just 3 points:\n", 44 | "\n", 45 | "| x | y |\n", 46 | "| - | - |\n", 47 | "| 1 | 2 |\n", 48 | "| 2 | 4 |\n", 49 | "| 3 | 6 |\n", 50 | "\n", 51 | "This is a **perfect line**: $y = 2x$.\n", 52 | "\n", 53 | "---\n", 54 | "\n", 55 | "### 🔹 Plain Linear Regression\n", 56 | "\n", 57 | "* Model can fit exactly:\n", 58 | "\n", 59 | " $$\n", 60 | " y = 2x\n", 61 | " $$\n", 62 | "* **Training cost = 0** (no error).\n", 63 | "* But if we add a noisy point later (say $x=4, y=9$ instead of 8), the model will struggle because it only memorized the perfect line.\n", 64 | "\n", 65 | "---\n", 66 | "\n", 67 | "### 🔹 With Ridge Regression\n", 68 | "\n", 69 | "* Ridge says: “Fit the line, but don’t let coefficients get too extreme.”\n", 70 | "* So instead of exactly $m = 2, b = 0$, Ridge might give something like:\n", 71 | "\n", 72 | " $$\n", 73 | " y = 1.9x + 0.1\n", 74 | " $$\n", 75 | "* **Training cost > 0** (a little error).\n", 76 | "* But on noisy/unseen data, it performs **better** because weights are smaller and more stable.\n", 77 | "\n", 78 | "---\n", 79 | "\n", 80 | "✅ **Takeaway:**\n", 81 | "\n", 82 | "* **Plain regression**: memorizes perfectly → cost = 0, risk of overfit.\n", 83 | "* **Ridge regression**: allows tiny training error but improves **generalization**." 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "id": "3842bb3a", 89 | "metadata": { 90 | "vscode": { 91 | "languageId": "plaintext" 92 | } 93 | }, 94 | "source": [ 95 | "## Lasso regression \n", 96 | "Linear Regression + L1 penalty → makes the model simpler by shrinking some feature weights to zero." 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "id": "e9a57ef3", 102 | "metadata": { 103 | "vscode": { 104 | "languageId": "plaintext" 105 | } 106 | }, 107 | "source": [ 108 | "\n", 109 | "✅ Use Lasso Regression when:\n", 110 | "\n", 111 | "1. You have **many features** but only a few are truly important.\n", 112 | "2. You want the model to **automatically remove irrelevant features** (by setting their weights to zero).\n", 113 | "3. 
You care about a **simpler, more interpretable model**.\n", 114 | "\n", 115 | "## In short:\n", 116 | "**Use Lasso when you want prediction + feature selection at the same time.**\n" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "id": "36286806", 122 | "metadata": { 123 | "vscode": { 124 | "languageId": "plaintext" 125 | } 126 | }, 127 | "source": [ 128 | "## 📊 Example Data\n", 129 | "\n", 130 | "Suppose we want to predict a score (`y`) using 3 features:\n", 131 | "\n", 132 | "| x1 | x2 | x3 | y |\n", 133 | "| -- | -- | -- | -- |\n", 134 | "| 1 | 2 | 0 | 10 |\n", 135 | "| 2 | 4 | 0 | 20 |\n", 136 | "| 3 | 6 | 0 | 30 |\n", 137 | "| 4 | 8 | 0 | 40 |\n", 138 | "\n", 139 | "Here:\n", 140 | "\n", 141 | "* `x1` and `x2` are useful (they explain y).\n", 142 | "* `x3` is **useless** (always 0).\n", 143 | "\n", 144 | "---\n", 145 | "\n", 146 | "### 🔹 Plain Linear Regression\n", 147 | "\n", 148 | "It will try to give **some weight** to every feature, even the useless one:\n", 149 | "\n", 150 | "* $y \\approx 0.1x1 + 4.9x2 + 0.05x3$\n", 151 | "* Notice `x3` got a tiny weight (not exactly zero).\n", 152 | "\n", 153 | "---\n", 154 | "\n", 155 | "### 🔹 Lasso Regression\n", 156 | "\n", 157 | "Because of the L1 penalty, Lasso will **shrink useless features to zero**:\n", 158 | "\n", 159 | "* $y \\approx 0.2x1 + 4.8x2 + 0x3$\n", 160 | "* 👉 `x3` dropped out completely.\n", 161 | "\n", 162 | "---\n", 163 | "\n", 164 | "### ✅ Takeaway\n", 165 | "\n", 166 | "* **Plain regression**: keeps all features, even useless ones.\n", 167 | "* **Lasso regression**: keeps only important features, sets useless ones to **0**." 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "id": "5a30827a", 173 | "metadata": { 174 | "vscode": { 175 | "languageId": "plaintext" 176 | } 177 | }, 178 | "source": [ 179 | "## 🔹 What is Elastic Net Regression?\n", 180 | "\n", 181 | "Elastic Net = **Linear Regression with both Ridge (L2) and Lasso (L1) penalties**.\n", 182 | "\n", 183 | "$$\n", 184 | "J(w) = \\text{MSE} + \\lambda_1 \\sum |w_j| + \\lambda_2 \\sum w_j^2\n", 185 | "$$\n", 186 | "\n", 187 | "* **Lasso part (L1):** can set useless features to 0 (feature selection).\n", 188 | "* **Ridge part (L2):** shrinks large weights to keep them stable when features are correlated.\n", 189 | "\n", 190 | "👉 It combines the strengths of both.\n", 191 | "\n", 192 | "---\n", 193 | "\n", 194 | "## 🔹 When to Use Elastic Net\n", 195 | "\n", 196 | "✅ Use **Elastic Net** when:\n", 197 | "\n", 198 | "1. You have **many features**, some irrelevant, some correlated.\n", 199 | "2. You want **feature selection** (like Lasso) but also **stability with correlated features** (like Ridge).\n", 200 | "3. 
You’re not sure whether Ridge or Lasso alone is best → Elastic Net is a safer middle ground.\n", 201 | "\n", 202 | "---\n", 203 | "\n", 204 | "## 🔹 Simple Example\n", 205 | "\n", 206 | "### Data\n", 207 | "\n", 208 | "| x1 | x2 | x3 | y |\n", 209 | "| -- | -- | -- | -- |\n", 210 | "| 1 | 2 | 0 | 10 |\n", 211 | "| 2 | 4 | 0 | 20 |\n", 212 | "| 3 | 6 | 0 | 30 |\n", 213 | "| 4 | 8 | 0 | 40 |\n", 214 | "\n", 215 | "* `x1` and `x2` are correlated (x2 = 2 × x1).\n", 216 | "* `x3` is useless (always 0).\n", 217 | "\n", 218 | "---\n", 219 | "\n", 220 | "### 🔹 Plain Linear Regression\n", 221 | "\n", 222 | "* Might give unstable weights, e.g.:\n", 223 | " $y = 0 \\cdot x1 + 5 \\cdot x2 + 0.1 \\cdot x3$\n", 224 | "\n", 225 | "---\n", 226 | "\n", 227 | "### 🔹 Lasso\n", 228 | "\n", 229 | "* Drops `x3` completely, but may randomly choose between `x1` or `x2` (since they’re correlated).\n", 230 | " $y = 0 \\cdot x1 + 5 \\cdot x2 + 0 \\cdot x3$\n", 231 | "\n", 232 | "---\n", 233 | "\n", 234 | "### 🔹 Ridge\n", 235 | "\n", 236 | "* Keeps both `x1` and `x2`, shrinks them evenly, but doesn’t drop `x3`.\n", 237 | " $y = 2.4 \\cdot x1 + 2.4 \\cdot x2 + 0.05 \\cdot x3$\n", 238 | "\n", 239 | "---\n", 240 | "\n", 241 | "### 🔹 Elastic Net\n", 242 | "\n", 243 | "* Drops the useless feature (`x3` like Lasso).\n", 244 | "* Shares weights more fairly between correlated features (`x1`, `x2` like Ridge).\n", 245 | " $y = 2.3 \\cdot x1 + 2.3 \\cdot x2 + 0 \\cdot x3$\n", 246 | "\n", 247 | "---\n", 248 | "\n", 249 | "✅ **Takeaway:**\n", 250 | "\n", 251 | "* Lasso → feature selection but unstable with correlated features.\n", 252 | "* Ridge → stable with correlated features but keeps everything.\n", 253 | "* **Elastic Net → best of both: drops useless features, keeps stability with correlated ones.**" 254 | ] 255 | }, 256 | { 257 | "cell_type": "markdown", 258 | "id": "87055fd7", 259 | "metadata": {}, 260 | "source": [ 261 | "# Ridge() Ridgecv() Lasso() Lassocv()" 262 | ] 263 | }, 264 | { 265 | "cell_type": "markdown", 266 | "id": "ac0c3d4b", 267 | "metadata": { 268 | "vscode": { 269 | "languageId": "plaintext" 270 | } 271 | }, 272 | "source": [ 273 | "### 1. **Ridge()**\n", 274 | "\n", 275 | "* Think of it as **regular linear regression** but with a tiny rule that stops the model from giving too much importance to any single feature.\n", 276 | "* You **set a strength (`alpha`)** yourself.\n", 277 | "* Helps prevent the model from **memorizing the data** (overfitting).\n", 278 | "\n", 279 | "---\n", 280 | "\n", 281 | "### 2. **Lasso()**\n", 282 | "\n", 283 | "* Like Ridge, but **stronger**: it can actually **ignore unimportant features** by setting their weight to zero.\n", 284 | "* You **choose the strength (`alpha`)**.\n", 285 | "* Useful if you want the model to focus on the **most important features only**.\n", 286 | "\n", 287 | "---\n", 288 | "\n", 289 | "### 3. **RidgeCV()**\n", 290 | "\n", 291 | "* Same as Ridge, but smarter: it **automatically finds the best alpha** for you.\n", 292 | "* It does this by testing different alphas using **cross-validation**.\n", 293 | "* Saves you from guessing the right alpha.\n", 294 | "\n", 295 | "---\n", 296 | "\n", 297 | "### 4. **LassoCV()**\n", 298 | "\n", 299 | "* Same as Lasso, but **automatically finds the best alpha** using cross-validation.\n", 300 | "* Shrinks unimportant features to zero **and** chooses the best strength for the penalty.\n", 301 | "\n", 302 | "---\n", 303 | "\n", 304 | "✅ **In short:**\n", 305 | "\n", 306 | "| Method | Manual or Auto alpha? 
| Feature Selection? | What it does |\n", 307 | "| --------- | --------------------- | ------------------ | ----------------------------------------------------------------- |\n", 308 | "| Ridge() | Manual | No | Shrinks coefficients slightly to prevent overfitting |\n", 309 | "| Lasso() | Manual | Yes | Shrinks some coefficients to zero, keeps only important features |\n", 310 | "| RidgeCV() | Automatic | No | Shrinks coefficients + chooses best alpha |\n", 311 | "| LassoCV() | Automatic | Yes | Shrinks coefficients, drops unimportant ones + chooses best alpha |" 312 | ] 313 | } 314 | ], 315 | "metadata": { 316 | "language_info": { 317 | "name": "python" 318 | } 319 | }, 320 | "nbformat": 4, 321 | "nbformat_minor": 5 322 | } 323 | -------------------------------------------------------------------------------- /7-Decision Tree/final_touch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "56e236d9", 6 | "metadata": { 7 | "vscode": { 8 | "languageId": "plaintext" 9 | } 10 | }, 11 | "source": [ 12 | "## DT used For Both Classification & Regression" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "id": "b6c74522", 18 | "metadata": {}, 19 | "source": [ 20 | "**decision tree**\n", 21 | "\n", 22 | "* A **supervised learning model** (it learns from labeled training data).\n", 23 | "* It works by **splitting the dataset** into smaller and smaller groups based on feature values.\n", 24 | "* At each split (called a **node**), the algorithm chooses the feature and condition (like “Age > 30?”) that best separates the data.\n", 25 | "* This process continues until the data is pure enough (mostly one label) or until stopping rules are reached (like max depth).\n", 26 | "* The final points (called **leaves**) hold the prediction:\n", 27 | "\n", 28 | " * For **classification** → a class label (e.g., “spam” or “not spam”).\n", 29 | " * For **regression** → a numeric value (e.g., predicted house price).\n", 30 | "\n", 31 | "📌 Example in ML:\n", 32 | "\n", 33 | "* Suppose you train a decision tree to predict if a student passes an exam.\n", 34 | "* Features: hours studied, attendance, assignments completed.\n", 35 | "* The tree might learn rules like:\n", 36 | "\n", 37 | " * If hours studied > 5 → predict **Pass**.\n", 38 | " * Else if attendance < 50% → predict **Fail**.\n", 39 | " * Else → predict **Pass**.\n", 40 | "\n", 41 | "👉 In short: **A decision tree is a model that learns a set of “if-then” rules from data, and uses those rules to predict outcomes.**" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "id": "0d9b1c6d", 47 | "metadata": { 48 | "vscode": { 49 | "languageId": "plaintext" 50 | } 51 | }, 52 | "source": [ 53 | "## pure and Impure splits\n", 54 | "\n", 55 | "### Imagine this:\n", 56 | "\n", 57 | "You’re sorting fruits into boxes: 🍎 apples and 🍌 bananas.\n", 58 | "\n", 59 | "* **Pure split** → A box has only apples, or only bananas. (Everything inside is the same — nice and clean!)\n", 60 | "* **Impure split** → A box has a mix of apples and bananas. (Not clear, messy.)\n", 61 | "\n", 62 | "### In decision trees:\n", 63 | "\n", 64 | "* **Pure split** = All data in that group belongs to one outcome (e.g., all “Yes” or all “No”).\n", 65 | "* **Impure split** = The group still has a mix of outcomes.\n", 66 | "\n", 67 | "👉 The goal of the tree is to keep splitting until the boxes (groups) are as **pure** as possible — meaning each group mostly has one clear answer." 
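"\n",
"To make this concrete, a tiny illustrative check in plain Python (the fruit labels are just the example above, not data from this repo):\n",
"\n",
"```python\n",
"from collections import Counter\n",
"\n",
"def is_pure(labels):\n",
"    # A box/group is 'pure' when every item inside has the same label\n",
"    return len(set(labels)) == 1\n",
"\n",
"print(is_pure(['apple', 'apple', 'apple']))   # True  -> pure box\n",
"print(is_pure(['apple', 'banana', 'apple']))  # False -> impure box\n",
"print(Counter(['apple', 'banana', 'apple']))  # shows the mix inside the impure box\n",
"```"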
68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "id": "dffe8632", 73 | "metadata": { 74 | "vscode": { 75 | "languageId": "plaintext" 76 | } 77 | }, 78 | "source": [ 79 | "# Tiny Example\n", 80 | "\n", 81 | "### Example: Students passing or failing an exam\n", 82 | "\n", 83 | "We have 6 students:\n", 84 | "\n", 85 | "| Hours studied | Result |\n", 86 | "| ------------- | ------ |\n", 87 | "| 8 | Pass |\n", 88 | "| 7 | Pass |\n", 89 | "| 6 | Pass |\n", 90 | "| 2 | Fail |\n", 91 | "| 1 | Fail |\n", 92 | "| 0 | Fail |\n", 93 | "\n", 94 | "---\n", 95 | "\n", 96 | "### Step 1: Look at all students together\n", 97 | "\n", 98 | "* Mixed results: some **Pass**, some **Fail**.\n", 99 | "* This group is **impure** (not clean).\n", 100 | "\n", 101 | "---\n", 102 | "\n", 103 | "### Step 2: Try a split → “Did the student study more than 5 hours?”\n", 104 | "\n", 105 | "* **Group 1 (Yes, >5 hrs):** Pass, Pass, Pass → All are **Pass** ✅ (Pure)\n", 106 | "* **Group 2 (No, ≤5 hrs):** Fail, Fail, Fail → All are **Fail** ✅ (Pure)\n", 107 | "\n", 108 | "Now both groups are **pure** because each box only has one result.\n", 109 | "\n", 110 | "---\n", 111 | "\n", 112 | "👉 That’s the idea:\n", 113 | "\n", 114 | "* **Impure split** = a box has both Pass and Fail.\n", 115 | "* **Pure split** = a box has only Pass or only Fail.\n", 116 | "\n", 117 | "The decision tree’s job is to keep asking smart questions until the boxes are as pure as possible." 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "id": "4512964b", 123 | "metadata": { 124 | "vscode": { 125 | "languageId": "plaintext" 126 | } 127 | }, 128 | "source": [ 129 | "# How to know pure or impure splits\n", 130 | "\n", 131 | "## **1. Entropy** – “How mixed up the labels are”\n", 132 | "\n", 133 | "* **Measures uncertainty** in a group.\n", 134 | "* **Pure group** → all examples have the same label → entropy = **0**.\n", 135 | "* **Impure group** → labels are mixed → entropy is **higher**.\n", 136 | "* **Range:**\n", 137 | "\n", 138 | " * For **binary classification** (2 classes): 0 to 1\n", 139 | "\n", 140 | " * 0 → pure (all same class)\n", 141 | " * 1 → perfectly mixed (50%-50%)\n", 142 | " * For **more than 2 classes**, the max value increases slightly (depends on number of classes).\n", 143 | "\n", 144 | "**Example (binary Pass/Fail):**\n", 145 | "\n", 146 | "* All Pass → entropy = 0 ✅\n", 147 | "* 2 Pass, 2 Fail → entropy = 1 ❌ (most impure)\n", 148 | "\n", 149 | "---\n", 150 | "\n", 151 | "## **2. Gini Index / Gini Impurity** – “Chance of picking the wrong label”\n", 152 | "\n", 153 | "* **Measures impurity** of a group.\n", 154 | "* **Pure group** → all examples have the same label → Gini = **0**.\n", 155 | "* **Impure group** → labels are mixed → Gini is **higher**.\n", 156 | "* **Range:**\n", 157 | "\n", 158 | " * Binary classification → 0 to 0.5\n", 159 | "\n", 160 | " * 0 → pure\n", 161 | " * 0.5 → perfectly mixed (50%-50%)\n", 162 | " * For multiple classes → max = 1 − 1/n\\_classes\n", 163 | "\n", 164 | "**Example (binary Pass/Fail):**\n", 165 | "\n", 166 | "* All Pass → Gini = 0 ✅\n", 167 | "* 2 Pass, 2 Fail → Gini = 0.5 ❌ (most impure)\n", 168 | "\n", 169 | "---\n", 170 | "\n", 171 | "## **3. 
Comparing Entropy & Gini**\n", 172 | "\n", 173 | "| Concept | Pure | Impure | Max value (binary) |\n", 174 | "| ------- | ---- | ------ | ------------------ |\n", 175 | "| Entropy | 0 | High | 1 |\n", 176 | "| Gini | 0 | High | 0.5 |\n", 177 | "\n", 178 | "* Both **tell the tree how “mixed” a group is**.\n", 179 | "* Decision tree chooses splits that **reduce impurity** → makes groups **purer**." 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "id": "91b80b68", 185 | "metadata": { 186 | "vscode": { 187 | "languageId": "plaintext" 188 | } 189 | }, 190 | "source": [ 191 | "# Information Gain\n", 192 | "## Which feature to select to start constructing the DT\n", 193 | "\n", 194 | "### Choosing a Feature to Split on in a Decision Tree\n", 195 | "\n", 196 | "* At each step, the tree wants to **split the data into purer groups**.\n", 197 | "* **Information Gain (IG)** tells us **how much a split reduces messiness** (impurity).\n", 198 | "* The tree looks at all features and picks the one with the **highest IG** → creates the cleanest child nodes.\n", 199 | "\n", 200 | "**Example:**\n", 201 | "\n", 202 | "* Predicting Pass/Fail:\n", 203 | "\n", 204 | " * Feature “Hours Studied” → splits into all Pass / all Fail → **high IG** ✅\n", 205 | " * Feature “Attendance” → some mixed groups → **lower IG** ❌\n", 206 | "* Tree chooses **Hours Studied** to split first.\n", 207 | "\n", 208 | "**Key idea:** **Pick the feature that makes the groups as pure as possible.**" 209 | ] 210 | }, 211 | { 212 | "cell_type": "markdown", 213 | "id": "0d48a262", 214 | "metadata": { 215 | "vscode": { 216 | "languageId": "plaintext" 217 | } 218 | }, 219 | "source": [ 220 | "# Post pruning & Pre pruning \n", 221 | "\n", 222 | "### **1. Pre-Pruning (Stop Early)**\n", 223 | "\n", 224 | "* Done **while building the tree**.\n", 225 | "* The tree **stops growing** if a split won’t make a big difference.\n", 226 | "* Example rules:\n", 227 | "\n", 228 | " * Don’t split if the group is very small\n", 229 | " * Don’t split if the tree is already too deep\n", 230 | "* Purpose: **keep the tree simple** and avoid memorizing noise in the training data.\n", 231 | "\n", 232 | "**Think:** “Stop asking more questions once you’re confident enough.”\n", 233 | "\n", 234 | "---\n", 235 | "\n", 236 | "### **2. Post-Pruning (Cut Back Later)**\n", 237 | "\n", 238 | "* Done **after the tree is fully built**.\n", 239 | "* The tree is grown completely first, then **unnecessary branches are removed**.\n", 240 | "* Purpose: remove parts that are too specific to training data and **make the tree simpler**.\n", 241 | "\n", 242 | "**Think:** “Ask all questions first, then remove the ones that don’t really help.”\n", 243 | "\n", 244 | "---\n", 245 | "* **Pre-pruning:** Stop early to avoid overfitting\n", 246 | "* **Post-pruning:** Grow fully, then cut back to improve generalization" 247 | ] 248 | }, 249 | { 250 | "cell_type": "markdown", 251 | "id": "71968a8a", 252 | "metadata": { 253 | "vscode": { 254 | "languageId": "plaintext" 255 | } 256 | }, 257 | "source": [ 258 | "# Variance Reduction in DTR\n", 259 | "\n", 260 | "### **1. Variance of a group**\n", 261 | "\n", 262 | "* Variance measures how **spread out the numbers** are in a group.\n", 263 | "* **Range:**\n", 264 | "\n", 265 | " * Minimum = 0 → all numbers are the same (perfectly “pure”)\n", 266 | " * Maximum = depends on the data → more spread = higher variance\n", 267 | "\n", 268 | "**Example:**\n", 269 | "\n", 270 | "* Group: 500, 480 → variance small (\~100) ✅\n", 271 | "* Group: 500, 250 → variance large (\~15,625) ❌\n", 272 | "\n", 273 | "---\n", 274 | "\n", 275 | "### **2. Variance Reduction**\n", 276 | "\n", 277 | "* Variance reduction = variance before split − weighted variance after split\n", 278 | "* **Range:**\n", 279 | "\n", 280 | " * Minimum = 0 → split doesn’t reduce variance (no improvement)\n", 281 | " * Maximum = variance before split → best possible split (child nodes perfectly uniform)\n", 282 | "\n", 283 | "---\n", 284 | "\n", 285 | "### ✅ **Key Idea:**\n", 286 | "\n", 287 | "* Variance = how messy the numbers are\n", 288 | "* Variance reduction = how much cleaner a split makes the numbers\n", 289 | "* Goal: pick the feature that **maximizes variance reduction**" 290 | ] 291 | }, 292 | { 293 | "cell_type": "markdown", 294 | "id": "e1d579e1", 295 | "metadata": { 296 | "vscode": { 297 | "languageId": "plaintext" 298 | } 299 | }, 300 | "source": [ 301 | "### In simple words: variance reduction in a DT regressor (DTR) plays the same role as information gain in a DT classifier (DTC) → it decides which feature to split on first." 302 | ] 303 | } 304 | ], 305 | "metadata": { 306 | "language_info": { 307 | "name": "python" 308 | } 309 | }, 310 | "nbformat": 4, 311 | "nbformat_minor": 5 312 | } 313 | -------------------------------------------------------------------------------- /8-Random Forest/final_touch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "75266593", 6 | "metadata": {}, 7 | "source": [ 8 | "### 🌳 What is Random Forest?\n", 9 | "\n", 10 | "A **Random Forest** is like a **team of Decision Trees** working together.\n", 11 | "\n", 12 | "* A single decision tree can make predictions, but it might **overfit** (memorize training data too much).\n", 13 | "* A Random Forest builds **many decision trees** on random parts of the data and then **combines their answers**.\n", 14 | "\n", 15 | "It’s called a “forest” because it’s just a **collection of many trees**.\n", 16 | "\n", 17 | "---\n", 18 | "\n", 19 | "### ✅ How it works (step by step)\n", 20 | "\n", 21 | "1. **Take the dataset**\n", 22 | " Example: Predict if a customer will buy a product based on **Age**, **Income**, and **Location**.\n", 23 | "\n", 24 | "2. **Make random samples**\n", 25 | " Randomly pick rows from the dataset to create different training sets (with replacement).\n", 26 | "\n", 27 | "3. **Grow multiple trees**\n", 28 | " Train a decision tree on each random dataset.\n", 29 | "\n", 30 | " * Tree 1 might say “Yes” based on Age.\n", 31 | " * Tree 2 might say “No” based on Income.\n", 32 | " * Tree 3 might say “Yes” based on Location.\n", 33 | "\n", 34 | "4. 
**Make a prediction**\n", 35 | " When a new customer comes in:\n", 36 | "\n", 37 | " * Each tree gives its prediction.\n", 38 | " * The forest combines them:\n", 39 | "\n", 40 | " * **For classification** → majority vote (e.g., 7 trees say “Yes,” 3 say “No” → final = “Yes”).\n", 41 | " * **For regression** → average of all predictions.\n", 42 | "\n", 43 | "---\n", 44 | "\n", 45 | "### 📘 Small Example\n", 46 | "\n", 47 | "Predict house prices:\n", 48 | "\n", 49 | "* Tree 1 → \\$200k\n", 50 | "* Tree 2 → \\$220k\n", 51 | "* Tree 3 → \\$210k\n", 52 | " **Final prediction = average = \\$210k**\n", 53 | "\n", 54 | "Predict if an email is spam:\n", 55 | "\n", 56 | "* Tree 1 → Spam\n", 57 | "* Tree 2 → Not Spam\n", 58 | "* Tree 3 → Spam\n", 59 | " **Final prediction = Spam (majority vote)**\n", 60 | "\n", 61 | "---\n", 62 | "\n", 63 | "### 🔑 In ML terms (still simple):\n", 64 | "\n", 65 | "* Random Forest = **ensemble of decision trees**.\n", 66 | "* Built using **bagging** (random subsets + multiple models).\n", 67 | "* **Strong points:** Accurate, handles many features, reduces overfitting, works well for both classification and regression.\n", 68 | "* **Weak points:** Can be slower with very large datasets, less interpretable than a single tree.\n", 69 | "\n", 70 | "---\n", 71 | "\n", 72 | "👉 In short:\n", 73 | "**Random Forest = many trees working together → better, more reliable predictions.**" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "id": "caefaf63", 79 | "metadata": { 80 | "vscode": { 81 | "languageId": "plaintext" 82 | } 83 | }, 84 | "source": [ 85 | "# Base Learner & Weak Learner\n", 86 | "\n", 87 | "### 🌱 Base Learner\n", 88 | "\n", 89 | "* A **base learner** is the starting model we use in an ensemble.\n", 90 | "* It can be **any ML algorithm**: a decision tree, logistic regression, SVM, etc.\n", 91 | "* Example: In a Random Forest, the **base learner** is a **decision tree**.\n", 92 | "\n", 93 | "Think of it as the **building block** of an ensemble.\n", 94 | "\n", 95 | "---\n", 96 | "\n", 97 | "### 🌱 Weak Learner\n", 98 | "\n", 99 | "* A **weak learner** is a simple model that performs **just slightly better than random guessing**.\n", 100 | "* Example:\n", 101 | "\n", 102 | " * For classification with 2 classes → better than 50% accuracy.\n", 103 | " * For regression → explains a little bit of the data pattern, but not perfectly.\n", 104 | "* In Boosting, we often use **very shallow decision trees (stumps)** as weak learners.\n", 105 | "\n", 106 | "Think of it as a **tiny model that alone isn’t strong**, but when combined with others, it becomes powerful.\n", 107 | "\n", 108 | "### 📘 Example\n", 109 | "\n", 110 | "Let’s say we’re predicting if an email is spam.\n", 111 | "\n", 112 | "* **Base learner:** Decision Tree.\n", 113 | "* If the tree is very deep → it may be strong.\n", 114 | "* If the tree is very shallow (only 1 or 2 splits) → it’s a **weak learner**.\n", 115 | "\n", 116 | "In **Boosting**, we purposely use weak learners (shallow trees) and combine them sequentially to build a strong model.\n", 117 | "\n", 118 | "---\n", 119 | "\n", 120 | "👉 In short:\n", 121 | "\n", 122 | "* **Base learner = any model inside an ensemble.**\n", 123 | "* **Weak learner = a base learner that is simple and only slightly better than guessing.**" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "id": "87c8da08", 129 | "metadata": { 130 | "vscode": { 131 | "languageId": "plaintext" 132 | } 133 | }, 134 | "source": [ 135 | "# key terminologies Explained\n", 136 | 
"\n", 137 | "### 🌱 Row Sampling\n", 138 | "\n", 139 | "* Also called **sample sampling** or **bootstrap sampling**.\n", 140 | "* Means: instead of using the **entire dataset**, we randomly select some rows (examples/data points) to train a model.\n", 141 | "* Example:\n", 142 | "\n", 143 | " * Dataset has 1,000 rows.\n", 144 | " * For one tree in a Random Forest, we randomly pick 700 rows (with replacement).\n", 145 | " * Each tree gets a slightly different dataset.\n", 146 | "\n", 147 | "👉 **Why?**\n", 148 | "\n", 149 | "* Makes models in the ensemble see different parts of the data.\n", 150 | "* Helps reduce **variance** and avoid overfitting.\n", 151 | "\n", 152 | "---\n", 153 | "\n", 154 | "### 🌱 Feature Sampling\n", 155 | "\n", 156 | "* Also called **column sampling**.\n", 157 | "* Means: instead of using **all features** (columns), the model only looks at a **random subset of features** when splitting nodes.\n", 158 | "* Example:\n", 159 | "\n", 160 | " * Dataset has 10 features (age, income, location, etc.).\n", 161 | " * For a split in one tree, the algorithm randomly picks only 3 features to consider.\n", 162 | "\n", 163 | "👉 **Why?**\n", 164 | "\n", 165 | "* Prevents all trees from looking the same (correlation).\n", 166 | "* Increases diversity among trees, which improves the forest’s performance.\n", 167 | "\n", 168 | "---\n", 169 | "\n", 170 | "### 📘 Example with Random Forest\n", 171 | "\n", 172 | "* **Row sampling:** Each tree is trained on a random selection of rows.\n", 173 | "* **Feature sampling:** At each split, the tree only looks at a random subset of features.\n", 174 | "\n", 175 | "This randomness is why Random Forest is called *random*.\n", 176 | "\n", 177 | "---\n", 178 | "\n", 179 | "👉 In ML terms (without jargon):\n", 180 | "\n", 181 | "* **Row sampling = pick random data points for training each model.**\n", 182 | "* **Feature sampling = pick random features for splits.**\n", 183 | "* Together, they make ensembles (like Random Forest) stronger and more robust." 
184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "id": "cda0d6c4", 189 | "metadata": { 190 | "vscode": { 191 | "languageId": "plaintext" 192 | } 193 | }, 194 | "source": [ 195 | "### 🌱 Row Replacement Sampling\n", 196 | "\n", 197 | "* Also called **sampling with replacement**.\n", 198 | "* When creating a new training dataset (like for each tree in a Random Forest), we pick rows **randomly with replacement**.\n", 199 | "* This means:\n", 200 | "\n", 201 | " * A row can be chosen **more than once**.\n", 202 | " * Some rows might **not be chosen at all**.\n", 203 | "\n", 204 | "**Example:**\n", 205 | "Dataset has 5 rows: \\[A, B, C, D, E]\n", 206 | "\n", 207 | "* After sampling with replacement, one new dataset could be \\[B, C, A, C, E]\n", 208 | "\n", 209 | " * Row C appears twice.\n", 210 | " * Row D is missing.\n", 211 | "\n", 212 | "👉 This is what happens in **bagging** (bootstrap aggregating).\n", 213 | "\n", 214 | "---\n", 215 | "\n", 216 | "### 🌱 Feature Replacement Sampling\n", 217 | "\n", 218 | "* Similar idea, but for **features (columns)** instead of rows.\n", 219 | "* At each split in a tree, the algorithm randomly selects a subset of features **with replacement**.\n", 220 | "* This means:\n", 221 | "\n", 222 | " * A feature might be considered more than once.\n", 223 | " * Some features may not be considered at that split.\n", 224 | "\n", 225 | "**Example:**\n", 226 | "Features: \\[Age, Income, Location, Education]\n", 227 | "\n", 228 | "* At a split, the algorithm might randomly choose \\[Age, Age, Income].\n", 229 | "* So the decision tree only considers these features when deciding the best split.\n", 230 | "\n", 231 | "👉 This randomness helps keep trees **diverse**, so the forest doesn’t become too similar.\n", 232 | "\n", 233 | "---\n", 234 | "\n", 235 | "### ✅ In ML terms (kept simple):\n", 236 | "\n", 237 | "* **Row replacement sampling = pick rows randomly with replacement → makes each tree see a different dataset.**\n", 238 | "* **Feature replacement sampling = pick features randomly with replacement → makes each tree split differently.**" 239 | ] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "id": "dfd6c30f", 244 | "metadata": { 245 | "vscode": { 246 | "languageId": "plaintext" 247 | } 248 | }, 249 | "source": [ 250 | "### 🌱 Parallel Building\n", 251 | "\n", 252 | "* Models are built **at the same time (independently)**.\n", 253 | "* Each model doesn’t care what the others are doing.\n", 254 | "* After all are trained, their predictions are **combined** (average or vote).\n", 255 | "\n", 256 | "**Example → Bagging / Random Forest**\n", 257 | "\n", 258 | "* Many decision trees are trained in parallel on different random subsets.\n", 259 | "* At the end, results are averaged (regression) or majority vote (classification).\n", 260 | "\n", 261 | "👉 **Effect in ML terms:** Reduces **variance** (less overfitting).\n", 262 | "\n", 263 | "---\n", 264 | "\n", 265 | "### 🌱 Sequential Building\n", 266 | "\n", 267 | "* Models are built **one after another**.\n", 268 | "* Each new model **learns from the mistakes** of the previous one.\n", 269 | "* Final prediction = combination of all models (weighted).\n", 270 | "\n", 271 | "**Example → Boosting (AdaBoost, Gradient Boosting, XGBoost)**\n", 272 | "\n", 273 | "* First tree predicts.\n", 274 | "* Next tree focuses on the errors made by the first.\n", 275 | "* Next tree fixes what’s still wrong, and so on.\n", 276 | "\n", 277 | "👉 **Effect in ML terms:** Reduces **bias** (turns weak learners into strong 
ones).\n", 278 | "\n", 279 | "---\n", 280 | "\n", 281 | "### ✅ Simple Analogy\n", 282 | "\n", 283 | "* **Parallel (Bagging):** A group of students solve the same problem separately, then the teacher takes the majority answer.\n", 284 | "* **Sequential (Boosting):** One student solves first, the next improves their solution, the next improves it further, until it’s very accurate.\n", 285 | "\n", 286 | "---\n", 287 | "\n", 288 | "👉 In short:\n", 289 | "\n", 290 | "* **Parallel = independent models → combine at the end.**\n", 291 | "* **Sequential = dependent models → each fixes the last one’s errors.**" 292 | ] 293 | } 294 | ], 295 | "metadata": { 296 | "language_info": { 297 | "name": "python" 298 | } 299 | }, 300 | "nbformat": 4, 301 | "nbformat_minor": 5 302 | } 303 | -------------------------------------------------------------------------------- /1-Complete Linear Regression/Algerian_forest_fires_dataset_UPDATE.csv: -------------------------------------------------------------------------------- 1 | Bejaia Region Dataset 2 | day,month,year,Temperature, RH, Ws,Rain ,FFMC,DMC,DC,ISI,BUI,FWI,Classes 3 | 01,06,2012,29,57,18,0,65.7,3.4,7.6,1.3,3.4,0.5,not fire 4 | 02,06,2012,29,61,13,1.3,64.4,4.1,7.6,1,3.9,0.4,not fire 5 | 03,06,2012,26,82,22,13.1,47.1,2.5,7.1,0.3,2.7,0.1,not fire 6 | 04,06,2012,25,89,13,2.5,28.6,1.3,6.9,0,1.7,0,not fire 7 | 05,06,2012,27,77,16,0,64.8,3,14.2,1.2,3.9,0.5,not fire 8 | 06,06,2012,31,67,14,0,82.6,5.8,22.2,3.1,7,2.5,fire 9 | 07,06,2012,33,54,13,0,88.2,9.9,30.5,6.4,10.9,7.2,fire 10 | 08,06,2012,30,73,15,0,86.6,12.1,38.3,5.6,13.5,7.1,fire 11 | 09,06,2012,25,88,13,0.2,52.9,7.9,38.8,0.4,10.5,0.3,not fire 12 | 10,06,2012,28,79,12,0,73.2,9.5,46.3,1.3,12.6,0.9,not fire 13 | 11,06,2012,31,65,14,0,84.5,12.5,54.3,4,15.8,5.6,fire 14 | 12,06,2012,26,81,19,0,84,13.8,61.4,4.8,17.7,7.1 ,fire 15 | 13,06,2012,27,84,21,1.2,50,6.7,17,0.5,6.7,0.2,not fire 16 | 14,06,2012,30,78,20,0.5,59,4.6,7.8,1,4.4,0.4,not fire 17 | 15,06,2012,28,80,17,3.1,49.4,3,7.4,0.4,3,0.1,not fire 18 | 16,06,2012,29,89,13,0.7,36.1,1.7,7.6,0,2.2,0,not fire 19 | 17,06,2012,30,89,16,0.6,37.3,1.1,7.8,0,1.6,0,not fire 20 | 18,06,2012,31,78,14,0.3,56.9,1.9,8,0.7,2.4,0.2,not fire 21 | 19,06,2012,31,55,16,0.1,79.9,4.5,16,2.5,5.3,1.4,not fire 22 | 20,06,2012,30,80,16,0.4,59.8,3.4,27.1,0.9,5.1,0.4,not fire 23 | 21,06,2012,30,78,14,0,81,6.3,31.6,2.6,8.4,2.2,fire 24 | 22,06,2012,31,67,17,0.1,79.1,7,39.5,2.4,9.7,2.3,not fire 25 | 23,06,2012,32,62,18,0.1,81.4,8.2,47.7,3.3,11.5,3.8,fire 26 | 24,06,2012,32,66,17,0,85.9,11.2,55.8,5.6,14.9,7.5,fire 27 | 25,06,2012,31,64,15,0,86.7,14.2,63.8,5.7,18.3,8.4,fire 28 | 26,06,2012,31,64,18,0,86.8,17.8,71.8,6.7,21.6,10.6,fire 29 | 27,06,2012,34,53,18,0,89,21.6,80.3,9.2,25.8,15,fire 30 | 28,06,2012,32,55,14,0,89.1,25.5,88.5,7.6,29.7,13.9,fire 31 | 29,06,2012,32,47,13,0.3,79.9,18.4,84.4,2.2,23.8,3.9,not fire 32 | 30,06,2012,33,50,14,0,88.7,22.9,92.8,7.2,28.3,12.9,fire 33 | 01,07,2012,29,68,19,1,59.9,2.5,8.6,1.1,2.9,0.4,not fire 34 | 02,07,2012,27,75,19,1.2,55.7,2.4,8.3,0.8,2.8,0.3,not fire 35 | 03,07,2012,32,76,20,0.7,63.1,2.6,9.2,1.3,3,0.5,not fire 36 | 04,07,2012,33,78,17,0,80.1,4.6,18.5,2.7,5.7,1.7,not fire 37 | 05,07,2012,33,66,14,0,85.9,7.6,27.9,4.8,9.1,4.9,fire 38 | 06,07,2012,32,63,14,0,87,10.9,37,5.6,12.5,6.8,fire 39 | 07,07,2012,35,64,18,0.2,80,9.7,40.4,2.8,12.1,3.2,not fire 40 | 08,07,2012,33,68,19,0,85.6,12.5,49.8,6,15.4,8,fire 41 | 09,07,2012,32,68,14,1.4,66.6,7.7,9.2,1.1,7.4,0.6,not fire 42 | 10,07,2012,33,69,13,0.7,66.6,6,9.3,1.1,5.8,0.5,not fire 43 | 
11,07,2012,33,76,14,0,81.1,8.1,18.7,2.6,8.1,2.2,not fire 44 | 12,07,2012,31,75,13,0.1,75.1,7.9,27.7,1.5,9.2,0.9,not fire 45 | 13,07,2012,34,81,15,0,81.8,9.7,37.2,3,11.7,3.4,not fire 46 | 14,07,2012,34,61,13,0.6,73.9,7.8,22.9,1.4,8.4,0.8,not fire 47 | 15,07,2012,30,80,19,0.4,60.7,5.2,17,1.1,5.9,0.5,not fire 48 | 16,07,2012,28,76,21,0,72.6,7,25.5,0.7,8.3,0.4,not fire 49 | 17,07,2012,29,70,14,0,82.8,9.4,34.1,3.2,11.1,3.6,fire 50 | 18,07,2012,31,68,14,0,85.4,12.1,43.1,4.6,14.2,6,fire 51 | 19,07,2012,35,59,17,0,88.1,12,52.8,7.7,18.2,10.9,fire 52 | 20,07,2012,33,65,15,0.1,81.4,12.3,62.1,2.8,16.5,4,fire 53 | 21,07,2012,33,70,17,0,85.4,18.5,71.5,5.2,22.4,8.8,fire 54 | 22,07,2012,28,79,18,0.1,73.4,16.4,79.9,1.8,21.7,2.8,not fire 55 | 23,07,2012,27,66,22,0.4,68.2,10.5,71.3,1.8,15.4,2.1,not fire 56 | 24,07,2012,28,78,16,0.1,70,9.6,79.7,1.4,14.7,1.3,not fire 57 | 25,07,2012,31,65,18,0,84.3,12.5,88.7,4.8,18.5,7.3,fire 58 | 26,07,2012,36,53,19,0,89.2,17.1,98.6,10,23.9,15.3,fire 59 | 27,07,2012,36,48,13,0,90.3,22.2,108.5,8.7,29.4,15.3,fire 60 | 28,07,2012,33,76,15,0,86.5,24.4,117.8,5.6,32.1,11.3,fire 61 | 29,07,2012,32,73,15,0,86.6,26.7,127,5.6,35,11.9,fire 62 | 30,07,2012,31,79,15,0,85.4,28.5,136,4.7,37.4,10.7,fire 63 | 31,07,2012,35,64,17,0,87.2,31.9,145.7,6.8,41.2,15.7,fire 64 | 01,08,2012,36,45,14,0,78.8,4.8,10.2,2,4.7,0.9,not fire 65 | 02,08,2012,35,55,12,0.4,78,5.8,10,1.7,5.5,0.8,not fire 66 | 03,08,2012,35,63,14,0.3,76.6,5.7,10,1.7,5.5,0.8,not fire 67 | 04,08,2012,34,69,13,0,85,8.2,19.8,4,8.2,3.9,fire 68 | 05,08,2012,34,65,13,0,86.8,11.1,29.7,5.2,11.5,6.1,fire 69 | 06,08,2012,32,75,14,0,86.4,13,39.1,5.2,14.2,6.8,fire 70 | 07,08,2012,32,69,16,0,86.5,15.5,48.6,5.5,17.2,8,fire 71 | 08,08,2012,32,60,18,0.3,77.1,11.3,47,2.2,14.1,2.6,not fire 72 | 09,08,2012,35,59,17,0,87.4,14.8,57,6.9,17.9,9.9,fire 73 | 10,08,2012,35,55,14,0,88.9,18.6,67,7.4,21.9,11.6,fire 74 | 11,08,2012,35,63,13,0,88.9,21.7,77,7.1,25.5,12.1,fire 75 | 12,08,2012,35,51,13,0.3,81.3,15.6,75.1,2.5,20.7,4.2,not fire 76 | 13,08,2012,35,63,15,0,87,19,85.1,5.9,24.4,10.2,fire 77 | 14,08,2012,33,66,14,0,87,21.7,94.7,5.7,27.2,10.6,fire 78 | 15,08,2012,36,55,13,0.3,82.4,15.6,92.5,3.7,22,6.3,fire 79 | 16,08,2012,36,61,18,0.3,80.2,11.7,90.4,2.8,17.6,4.2,fire 80 | 17,08,2012,37,52,18,0,89.3,16,100.7,9.7,22.9,14.6,fire 81 | 18,08,2012,36,54,18,0,89.4,20,110.9,9.7,27.5,16.1,fire 82 | 19,08,2012,35,62,19,0,89.4,23.2,120.9,9.7,31.3,17.2,fire 83 | 20,08,2012,35,68,19,0,88.3,25.9,130.6,8.8,34.7,16.8,fire 84 | 21,08,2012,36,58,19,0,88.6,29.6,141.1,9.2,38.8,18.4,fire 85 | 22,08,2012,36,55,18,0,89.1,33.5,151.3,9.9,43.1,20.4,fire 86 | 23,08,2012,36,53,16,0,89.5,37.6,161.5,10.4,47.5,22.3,fire 87 | 24,08,2012,34,64,14,0,88.9,40.5,171.3,9,50.9,20.9,fire 88 | 25,08,2012,35,60,15,0,88.9,43.9,181.3,8.2,54.7,20.3,fire 89 | 26,08,2012,31,78,18,0,85.8,45.6,190.6,4.7,57.1,13.7,fire 90 | 27,08,2012,33,82,21,0,84.9,47,200.2,4.4,59.3,13.2,fire 91 | 28,08,2012,34,64,16,0,89.4,50.2,210.4,7.3,62.9,19.9,fire 92 | 29,08,2012,35,48,18,0,90.1,54.2,220.4,12.5,67.4,30.2,fire 93 | 30,08,2012,35,70,17,0.8,72.7,25.2,180.4,1.7,37.4,4.2,not fire 94 | 31,08,2012,28,80,21,16.8,52.5,8.7,8.7,0.6,8.3,0.3,not fire 95 | 01,09,2012,25,76,17,7.2,46,1.3,7.5,0.2,1.8,0.1,not fire 96 | 02,09,2012,22,86,15,10.1,30.5,0.7,7,0,1.1,0,not fire 97 | 03,09,2012,25,78,15,3.8,42.6,1.2,7.5,0.1,1.7,0,not fire 98 | 04,09,2012,29,73,17,0.1,68.4,1.9,15.7,1.4,2.9,0.5,not fire 99 | 05,09,2012,29,75,16,0,80.8,3.4,24,2.8,5.1,1.7,fire 100 | 06,09,2012,29,74,19,0.1,75.8,3.6,32.2,2.1,5.6,0.9,not fire 101 | 
07,09,2012,31,71,17,0.3,69.6,3.2,30.1,1.5,5.1,0.6,not fire 102 | 08,09,2012,30,73,17,0.9,62,2.6,8.4,1.1,3,0.4,not fire 103 | 09,09,2012,30,77,15,1,56.1,2.1,8.4,0.7,2.6,0.2,not fire 104 | 10,09,2012,33,73,12,1.8,59.9,2.2,8.9,0.7,2.7,0.3,not fire 105 | 11,09,2012,30,77,21,1.8,58.5,1.9,8.4,1.1,2.4,0.3,not fire 106 | 12,09,2012,29,88,13,0,71,2.6,16.6,1.2,3.7,0.5,not fire 107 | 13,09,2012,25,86,21,4.6,40.9,1.3,7.5,0.1,1.8,0,not fire 108 | 14,09,2012,22,76,26,8.3,47.4,1.1,7,0.4,1.6,0.1,not fire 109 | 15,09,2012,24,82,15,0.4,44.9,0.9,7.3,0.2,1.4,0,not fire 110 | 16,09,2012,30,65,14,0,78.1,3.2,15.7,1.9,4.2,0.8,not fire 111 | 17,09,2012,31,52,14,0,87.7,6.4,24.3,6.2,7.7,5.9,fire 112 | 18,09,2012,32,49,11,0,89.4,9.8,33.1,6.8,11.3,7.7,fire 113 | 19,09,2012,29,57,14,0,89.3,12.5,41.3,7.8,14.2,9.7,fire 114 | 20,09,2012,28,84,18,0,83.8,13.5,49.3,4.5,16,6.3,fire 115 | 21,09,2012,31,55,11,0,87.8,16.5,57.9,5.4,19.2,8.3,fire 116 | 22,09,2012,31,50,19,0.6,77.8,10.6,41.4,2.4,12.9,2.8,not fire 117 | 23,09,2012,32,54,11,0.5,73.7,7.9,30.4,1.2,9.6,0.7,not fire 118 | 24,09,2012,29,65,19,0.6,68.3,5.5,15.2,1.5,5.8,0.7,not fire 119 | 25,09,2012,26,81,21,5.8,48.6,3,7.7,0.4,3,0.1,not fire 120 | 26,09,2012,31,54,11,0,82,6,16.3,2.5,6.2,1.7,not fire 121 | 27,09,2012,31,66,11,0,85.7,8.3,24.9,4,9,4.1,fire 122 | 28,09,2012,32,47,14,0.7,77.5,7.1,8.8,1.8,6.8,0.9,not fire 123 | 29,09,2012,26,80,16,1.8,47.4,2.9,7.7,0.3,3,0.1,not fire 124 | 30,09,2012,25,78,14,1.4,45,1.9,7.5,0.2,2.4,0.1,not fire 125 | 126 | Sidi-Bel Abbes Region Dataset 127 | day,month,year,Temperature, RH, Ws,Rain ,FFMC,DMC,DC,ISI,BUI,FWI,Classes 128 | 01,06,2012,32,71,12,0.7,57.1,2.5,8.2,0.6,2.8,0.2,not fire 129 | 02,06,2012,30,73,13,4,55.7,2.7,7.8,0.6,2.9,0.2,not fire 130 | 03,06,2012,29,80,14,2,48.7,2.2,7.6,0.3,2.6,0.1,not fire 131 | 04,06,2012,30,64,14,0,79.4,5.2,15.4,2.2,5.6,1,not fire 132 | 05,06,2012,32,60,14,0.2,77.1,6,17.6,1.8,6.5,0.9,not fire 133 | 06,06,2012,35,54,11,0.1,83.7,8.4,26.3,3.1,9.3,3.1,fire 134 | 07,06,2012,35,44,17,0.2,85.6,9.9,28.9,5.4,10.7,6,fire 135 | 08,06,2012,28,51,17,1.3,71.4,7.7,7.4,1.5,7.3,0.8,not fire 136 | 09,06,2012,27,59,18,0.1,78.1,8.5,14.7,2.4,8.3,1.9,not fire 137 | 10,06,2012,30,41,15,0,89.4,13.3,22.5,8.4,13.1,10,fire 138 | 11,06,2012,31,42,21,0,90.6,18.2,30.5,13.4,18,16.7,fire 139 | 12,06,2012,27,58,17,0,88.9,21.3,37.8,8.7,21.2,12.9,fire 140 | 13,06,2012,30,52,15,2,72.3,11.4,7.8,1.4,10.9,0.9,not fire 141 | 14,06,2012,27,79,16,0.7,53.4,6.4,7.3,0.5,6.1,0.3,not fire 142 | 15,06,2012,28,90,15,0,66.8,7.2,14.7,1.2,7.1,0.6,not fire 143 | 16,06,2012,29,87,15,0.4,47.4,4.2,8,0.2,4.1,0.1,not fire 144 | 17,06,2012,31,69,17,4.7,62.2,3.9,8,1.1,3.8,0.4,not fire 145 | 18,06,2012,33,62,10,8.7,65.5,4.6,8.3,0.9,4.4,0.4,not fire 146 | 19,06,2012,32,67,14,4.5,64.6,4.4,8.2,1,4.2,0.4,not fire 147 | 20,06,2012,31,72,14,0.2,60.2,3.8,8,0.8,3.7,0.3,not fire 148 | 21,06,2012,32,55,14,0,86.2,8.3,18.4,5,8.2,4.9,fire 149 | 22,06,2012,33,46,14,1.1,78.3,8.1,8.3,1.9,7.7,1.2,not fire 150 | 23,06,2012,33,59,16,0.8,74.2,7,8.3,1.6,6.7,0.8,not fire 151 | 24,06,2012,35,68,16,0,85.3,10,17,4.9,9.9,5.3,fire 152 | 25,06,2012,34,70,16,0,86,12.8,25.6,5.4,12.7,6.7,fire 153 | 26,06,2012,36,62,16,0,87.8,16.5,34.5,7,16.4,9.5,fire 154 | 27,06,2012,36,55,15,0,89.1,20.9,43.3,8,20.8,12,fire 155 | 28,06,2012,37,37,13,0,92.5,27.2,52.4,11.7,27.1,18.4,fire 156 | 29,06,2012,37,36,13,0.6,86.2,17.9,36.7,4.8,17.8,7.2,fire 157 | 30,06,2012,34,42,15,1.7,79.7,12,8.5,2.2,11.5,2.2,not fire 158 | 01,07,2012,28,58,18,2.2,63.7,3.2,8.5,1.2,3.3,0.5,not fire 159 | 
02,07,2012,33,48,16,0,87.6,7.9,17.8,6.8,7.8,6.4,fire 160 | 03,07,2012,34,56,17,0.1,84.7,9.7,27.3,4.7,10.3,5.2,fire 161 | 04,07,2012,34,58,18,0,88,13.6,36.8,8,14.1,9.9,fire 162 | 05,07,2012,34,45,18,0,90.5,18.7,46.4,11.3,18.7,15,fire 163 | 06,07,2012,35,42,15,0.3,84.7,15.5,45.1,4.3,16.7,6.3,fire 164 | 07,07,2012,38,43,13,0.5,85,13,35.4,4.1,13.7,5.2,fire 165 | 08,07,2012,35,47,18,6,80.8,9.8,9.7,3.1,9.4,3,fire 166 | 09,07,2012,36,43,15,1.9,82.3,9.4,9.9,3.2,9,3.1,fire 167 | 10,07,2012,34,51,16,3.8,77.5,8,9.5,2,7.7,1.3,not fire 168 | 11,07,2012,34,56,15,2.9,74.8,7.1,9.5,1.6,6.8,0.8,not fire 169 | 12,07,2012,36,44,13,0,90.1,12.6,19.4,8.3,12.5,9.6,fire 170 | 13,07,2012,39,45,13,0.6,85.2,11.3,10.4,4.2,10.9,4.7,fire 171 | 14,07,2012,37,37,18,0.2,88.9,12.9,14.6 9,12.5,10.4,fire 172 | 15,07,2012,34,45,17,0,90.5,18,24.1,10.9,17.7,14.1,fire 173 | 16,07,2012,31,83,17,0,84.5,19.4,33.1,4.7,19.2,7.3,fire 174 | 17,07,2012,32,81,17,0,84.6,21.1,42.3,4.7,20.9,7.7,fire 175 | 18,07,2012,33,68,15,0,86.1,23.9,51.6,5.2,23.9,9.1,fire 176 | 19,07,2012,34,58,16,0,88.1,27.8,61.1,7.3,27.7,13,fire 177 | 20,07,2012,36,50,16,0,89.9,32.7,71,9.5,32.6,17.3,fire 178 | 21,07,2012,36,29,18,0,93.9,39.6,80.6,18.5,39.5,30,fire 179 | 22,07,2012,32,48,18,0,91.5,44.2,90.1,13.2,44,25.4,fire 180 | 23,07,2012,31,71,17,0,87.3,46.6,99,6.9,46.5,16.3,fire 181 | 24,07,2012,33,63,17,1.1,72.8,20.9,56.6,1.6,21.7,2.5,not fire 182 | 25,07,2012,39,64,9,1.2,73.8,11.7,15.9,1.1,11.4,0.7,not fire 183 | 26,07,2012,35,58,10,0.2,78.3,10.8,19.7,1.6,10.7,1,not fire 184 | 27,07,2012,29,87,18,0,80,11.8,28.3,2.8,11.8,3.2,not fire 185 | 28,07,2012,33,57,16,0,87.5,15.7,37.6,6.7,15.7,9,fire 186 | 29,07,2012,34,59,16,0,88.1,19.5,47.2,7.4,19.5,10.9,fire 187 | 30,07,2012,36,56,16,0,88.9,23.8,57.1,8.2,23.8,13.2,fire 188 | 31,07,2012,37,55,15,0,89.3,28.3,67.2,8.3,28.3,14.5,fire 189 | 01,08,2012,38,52,14,0,78.3,4.4,10.5,2,4.4,0.8,not fire 190 | 02,08,2012,40,34,14,0,93.3,10.8,21.4,13.8,10.6,13.5,fire 191 | 03,08,2012,39,33,17,0,93.7,17.1,32.1,17.2,16.9,19.5,fire 192 | 04,08,2012,38,35,15,0,93.8,23,42.7,15.7,22.9,20.9,fire 193 | 05,08,2012,34,42,17,0.1,88.3,23.6,52.5,19,23.5,12.6,fire 194 | 06,08,2012,30,54,14,3.1,70.5,11,9.1,1.3,10.5,0.8,not fire 195 | 07,08,2012,34,63,13,2.9,69.7,7.2,9.8,1.2,6.9,0.6,not fire 196 | 08,08,2012,37,56,11,0,87.4,11.2,20.2,5.2,11,5.9,fire 197 | 09,08,2012,39,43,12,0,91.7,16.5,30.9,9.6,16.4,12.7,fire 198 | 10,08,2012,39,39,15,0.2,89.3,15.8,35.4,8.2,15.8,10.7,fire 199 | 11,08,2012,40,31,15,0,94.2,22.5,46.3,16.6,22.4,21.6,fire 200 | 12,08,2012,39,21,17,0.4,93,18.4,41.5,15.5,18.4,18.8,fire 201 | 13,08,2012,35,34,16,0.2,88.3,16.9,45.1,7.5,17.5,10.5,fire 202 | 14,08,2012,37,40,13,0,91.9,22.3,55.5,10.8,22.3,15.7,fire 203 | 15,08,2012,35,46,13,0.3,83.9,16.9,54.2,3.5,19,5.5,fire 204 | 16,08,2012,40,41,10,0.1,92,22.6,65.1,9.5,24.2,14.8,fire 205 | 17,08,2012,42,24,9,0,96,30.3,76.4,15.7,30.4,24,fire 206 | 18,08,2012,37,37,14,0,94.3,35.9,86.8,16,35.9,26.3,fire 207 | 19,08,2012,35,66,15,0.1,82.7,32.7,96.8,3.3,35.5,7.7,fire 208 | 20,08,2012,36,81,15,0,83.7,34.4,107,3.8,38.1,9,fire 209 | 21,08,2012,36,71,15,0,86,36.9,117.1,5.1,41.3,12.2,fire 210 | 22,08,2012,37,53,14,0,89.5,41.1,127.5,8,45.5,18.1,fire 211 | 23,08,2012,36,43,16,0,91.2,46.1,137.7,11.5,50.2,24.5,fire 212 | 24,08,2012,35,38,15,0,92.1,51.3,147.7,12.2,54.9,26.9,fire 213 | 25,08,2012,34,40,18,0,92.1,56.3,157.5,14.3,59.5,31.1,fire 214 | 26,08,2012,33,37,16,0,92.2,61.3,167.2,13.1,64,30.3,fire 215 | 27,08,2012,36,54,14,0,91,65.9,177.3,10,68,26.1,fire 216 | 
28,08,2012,35,56,14,0.4,79.2,37,166,2.1,30.6,6.1,not fire 217 | 29,08,2012,35,53,17,0.5,80.2,20.7,149.2,2.7,30.6,5.9,fire 218 | 30,08,2012,34,49,15,0,89.2,24.8,159.1,8.1,35.7,16,fire 219 | 31,08,2012,30,59,19,0,89.1,27.8,168.2,9.8,39.3,19.4,fire 220 | 01,09,2012,29,86,16,0,37.9,0.9,8.2,0.1,1.4,0,not fire 221 | 02,09,2012,28,67,19,0,75.4,2.9,16.3,2,4,0.8,not fire 222 | 03,09,2012,28,75,16,0,82.2,4.4,24.3,3.3,6,2.5,fire 223 | 04,09,2012,30,66,15,0.2,73.5,4.1,26.6,1.5,6,0.7,not fire 224 | 05,09,2012,30,58,12,4.1,66.1,4,8.4,1,3.9,0.4,not fire 225 | 06,09,2012,34,71,14,6.5,64.5,3.3,9.1,1,3.5,0.4,not fire 226 | 07,09,2012,31,62,15,0,83.3,5.8,17.7,3.8,6.4,3.2,fire 227 | 08,09,2012,30,88,14,0,82.5,6.6,26.1,3,8.1,2.7,fire 228 | 09,09,2012,30,80,15,0,83.1,7.9,34.5,3.5,10,3.7,fire 229 | 10,09,2012,29,74,15,1.1,59.5,4.7,8.2,0.8,4.6,0.3,not fire 230 | 11,09,2012,30,73,14,0,79.2,6.5,16.6,2.1,6.6,1.2,not fire 231 | 12,09,2012,31,72,14,0,84.2,8.3,25.2,3.8,9.1,3.9,fire 232 | 13,09,2012,29,49,19,0,88.6,11.5,33.4,9.1,12.4,10.3,fire 233 | 14,09,2012,28,81,15,0,84.6,12.6,41.5,4.3,14.3,5.7,fire 234 | 15,09,2012,32,51,13,0,88.7,16,50.2,6.9,17.8,9.8,fire 235 | 16,09,2012,33,26,13,0,93.9,21.2,59.2,14.2,22.4,19.3,fire 236 | 17,09,2012,34,44,12,0,92.5,25.2,63.3,11.2,26.2,17.5,fire 237 | 18,09,2012,36,33,13,0.1,90.6,25.8,77.8,9,28.2,15.4,fire 238 | 19,09,2012,29,41,8,0.1,83.9,24.9,86,2.7,28.9,5.6,fire 239 | 20,09,2012,34,58,13,0.2,79.5,18.7,88,2.1,24.4,3.8,not fire 240 | 21,09,2012,35,34,17,0,92.2,23.6,97.3,13.8,29.4,21.6,fire 241 | 22,09,2012,33,64,13,0,88.9,26.1,106.3,7.1,32.4,13.7,fire 242 | 23,09,2012,35,56,14,0,89,29.4,115.6,7.5,36,15.2,fire 243 | 24,09,2012,26,49,6,2,61.3,11.9,28.1,0.6,11.9,0.4,not fire 244 | 25,09,2012,28,70,15,0,79.9,13.8,36.1,2.4,14.1,3,not fire 245 | 26,09,2012,30,65,14,0,85.4,16,44.5,4.5,16.9,6.5,fire 246 | 27,09,2012,28,87,15,4.4,41.1,6.5,8,0.1,6.2,0,not fire 247 | 28,09,2012,27,87,29,0.5,45.9,3.5,7.9,0.4,3.4,0.2,not fire 248 | 29,09,2012,24,54,18,0.1,79.7,4.3,15.2,1.7,5.1,0.7,not fire 249 | 30,09,2012,24,64,15,0.2,67.3,3.8,16.5,1.2,4.8,0.5,not fire 250 | -------------------------------------------------------------------------------- /2-Ridge Lasso And Elasticnet/Algerian_forest_fires_dataset_UPDATE.csv: -------------------------------------------------------------------------------- 1 | Bejaia Region Dataset 2 | day,month,year,Temperature, RH, Ws,Rain ,FFMC,DMC,DC,ISI,BUI,FWI,Classes 3 | 01,06,2012,29,57,18,0,65.7,3.4,7.6,1.3,3.4,0.5,not fire 4 | 02,06,2012,29,61,13,1.3,64.4,4.1,7.6,1,3.9,0.4,not fire 5 | 03,06,2012,26,82,22,13.1,47.1,2.5,7.1,0.3,2.7,0.1,not fire 6 | 04,06,2012,25,89,13,2.5,28.6,1.3,6.9,0,1.7,0,not fire 7 | 05,06,2012,27,77,16,0,64.8,3,14.2,1.2,3.9,0.5,not fire 8 | 06,06,2012,31,67,14,0,82.6,5.8,22.2,3.1,7,2.5,fire 9 | 07,06,2012,33,54,13,0,88.2,9.9,30.5,6.4,10.9,7.2,fire 10 | 08,06,2012,30,73,15,0,86.6,12.1,38.3,5.6,13.5,7.1,fire 11 | 09,06,2012,25,88,13,0.2,52.9,7.9,38.8,0.4,10.5,0.3,not fire 12 | 10,06,2012,28,79,12,0,73.2,9.5,46.3,1.3,12.6,0.9,not fire 13 | 11,06,2012,31,65,14,0,84.5,12.5,54.3,4,15.8,5.6,fire 14 | 12,06,2012,26,81,19,0,84,13.8,61.4,4.8,17.7,7.1 ,fire 15 | 13,06,2012,27,84,21,1.2,50,6.7,17,0.5,6.7,0.2,not fire 16 | 14,06,2012,30,78,20,0.5,59,4.6,7.8,1,4.4,0.4,not fire 17 | 15,06,2012,28,80,17,3.1,49.4,3,7.4,0.4,3,0.1,not fire 18 | 16,06,2012,29,89,13,0.7,36.1,1.7,7.6,0,2.2,0,not fire 19 | 17,06,2012,30,89,16,0.6,37.3,1.1,7.8,0,1.6,0,not fire 20 | 18,06,2012,31,78,14,0.3,56.9,1.9,8,0.7,2.4,0.2,not fire 21 | 
19,06,2012,31,55,16,0.1,79.9,4.5,16,2.5,5.3,1.4,not fire 22 | 20,06,2012,30,80,16,0.4,59.8,3.4,27.1,0.9,5.1,0.4,not fire 23 | 21,06,2012,30,78,14,0,81,6.3,31.6,2.6,8.4,2.2,fire 24 | 22,06,2012,31,67,17,0.1,79.1,7,39.5,2.4,9.7,2.3,not fire 25 | 23,06,2012,32,62,18,0.1,81.4,8.2,47.7,3.3,11.5,3.8,fire 26 | 24,06,2012,32,66,17,0,85.9,11.2,55.8,5.6,14.9,7.5,fire 27 | 25,06,2012,31,64,15,0,86.7,14.2,63.8,5.7,18.3,8.4,fire 28 | 26,06,2012,31,64,18,0,86.8,17.8,71.8,6.7,21.6,10.6,fire 29 | 27,06,2012,34,53,18,0,89,21.6,80.3,9.2,25.8,15,fire 30 | 28,06,2012,32,55,14,0,89.1,25.5,88.5,7.6,29.7,13.9,fire 31 | 29,06,2012,32,47,13,0.3,79.9,18.4,84.4,2.2,23.8,3.9,not fire 32 | 30,06,2012,33,50,14,0,88.7,22.9,92.8,7.2,28.3,12.9,fire 33 | 01,07,2012,29,68,19,1,59.9,2.5,8.6,1.1,2.9,0.4,not fire 34 | 02,07,2012,27,75,19,1.2,55.7,2.4,8.3,0.8,2.8,0.3,not fire 35 | 03,07,2012,32,76,20,0.7,63.1,2.6,9.2,1.3,3,0.5,not fire 36 | 04,07,2012,33,78,17,0,80.1,4.6,18.5,2.7,5.7,1.7,not fire 37 | 05,07,2012,33,66,14,0,85.9,7.6,27.9,4.8,9.1,4.9,fire 38 | 06,07,2012,32,63,14,0,87,10.9,37,5.6,12.5,6.8,fire 39 | 07,07,2012,35,64,18,0.2,80,9.7,40.4,2.8,12.1,3.2,not fire 40 | 08,07,2012,33,68,19,0,85.6,12.5,49.8,6,15.4,8,fire 41 | 09,07,2012,32,68,14,1.4,66.6,7.7,9.2,1.1,7.4,0.6,not fire 42 | 10,07,2012,33,69,13,0.7,66.6,6,9.3,1.1,5.8,0.5,not fire 43 | 11,07,2012,33,76,14,0,81.1,8.1,18.7,2.6,8.1,2.2,not fire 44 | 12,07,2012,31,75,13,0.1,75.1,7.9,27.7,1.5,9.2,0.9,not fire 45 | 13,07,2012,34,81,15,0,81.8,9.7,37.2,3,11.7,3.4,not fire 46 | 14,07,2012,34,61,13,0.6,73.9,7.8,22.9,1.4,8.4,0.8,not fire 47 | 15,07,2012,30,80,19,0.4,60.7,5.2,17,1.1,5.9,0.5,not fire 48 | 16,07,2012,28,76,21,0,72.6,7,25.5,0.7,8.3,0.4,not fire 49 | 17,07,2012,29,70,14,0,82.8,9.4,34.1,3.2,11.1,3.6,fire 50 | 18,07,2012,31,68,14,0,85.4,12.1,43.1,4.6,14.2,6,fire 51 | 19,07,2012,35,59,17,0,88.1,12,52.8,7.7,18.2,10.9,fire 52 | 20,07,2012,33,65,15,0.1,81.4,12.3,62.1,2.8,16.5,4,fire 53 | 21,07,2012,33,70,17,0,85.4,18.5,71.5,5.2,22.4,8.8,fire 54 | 22,07,2012,28,79,18,0.1,73.4,16.4,79.9,1.8,21.7,2.8,not fire 55 | 23,07,2012,27,66,22,0.4,68.2,10.5,71.3,1.8,15.4,2.1,not fire 56 | 24,07,2012,28,78,16,0.1,70,9.6,79.7,1.4,14.7,1.3,not fire 57 | 25,07,2012,31,65,18,0,84.3,12.5,88.7,4.8,18.5,7.3,fire 58 | 26,07,2012,36,53,19,0,89.2,17.1,98.6,10,23.9,15.3,fire 59 | 27,07,2012,36,48,13,0,90.3,22.2,108.5,8.7,29.4,15.3,fire 60 | 28,07,2012,33,76,15,0,86.5,24.4,117.8,5.6,32.1,11.3,fire 61 | 29,07,2012,32,73,15,0,86.6,26.7,127,5.6,35,11.9,fire 62 | 30,07,2012,31,79,15,0,85.4,28.5,136,4.7,37.4,10.7,fire 63 | 31,07,2012,35,64,17,0,87.2,31.9,145.7,6.8,41.2,15.7,fire 64 | 01,08,2012,36,45,14,0,78.8,4.8,10.2,2,4.7,0.9,not fire 65 | 02,08,2012,35,55,12,0.4,78,5.8,10,1.7,5.5,0.8,not fire 66 | 03,08,2012,35,63,14,0.3,76.6,5.7,10,1.7,5.5,0.8,not fire 67 | 04,08,2012,34,69,13,0,85,8.2,19.8,4,8.2,3.9,fire 68 | 05,08,2012,34,65,13,0,86.8,11.1,29.7,5.2,11.5,6.1,fire 69 | 06,08,2012,32,75,14,0,86.4,13,39.1,5.2,14.2,6.8,fire 70 | 07,08,2012,32,69,16,0,86.5,15.5,48.6,5.5,17.2,8,fire 71 | 08,08,2012,32,60,18,0.3,77.1,11.3,47,2.2,14.1,2.6,not fire 72 | 09,08,2012,35,59,17,0,87.4,14.8,57,6.9,17.9,9.9,fire 73 | 10,08,2012,35,55,14,0,88.9,18.6,67,7.4,21.9,11.6,fire 74 | 11,08,2012,35,63,13,0,88.9,21.7,77,7.1,25.5,12.1,fire 75 | 12,08,2012,35,51,13,0.3,81.3,15.6,75.1,2.5,20.7,4.2,not fire 76 | 13,08,2012,35,63,15,0,87,19,85.1,5.9,24.4,10.2,fire 77 | 14,08,2012,33,66,14,0,87,21.7,94.7,5.7,27.2,10.6,fire 78 | 15,08,2012,36,55,13,0.3,82.4,15.6,92.5,3.7,22,6.3,fire 79 | 
16,08,2012,36,61,18,0.3,80.2,11.7,90.4,2.8,17.6,4.2,fire 80 | 17,08,2012,37,52,18,0,89.3,16,100.7,9.7,22.9,14.6,fire 81 | 18,08,2012,36,54,18,0,89.4,20,110.9,9.7,27.5,16.1,fire 82 | 19,08,2012,35,62,19,0,89.4,23.2,120.9,9.7,31.3,17.2,fire 83 | 20,08,2012,35,68,19,0,88.3,25.9,130.6,8.8,34.7,16.8,fire 84 | 21,08,2012,36,58,19,0,88.6,29.6,141.1,9.2,38.8,18.4,fire 85 | 22,08,2012,36,55,18,0,89.1,33.5,151.3,9.9,43.1,20.4,fire 86 | 23,08,2012,36,53,16,0,89.5,37.6,161.5,10.4,47.5,22.3,fire 87 | 24,08,2012,34,64,14,0,88.9,40.5,171.3,9,50.9,20.9,fire 88 | 25,08,2012,35,60,15,0,88.9,43.9,181.3,8.2,54.7,20.3,fire 89 | 26,08,2012,31,78,18,0,85.8,45.6,190.6,4.7,57.1,13.7,fire 90 | 27,08,2012,33,82,21,0,84.9,47,200.2,4.4,59.3,13.2,fire 91 | 28,08,2012,34,64,16,0,89.4,50.2,210.4,7.3,62.9,19.9,fire 92 | 29,08,2012,35,48,18,0,90.1,54.2,220.4,12.5,67.4,30.2,fire 93 | 30,08,2012,35,70,17,0.8,72.7,25.2,180.4,1.7,37.4,4.2,not fire 94 | 31,08,2012,28,80,21,16.8,52.5,8.7,8.7,0.6,8.3,0.3,not fire 95 | 01,09,2012,25,76,17,7.2,46,1.3,7.5,0.2,1.8,0.1,not fire 96 | 02,09,2012,22,86,15,10.1,30.5,0.7,7,0,1.1,0,not fire 97 | 03,09,2012,25,78,15,3.8,42.6,1.2,7.5,0.1,1.7,0,not fire 98 | 04,09,2012,29,73,17,0.1,68.4,1.9,15.7,1.4,2.9,0.5,not fire 99 | 05,09,2012,29,75,16,0,80.8,3.4,24,2.8,5.1,1.7,fire 100 | 06,09,2012,29,74,19,0.1,75.8,3.6,32.2,2.1,5.6,0.9,not fire 101 | 07,09,2012,31,71,17,0.3,69.6,3.2,30.1,1.5,5.1,0.6,not fire 102 | 08,09,2012,30,73,17,0.9,62,2.6,8.4,1.1,3,0.4,not fire 103 | 09,09,2012,30,77,15,1,56.1,2.1,8.4,0.7,2.6,0.2,not fire 104 | 10,09,2012,33,73,12,1.8,59.9,2.2,8.9,0.7,2.7,0.3,not fire 105 | 11,09,2012,30,77,21,1.8,58.5,1.9,8.4,1.1,2.4,0.3,not fire 106 | 12,09,2012,29,88,13,0,71,2.6,16.6,1.2,3.7,0.5,not fire 107 | 13,09,2012,25,86,21,4.6,40.9,1.3,7.5,0.1,1.8,0,not fire 108 | 14,09,2012,22,76,26,8.3,47.4,1.1,7,0.4,1.6,0.1,not fire 109 | 15,09,2012,24,82,15,0.4,44.9,0.9,7.3,0.2,1.4,0,not fire 110 | 16,09,2012,30,65,14,0,78.1,3.2,15.7,1.9,4.2,0.8,not fire 111 | 17,09,2012,31,52,14,0,87.7,6.4,24.3,6.2,7.7,5.9,fire 112 | 18,09,2012,32,49,11,0,89.4,9.8,33.1,6.8,11.3,7.7,fire 113 | 19,09,2012,29,57,14,0,89.3,12.5,41.3,7.8,14.2,9.7,fire 114 | 20,09,2012,28,84,18,0,83.8,13.5,49.3,4.5,16,6.3,fire 115 | 21,09,2012,31,55,11,0,87.8,16.5,57.9,5.4,19.2,8.3,fire 116 | 22,09,2012,31,50,19,0.6,77.8,10.6,41.4,2.4,12.9,2.8,not fire 117 | 23,09,2012,32,54,11,0.5,73.7,7.9,30.4,1.2,9.6,0.7,not fire 118 | 24,09,2012,29,65,19,0.6,68.3,5.5,15.2,1.5,5.8,0.7,not fire 119 | 25,09,2012,26,81,21,5.8,48.6,3,7.7,0.4,3,0.1,not fire 120 | 26,09,2012,31,54,11,0,82,6,16.3,2.5,6.2,1.7,not fire 121 | 27,09,2012,31,66,11,0,85.7,8.3,24.9,4,9,4.1,fire 122 | 28,09,2012,32,47,14,0.7,77.5,7.1,8.8,1.8,6.8,0.9,not fire 123 | 29,09,2012,26,80,16,1.8,47.4,2.9,7.7,0.3,3,0.1,not fire 124 | 30,09,2012,25,78,14,1.4,45,1.9,7.5,0.2,2.4,0.1,not fire 125 | 126 | Sidi-Bel Abbes Region Dataset 127 | day,month,year,Temperature, RH, Ws,Rain ,FFMC,DMC,DC,ISI,BUI,FWI,Classes 128 | 01,06,2012,32,71,12,0.7,57.1,2.5,8.2,0.6,2.8,0.2,not fire 129 | 02,06,2012,30,73,13,4,55.7,2.7,7.8,0.6,2.9,0.2,not fire 130 | 03,06,2012,29,80,14,2,48.7,2.2,7.6,0.3,2.6,0.1,not fire 131 | 04,06,2012,30,64,14,0,79.4,5.2,15.4,2.2,5.6,1,not fire 132 | 05,06,2012,32,60,14,0.2,77.1,6,17.6,1.8,6.5,0.9,not fire 133 | 06,06,2012,35,54,11,0.1,83.7,8.4,26.3,3.1,9.3,3.1,fire 134 | 07,06,2012,35,44,17,0.2,85.6,9.9,28.9,5.4,10.7,6,fire 135 | 08,06,2012,28,51,17,1.3,71.4,7.7,7.4,1.5,7.3,0.8,not fire 136 | 09,06,2012,27,59,18,0.1,78.1,8.5,14.7,2.4,8.3,1.9,not fire 137 | 
10,06,2012,30,41,15,0,89.4,13.3,22.5,8.4,13.1,10,fire 138 | 11,06,2012,31,42,21,0,90.6,18.2,30.5,13.4,18,16.7,fire 139 | 12,06,2012,27,58,17,0,88.9,21.3,37.8,8.7,21.2,12.9,fire 140 | 13,06,2012,30,52,15,2,72.3,11.4,7.8,1.4,10.9,0.9,not fire 141 | 14,06,2012,27,79,16,0.7,53.4,6.4,7.3,0.5,6.1,0.3,not fire 142 | 15,06,2012,28,90,15,0,66.8,7.2,14.7,1.2,7.1,0.6,not fire 143 | 16,06,2012,29,87,15,0.4,47.4,4.2,8,0.2,4.1,0.1,not fire 144 | 17,06,2012,31,69,17,4.7,62.2,3.9,8,1.1,3.8,0.4,not fire 145 | 18,06,2012,33,62,10,8.7,65.5,4.6,8.3,0.9,4.4,0.4,not fire 146 | 19,06,2012,32,67,14,4.5,64.6,4.4,8.2,1,4.2,0.4,not fire 147 | 20,06,2012,31,72,14,0.2,60.2,3.8,8,0.8,3.7,0.3,not fire 148 | 21,06,2012,32,55,14,0,86.2,8.3,18.4,5,8.2,4.9,fire 149 | 22,06,2012,33,46,14,1.1,78.3,8.1,8.3,1.9,7.7,1.2,not fire 150 | 23,06,2012,33,59,16,0.8,74.2,7,8.3,1.6,6.7,0.8,not fire 151 | 24,06,2012,35,68,16,0,85.3,10,17,4.9,9.9,5.3,fire 152 | 25,06,2012,34,70,16,0,86,12.8,25.6,5.4,12.7,6.7,fire 153 | 26,06,2012,36,62,16,0,87.8,16.5,34.5,7,16.4,9.5,fire 154 | 27,06,2012,36,55,15,0,89.1,20.9,43.3,8,20.8,12,fire 155 | 28,06,2012,37,37,13,0,92.5,27.2,52.4,11.7,27.1,18.4,fire 156 | 29,06,2012,37,36,13,0.6,86.2,17.9,36.7,4.8,17.8,7.2,fire 157 | 30,06,2012,34,42,15,1.7,79.7,12,8.5,2.2,11.5,2.2,not fire 158 | 01,07,2012,28,58,18,2.2,63.7,3.2,8.5,1.2,3.3,0.5,not fire 159 | 02,07,2012,33,48,16,0,87.6,7.9,17.8,6.8,7.8,6.4,fire 160 | 03,07,2012,34,56,17,0.1,84.7,9.7,27.3,4.7,10.3,5.2,fire 161 | 04,07,2012,34,58,18,0,88,13.6,36.8,8,14.1,9.9,fire 162 | 05,07,2012,34,45,18,0,90.5,18.7,46.4,11.3,18.7,15,fire 163 | 06,07,2012,35,42,15,0.3,84.7,15.5,45.1,4.3,16.7,6.3,fire 164 | 07,07,2012,38,43,13,0.5,85,13,35.4,4.1,13.7,5.2,fire 165 | 08,07,2012,35,47,18,6,80.8,9.8,9.7,3.1,9.4,3,fire 166 | 09,07,2012,36,43,15,1.9,82.3,9.4,9.9,3.2,9,3.1,fire 167 | 10,07,2012,34,51,16,3.8,77.5,8,9.5,2,7.7,1.3,not fire 168 | 11,07,2012,34,56,15,2.9,74.8,7.1,9.5,1.6,6.8,0.8,not fire 169 | 12,07,2012,36,44,13,0,90.1,12.6,19.4,8.3,12.5,9.6,fire 170 | 13,07,2012,39,45,13,0.6,85.2,11.3,10.4,4.2,10.9,4.7,fire 171 | 14,07,2012,37,37,18,0.2,88.9,12.9,14.6 9,12.5,10.4,fire 172 | 15,07,2012,34,45,17,0,90.5,18,24.1,10.9,17.7,14.1,fire 173 | 16,07,2012,31,83,17,0,84.5,19.4,33.1,4.7,19.2,7.3,fire 174 | 17,07,2012,32,81,17,0,84.6,21.1,42.3,4.7,20.9,7.7,fire 175 | 18,07,2012,33,68,15,0,86.1,23.9,51.6,5.2,23.9,9.1,fire 176 | 19,07,2012,34,58,16,0,88.1,27.8,61.1,7.3,27.7,13,fire 177 | 20,07,2012,36,50,16,0,89.9,32.7,71,9.5,32.6,17.3,fire 178 | 21,07,2012,36,29,18,0,93.9,39.6,80.6,18.5,39.5,30,fire 179 | 22,07,2012,32,48,18,0,91.5,44.2,90.1,13.2,44,25.4,fire 180 | 23,07,2012,31,71,17,0,87.3,46.6,99,6.9,46.5,16.3,fire 181 | 24,07,2012,33,63,17,1.1,72.8,20.9,56.6,1.6,21.7,2.5,not fire 182 | 25,07,2012,39,64,9,1.2,73.8,11.7,15.9,1.1,11.4,0.7,not fire 183 | 26,07,2012,35,58,10,0.2,78.3,10.8,19.7,1.6,10.7,1,not fire 184 | 27,07,2012,29,87,18,0,80,11.8,28.3,2.8,11.8,3.2,not fire 185 | 28,07,2012,33,57,16,0,87.5,15.7,37.6,6.7,15.7,9,fire 186 | 29,07,2012,34,59,16,0,88.1,19.5,47.2,7.4,19.5,10.9,fire 187 | 30,07,2012,36,56,16,0,88.9,23.8,57.1,8.2,23.8,13.2,fire 188 | 31,07,2012,37,55,15,0,89.3,28.3,67.2,8.3,28.3,14.5,fire 189 | 01,08,2012,38,52,14,0,78.3,4.4,10.5,2,4.4,0.8,not fire 190 | 02,08,2012,40,34,14,0,93.3,10.8,21.4,13.8,10.6,13.5,fire 191 | 03,08,2012,39,33,17,0,93.7,17.1,32.1,17.2,16.9,19.5,fire 192 | 04,08,2012,38,35,15,0,93.8,23,42.7,15.7,22.9,20.9,fire 193 | 05,08,2012,34,42,17,0.1,88.3,23.6,52.5,19,23.5,12.6,fire 194 | 
06,08,2012,30,54,14,3.1,70.5,11,9.1,1.3,10.5,0.8,not fire 195 | 07,08,2012,34,63,13,2.9,69.7,7.2,9.8,1.2,6.9,0.6,not fire 196 | 08,08,2012,37,56,11,0,87.4,11.2,20.2,5.2,11,5.9,fire 197 | 09,08,2012,39,43,12,0,91.7,16.5,30.9,9.6,16.4,12.7,fire 198 | 10,08,2012,39,39,15,0.2,89.3,15.8,35.4,8.2,15.8,10.7,fire 199 | 11,08,2012,40,31,15,0,94.2,22.5,46.3,16.6,22.4,21.6,fire 200 | 12,08,2012,39,21,17,0.4,93,18.4,41.5,15.5,18.4,18.8,fire 201 | 13,08,2012,35,34,16,0.2,88.3,16.9,45.1,7.5,17.5,10.5,fire 202 | 14,08,2012,37,40,13,0,91.9,22.3,55.5,10.8,22.3,15.7,fire 203 | 15,08,2012,35,46,13,0.3,83.9,16.9,54.2,3.5,19,5.5,fire 204 | 16,08,2012,40,41,10,0.1,92,22.6,65.1,9.5,24.2,14.8,fire 205 | 17,08,2012,42,24,9,0,96,30.3,76.4,15.7,30.4,24,fire 206 | 18,08,2012,37,37,14,0,94.3,35.9,86.8,16,35.9,26.3,fire 207 | 19,08,2012,35,66,15,0.1,82.7,32.7,96.8,3.3,35.5,7.7,fire 208 | 20,08,2012,36,81,15,0,83.7,34.4,107,3.8,38.1,9,fire 209 | 21,08,2012,36,71,15,0,86,36.9,117.1,5.1,41.3,12.2,fire 210 | 22,08,2012,37,53,14,0,89.5,41.1,127.5,8,45.5,18.1,fire 211 | 23,08,2012,36,43,16,0,91.2,46.1,137.7,11.5,50.2,24.5,fire 212 | 24,08,2012,35,38,15,0,92.1,51.3,147.7,12.2,54.9,26.9,fire 213 | 25,08,2012,34,40,18,0,92.1,56.3,157.5,14.3,59.5,31.1,fire 214 | 26,08,2012,33,37,16,0,92.2,61.3,167.2,13.1,64,30.3,fire 215 | 27,08,2012,36,54,14,0,91,65.9,177.3,10,68,26.1,fire 216 | 28,08,2012,35,56,14,0.4,79.2,37,166,2.1,30.6,6.1,not fire 217 | 29,08,2012,35,53,17,0.5,80.2,20.7,149.2,2.7,30.6,5.9,fire 218 | 30,08,2012,34,49,15,0,89.2,24.8,159.1,8.1,35.7,16,fire 219 | 31,08,2012,30,59,19,0,89.1,27.8,168.2,9.8,39.3,19.4,fire 220 | 01,09,2012,29,86,16,0,37.9,0.9,8.2,0.1,1.4,0,not fire 221 | 02,09,2012,28,67,19,0,75.4,2.9,16.3,2,4,0.8,not fire 222 | 03,09,2012,28,75,16,0,82.2,4.4,24.3,3.3,6,2.5,fire 223 | 04,09,2012,30,66,15,0.2,73.5,4.1,26.6,1.5,6,0.7,not fire 224 | 05,09,2012,30,58,12,4.1,66.1,4,8.4,1,3.9,0.4,not fire 225 | 06,09,2012,34,71,14,6.5,64.5,3.3,9.1,1,3.5,0.4,not fire 226 | 07,09,2012,31,62,15,0,83.3,5.8,17.7,3.8,6.4,3.2,fire 227 | 08,09,2012,30,88,14,0,82.5,6.6,26.1,3,8.1,2.7,fire 228 | 09,09,2012,30,80,15,0,83.1,7.9,34.5,3.5,10,3.7,fire 229 | 10,09,2012,29,74,15,1.1,59.5,4.7,8.2,0.8,4.6,0.3,not fire 230 | 11,09,2012,30,73,14,0,79.2,6.5,16.6,2.1,6.6,1.2,not fire 231 | 12,09,2012,31,72,14,0,84.2,8.3,25.2,3.8,9.1,3.9,fire 232 | 13,09,2012,29,49,19,0,88.6,11.5,33.4,9.1,12.4,10.3,fire 233 | 14,09,2012,28,81,15,0,84.6,12.6,41.5,4.3,14.3,5.7,fire 234 | 15,09,2012,32,51,13,0,88.7,16,50.2,6.9,17.8,9.8,fire 235 | 16,09,2012,33,26,13,0,93.9,21.2,59.2,14.2,22.4,19.3,fire 236 | 17,09,2012,34,44,12,0,92.5,25.2,63.3,11.2,26.2,17.5,fire 237 | 18,09,2012,36,33,13,0.1,90.6,25.8,77.8,9,28.2,15.4,fire 238 | 19,09,2012,29,41,8,0.1,83.9,24.9,86,2.7,28.9,5.6,fire 239 | 20,09,2012,34,58,13,0.2,79.5,18.7,88,2.1,24.4,3.8,not fire 240 | 21,09,2012,35,34,17,0,92.2,23.6,97.3,13.8,29.4,21.6,fire 241 | 22,09,2012,33,64,13,0,88.9,26.1,106.3,7.1,32.4,13.7,fire 242 | 23,09,2012,35,56,14,0,89,29.4,115.6,7.5,36,15.2,fire 243 | 24,09,2012,26,49,6,2,61.3,11.9,28.1,0.6,11.9,0.4,not fire 244 | 25,09,2012,28,70,15,0,79.9,13.8,36.1,2.4,14.1,3,not fire 245 | 26,09,2012,30,65,14,0,85.4,16,44.5,4.5,16.9,6.5,fire 246 | 27,09,2012,28,87,15,4.4,41.1,6.5,8,0.1,6.2,0,not fire 247 | 28,09,2012,27,87,29,0.5,45.9,3.5,7.9,0.4,3.4,0.2,not fire 248 | 29,09,2012,24,54,18,0.1,79.7,4.3,15.2,1.7,5.1,0.7,not fire 249 | 30,09,2012,24,64,15,0.2,67.3,3.8,16.5,1.2,4.8,0.5,not fire 250 | -------------------------------------------------------------------------------- /1-Complete 
Linear Regression/Algerian_forest_fires_cleaned_dataset.csv: -------------------------------------------------------------------------------- 1 | day,month,year,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI,BUI,FWI,Classes,Region 2 | 1,6,2012,29,57,18,0.0,65.7,3.4,7.6,1.3,3.4,0.5,not fire ,0 3 | 2,6,2012,29,61,13,1.3,64.4,4.1,7.6,1.0,3.9,0.4,not fire ,0 4 | 3,6,2012,26,82,22,13.1,47.1,2.5,7.1,0.3,2.7,0.1,not fire ,0 5 | 4,6,2012,25,89,13,2.5,28.6,1.3,6.9,0.0,1.7,0.0,not fire ,0 6 | 5,6,2012,27,77,16,0.0,64.8,3.0,14.2,1.2,3.9,0.5,not fire ,0 7 | 6,6,2012,31,67,14,0.0,82.6,5.8,22.2,3.1,7.0,2.5,fire ,0 8 | 7,6,2012,33,54,13,0.0,88.2,9.9,30.5,6.4,10.9,7.2,fire ,0 9 | 8,6,2012,30,73,15,0.0,86.6,12.1,38.3,5.6,13.5,7.1,fire ,0 10 | 9,6,2012,25,88,13,0.2,52.9,7.9,38.8,0.4,10.5,0.3,not fire ,0 11 | 10,6,2012,28,79,12,0.0,73.2,9.5,46.3,1.3,12.6,0.9,not fire ,0 12 | 11,6,2012,31,65,14,0.0,84.5,12.5,54.3,4.0,15.8,5.6,fire ,0 13 | 12,6,2012,26,81,19,0.0,84.0,13.8,61.4,4.8,17.7,7.1,fire ,0 14 | 13,6,2012,27,84,21,1.2,50.0,6.7,17.0,0.5,6.7,0.2,not fire ,0 15 | 14,6,2012,30,78,20,0.5,59.0,4.6,7.8,1.0,4.4,0.4,not fire ,0 16 | 15,6,2012,28,80,17,3.1,49.4,3.0,7.4,0.4,3.0,0.1,not fire ,0 17 | 16,6,2012,29,89,13,0.7,36.1,1.7,7.6,0.0,2.2,0.0,not fire ,0 18 | 17,6,2012,30,89,16,0.6,37.3,1.1,7.8,0.0,1.6,0.0,not fire ,0 19 | 18,6,2012,31,78,14,0.3,56.9,1.9,8.0,0.7,2.4,0.2,not fire ,0 20 | 19,6,2012,31,55,16,0.1,79.9,4.5,16.0,2.5,5.3,1.4,not fire ,0 21 | 20,6,2012,30,80,16,0.4,59.8,3.4,27.1,0.9,5.1,0.4,not fire ,0 22 | 21,6,2012,30,78,14,0.0,81.0,6.3,31.6,2.6,8.4,2.2,fire ,0 23 | 22,6,2012,31,67,17,0.1,79.1,7.0,39.5,2.4,9.7,2.3,not fire ,0 24 | 23,6,2012,32,62,18,0.1,81.4,8.2,47.7,3.3,11.5,3.8,fire ,0 25 | 24,6,2012,32,66,17,0.0,85.9,11.2,55.8,5.6,14.9,7.5,fire ,0 26 | 25,6,2012,31,64,15,0.0,86.7,14.2,63.8,5.7,18.3,8.4,fire ,0 27 | 26,6,2012,31,64,18,0.0,86.8,17.8,71.8,6.7,21.6,10.6,fire ,0 28 | 27,6,2012,34,53,18,0.0,89.0,21.6,80.3,9.2,25.8,15.0,fire ,0 29 | 28,6,2012,32,55,14,0.0,89.1,25.5,88.5,7.6,29.7,13.9,fire ,0 30 | 29,6,2012,32,47,13,0.3,79.9,18.4,84.4,2.2,23.8,3.9,not fire ,0 31 | 30,6,2012,33,50,14,0.0,88.7,22.9,92.8,7.2,28.3,12.9,fire ,0 32 | 1,7,2012,29,68,19,1.0,59.9,2.5,8.6,1.1,2.9,0.4,not fire ,0 33 | 2,7,2012,27,75,19,1.2,55.7,2.4,8.3,0.8,2.8,0.3,not fire ,0 34 | 3,7,2012,32,76,20,0.7,63.1,2.6,9.2,1.3,3.0,0.5,not fire ,0 35 | 4,7,2012,33,78,17,0.0,80.1,4.6,18.5,2.7,5.7,1.7,not fire ,0 36 | 5,7,2012,33,66,14,0.0,85.9,7.6,27.9,4.8,9.1,4.9,fire ,0 37 | 6,7,2012,32,63,14,0.0,87.0,10.9,37.0,5.6,12.5,6.8,fire ,0 38 | 7,7,2012,35,64,18,0.2,80.0,9.7,40.4,2.8,12.1,3.2,not fire ,0 39 | 8,7,2012,33,68,19,0.0,85.6,12.5,49.8,6.0,15.4,8.0,fire ,0 40 | 9,7,2012,32,68,14,1.4,66.6,7.7,9.2,1.1,7.4,0.6,not fire ,0 41 | 10,7,2012,33,69,13,0.7,66.6,6.0,9.3,1.1,5.8,0.5,not fire ,0 42 | 11,7,2012,33,76,14,0.0,81.1,8.1,18.7,2.6,8.1,2.2,not fire ,0 43 | 12,7,2012,31,75,13,0.1,75.1,7.9,27.7,1.5,9.2,0.9,not fire ,0 44 | 13,7,2012,34,81,15,0.0,81.8,9.7,37.2,3.0,11.7,3.4,not fire ,0 45 | 14,7,2012,34,61,13,0.6,73.9,7.8,22.9,1.4,8.4,0.8,not fire ,0 46 | 15,7,2012,30,80,19,0.4,60.7,5.2,17.0,1.1,5.9,0.5,not fire ,0 47 | 16,7,2012,28,76,21,0.0,72.6,7.0,25.5,0.7,8.3,0.4,not fire ,0 48 | 17,7,2012,29,70,14,0.0,82.8,9.4,34.1,3.2,11.1,3.6,fire ,0 49 | 18,7,2012,31,68,14,0.0,85.4,12.1,43.1,4.6,14.2,6.0,fire ,0 50 | 19,7,2012,35,59,17,0.0,88.1,12.0,52.8,7.7,18.2,10.9,fire ,0 51 | 20,7,2012,33,65,15,0.1,81.4,12.3,62.1,2.8,16.5,4.0,fire ,0 52 | 21,7,2012,33,70,17,0.0,85.4,18.5,71.5,5.2,22.4,8.8,fire ,0 53 | 
22,7,2012,28,79,18,0.1,73.4,16.4,79.9,1.8,21.7,2.8,not fire ,0 54 | 23,7,2012,27,66,22,0.4,68.2,10.5,71.3,1.8,15.4,2.1,not fire ,0 55 | 24,7,2012,28,78,16,0.1,70.0,9.6,79.7,1.4,14.7,1.3,not fire ,0 56 | 25,7,2012,31,65,18,0.0,84.3,12.5,88.7,4.8,18.5,7.3,fire ,0 57 | 26,7,2012,36,53,19,0.0,89.2,17.1,98.6,10.0,23.9,15.3,fire ,0 58 | 27,7,2012,36,48,13,0.0,90.3,22.2,108.5,8.7,29.4,15.3,fire ,0 59 | 28,7,2012,33,76,15,0.0,86.5,24.4,117.8,5.6,32.1,11.3,fire ,0 60 | 29,7,2012,32,73,15,0.0,86.6,26.7,127.0,5.6,35.0,11.9,fire ,0 61 | 30,7,2012,31,79,15,0.0,85.4,28.5,136.0,4.7,37.4,10.7,fire ,0 62 | 31,7,2012,35,64,17,0.0,87.2,31.9,145.7,6.8,41.2,15.7,fire ,0 63 | 1,8,2012,36,45,14,0.0,78.8,4.8,10.2,2.0,4.7,0.9,not fire ,0 64 | 2,8,2012,35,55,12,0.4,78.0,5.8,10.0,1.7,5.5,0.8,not fire ,0 65 | 3,8,2012,35,63,14,0.3,76.6,5.7,10.0,1.7,5.5,0.8,not fire ,0 66 | 4,8,2012,34,69,13,0.0,85.0,8.2,19.8,4.0,8.2,3.9,fire ,0 67 | 5,8,2012,34,65,13,0.0,86.8,11.1,29.7,5.2,11.5,6.1,fire ,0 68 | 6,8,2012,32,75,14,0.0,86.4,13.0,39.1,5.2,14.2,6.8,fire ,0 69 | 7,8,2012,32,69,16,0.0,86.5,15.5,48.6,5.5,17.2,8.0,fire ,0 70 | 8,8,2012,32,60,18,0.3,77.1,11.3,47.0,2.2,14.1,2.6,not fire ,0 71 | 9,8,2012,35,59,17,0.0,87.4,14.8,57.0,6.9,17.9,9.9,fire ,0 72 | 10,8,2012,35,55,14,0.0,88.9,18.6,67.0,7.4,21.9,11.6,fire ,0 73 | 11,8,2012,35,63,13,0.0,88.9,21.7,77.0,7.1,25.5,12.1,fire ,0 74 | 12,8,2012,35,51,13,0.3,81.3,15.6,75.1,2.5,20.7,4.2,not fire ,0 75 | 13,8,2012,35,63,15,0.0,87.0,19.0,85.1,5.9,24.4,10.2,fire ,0 76 | 14,8,2012,33,66,14,0.0,87.0,21.7,94.7,5.7,27.2,10.6,fire ,0 77 | 15,8,2012,36,55,13,0.3,82.4,15.6,92.5,3.7,22.0,6.3,fire ,0 78 | 16,8,2012,36,61,18,0.3,80.2,11.7,90.4,2.8,17.6,4.2,fire ,0 79 | 17,8,2012,37,52,18,0.0,89.3,16.0,100.7,9.7,22.9,14.6,fire ,0 80 | 18,8,2012,36,54,18,0.0,89.4,20.0,110.9,9.7,27.5,16.1,fire ,0 81 | 19,8,2012,35,62,19,0.0,89.4,23.2,120.9,9.7,31.3,17.2,fire ,0 82 | 20,8,2012,35,68,19,0.0,88.3,25.9,130.6,8.8,34.7,16.8,fire ,0 83 | 21,8,2012,36,58,19,0.0,88.6,29.6,141.1,9.2,38.8,18.4,fire ,0 84 | 22,8,2012,36,55,18,0.0,89.1,33.5,151.3,9.9,43.1,20.4,fire ,0 85 | 23,8,2012,36,53,16,0.0,89.5,37.6,161.5,10.4,47.5,22.3,fire,0 86 | 24,8,2012,34,64,14,0.0,88.9,40.5,171.3,9.0,50.9,20.9,fire ,0 87 | 25,8,2012,35,60,15,0.0,88.9,43.9,181.3,8.2,54.7,20.3,fire,0 88 | 26,8,2012,31,78,18,0.0,85.8,45.6,190.6,4.7,57.1,13.7,fire ,0 89 | 27,8,2012,33,82,21,0.0,84.9,47.0,200.2,4.4,59.3,13.2,fire,0 90 | 28,8,2012,34,64,16,0.0,89.4,50.2,210.4,7.3,62.9,19.9,fire ,0 91 | 29,8,2012,35,48,18,0.0,90.1,54.2,220.4,12.5,67.4,30.2,fire ,0 92 | 30,8,2012,35,70,17,0.8,72.7,25.2,180.4,1.7,37.4,4.2,not fire ,0 93 | 31,8,2012,28,80,21,16.8,52.5,8.7,8.7,0.6,8.3,0.3,not fire,0 94 | 1,9,2012,25,76,17,7.2,46.0,1.3,7.5,0.2,1.8,0.1,not fire ,0 95 | 2,9,2012,22,86,15,10.1,30.5,0.7,7.0,0.0,1.1,0.0,not fire,0 96 | 3,9,2012,25,78,15,3.8,42.6,1.2,7.5,0.1,1.7,0.0,not fire ,0 97 | 4,9,2012,29,73,17,0.1,68.4,1.9,15.7,1.4,2.9,0.5,not fire ,0 98 | 5,9,2012,29,75,16,0.0,80.8,3.4,24.0,2.8,5.1,1.7,fire ,0 99 | 6,9,2012,29,74,19,0.1,75.8,3.6,32.2,2.1,5.6,0.9,not fire ,0 100 | 7,9,2012,31,71,17,0.3,69.6,3.2,30.1,1.5,5.1,0.6,not fire ,0 101 | 8,9,2012,30,73,17,0.9,62.0,2.6,8.4,1.1,3.0,0.4,not fire ,0 102 | 9,9,2012,30,77,15,1.0,56.1,2.1,8.4,0.7,2.6,0.2,not fire ,0 103 | 10,9,2012,33,73,12,1.8,59.9,2.2,8.9,0.7,2.7,0.3,not fire ,0 104 | 11,9,2012,30,77,21,1.8,58.5,1.9,8.4,1.1,2.4,0.3,not fire ,0 105 | 12,9,2012,29,88,13,0.0,71.0,2.6,16.6,1.2,3.7,0.5,not fire ,0 106 | 13,9,2012,25,86,21,4.6,40.9,1.3,7.5,0.1,1.8,0.0,not fire ,0 107 | 
14,9,2012,22,76,26,8.3,47.4,1.1,7.0,0.4,1.6,0.1,not fire ,0 108 | 15,9,2012,24,82,15,0.4,44.9,0.9,7.3,0.2,1.4,0.0,not fire ,0 109 | 16,9,2012,30,65,14,0.0,78.1,3.2,15.7,1.9,4.2,0.8,not fire ,0 110 | 17,9,2012,31,52,14,0.0,87.7,6.4,24.3,6.2,7.7,5.9,fire ,0 111 | 18,9,2012,32,49,11,0.0,89.4,9.8,33.1,6.8,11.3,7.7,fire ,0 112 | 19,9,2012,29,57,14,0.0,89.3,12.5,41.3,7.8,14.2,9.7,fire ,0 113 | 20,9,2012,28,84,18,0.0,83.8,13.5,49.3,4.5,16.0,6.3,fire,0 114 | 21,9,2012,31,55,11,0.0,87.8,16.5,57.9,5.4,19.2,8.3,fire ,0 115 | 22,9,2012,31,50,19,0.6,77.8,10.6,41.4,2.4,12.9,2.8,not fire ,0 116 | 23,9,2012,32,54,11,0.5,73.7,7.9,30.4,1.2,9.6,0.7,not fire ,0 117 | 24,9,2012,29,65,19,0.6,68.3,5.5,15.2,1.5,5.8,0.7,not fire ,0 118 | 25,9,2012,26,81,21,5.8,48.6,3.0,7.7,0.4,3.0,0.1,not fire ,0 119 | 26,9,2012,31,54,11,0.0,82.0,6.0,16.3,2.5,6.2,1.7,not fire ,0 120 | 27,9,2012,31,66,11,0.0,85.7,8.3,24.9,4.0,9.0,4.1,fire ,0 121 | 28,9,2012,32,47,14,0.7,77.5,7.1,8.8,1.8,6.8,0.9,not fire ,0 122 | 29,9,2012,26,80,16,1.8,47.4,2.9,7.7,0.3,3.0,0.1,not fire ,0 123 | 30,9,2012,25,78,14,1.4,45.0,1.9,7.5,0.2,2.4,0.1,not fire ,0 124 | 1,6,2012,32,71,12,0.7,57.1,2.5,8.2,0.6,2.8,0.2,not fire ,1 125 | 2,6,2012,30,73,13,4.0,55.7,2.7,7.8,0.6,2.9,0.2,not fire ,1 126 | 3,6,2012,29,80,14,2.0,48.7,2.2,7.6,0.3,2.6,0.1,not fire ,1 127 | 4,6,2012,30,64,14,0.0,79.4,5.2,15.4,2.2,5.6,1.0,not fire ,1 128 | 5,6,2012,32,60,14,0.2,77.1,6.0,17.6,1.8,6.5,0.9,not fire ,1 129 | 6,6,2012,35,54,11,0.1,83.7,8.4,26.3,3.1,9.3,3.1,fire ,1 130 | 7,6,2012,35,44,17,0.2,85.6,9.9,28.9,5.4,10.7,6.0,fire ,1 131 | 8,6,2012,28,51,17,1.3,71.4,7.7,7.4,1.5,7.3,0.8,not fire ,1 132 | 9,6,2012,27,59,18,0.1,78.1,8.5,14.7,2.4,8.3,1.9,not fire ,1 133 | 10,6,2012,30,41,15,0.0,89.4,13.3,22.5,8.4,13.1,10.0,fire ,1 134 | 11,6,2012,31,42,21,0.0,90.6,18.2,30.5,13.4,18.0,16.7,fire ,1 135 | 12,6,2012,27,58,17,0.0,88.9,21.3,37.8,8.7,21.2,12.9,fire ,1 136 | 13,6,2012,30,52,15,2.0,72.3,11.4,7.8,1.4,10.9,0.9,not fire ,1 137 | 14,6,2012,27,79,16,0.7,53.4,6.4,7.3,0.5,6.1,0.3,not fire ,1 138 | 15,6,2012,28,90,15,0.0,66.8,7.2,14.7,1.2,7.1,0.6,not fire ,1 139 | 16,6,2012,29,87,15,0.4,47.4,4.2,8.0,0.2,4.1,0.1,not fire ,1 140 | 17,6,2012,31,69,17,4.7,62.2,3.9,8.0,1.1,3.8,0.4,not fire ,1 141 | 18,6,2012,33,62,10,8.7,65.5,4.6,8.3,0.9,4.4,0.4,not fire ,1 142 | 19,6,2012,32,67,14,4.5,64.6,4.4,8.2,1.0,4.2,0.4,not fire ,1 143 | 20,6,2012,31,72,14,0.2,60.2,3.8,8.0,0.8,3.7,0.3,not fire ,1 144 | 21,6,2012,32,55,14,0.0,86.2,8.3,18.4,5.0,8.2,4.9,fire ,1 145 | 22,6,2012,33,46,14,1.1,78.3,8.1,8.3,1.9,7.7,1.2,not fire ,1 146 | 23,6,2012,33,59,16,0.8,74.2,7.0,8.3,1.6,6.7,0.8,not fire ,1 147 | 24,6,2012,35,68,16,0.0,85.3,10.0,17.0,4.9,9.9,5.3,fire ,1 148 | 25,6,2012,34,70,16,0.0,86.0,12.8,25.6,5.4,12.7,6.7,fire ,1 149 | 26,6,2012,36,62,16,0.0,87.8,16.5,34.5,7.0,16.4,9.5,fire ,1 150 | 27,6,2012,36,55,15,0.0,89.1,20.9,43.3,8.0,20.8,12.0,fire ,1 151 | 28,6,2012,37,37,13,0.0,92.5,27.2,52.4,11.7,27.1,18.4,fire ,1 152 | 29,6,2012,37,36,13,0.6,86.2,17.9,36.7,4.8,17.8,7.2,fire ,1 153 | 30,6,2012,34,42,15,1.7,79.7,12.0,8.5,2.2,11.5,2.2,not fire ,1 154 | 1,7,2012,28,58,18,2.2,63.7,3.2,8.5,1.2,3.3,0.5,not fire ,1 155 | 2,7,2012,33,48,16,0.0,87.6,7.9,17.8,6.8,7.8,6.4,fire ,1 156 | 3,7,2012,34,56,17,0.1,84.7,9.7,27.3,4.7,10.3,5.2,fire ,1 157 | 4,7,2012,34,58,18,0.0,88.0,13.6,36.8,8.0,14.1,9.9,fire ,1 158 | 5,7,2012,34,45,18,0.0,90.5,18.7,46.4,11.3,18.7,15.0,fire ,1 159 | 6,7,2012,35,42,15,0.3,84.7,15.5,45.1,4.3,16.7,6.3,fire ,1 160 | 7,7,2012,38,43,13,0.5,85.0,13.0,35.4,4.1,13.7,5.2,fire ,1 161 | 
8,7,2012,35,47,18,6.0,80.8,9.8,9.7,3.1,9.4,3.0,fire ,1 162 | 9,7,2012,36,43,15,1.9,82.3,9.4,9.9,3.2,9.0,3.1,fire ,1 163 | 10,7,2012,34,51,16,3.8,77.5,8.0,9.5,2.0,7.7,1.3,not fire ,1 164 | 11,7,2012,34,56,15,2.9,74.8,7.1,9.5,1.6,6.8,0.8,not fire ,1 165 | 12,7,2012,36,44,13,0.0,90.1,12.6,19.4,8.3,12.5,9.6,fire ,1 166 | 13,7,2012,39,45,13,0.6,85.2,11.3,10.4,4.2,10.9,4.7,fire ,1 167 | 15,7,2012,34,45,17,0.0,90.5,18.0,24.1,10.9,17.7,14.1,fire ,1 168 | 16,7,2012,31,83,17,0.0,84.5,19.4,33.1,4.7,19.2,7.3,fire ,1 169 | 17,7,2012,32,81,17,0.0,84.6,21.1,42.3,4.7,20.9,7.7,fire ,1 170 | 18,7,2012,33,68,15,0.0,86.1,23.9,51.6,5.2,23.9,9.1,fire ,1 171 | 19,7,2012,34,58,16,0.0,88.1,27.8,61.1,7.3,27.7,13.0,fire ,1 172 | 20,7,2012,36,50,16,0.0,89.9,32.7,71.0,9.5,32.6,17.3,fire ,1 173 | 21,7,2012,36,29,18,0.0,93.9,39.6,80.6,18.5,39.5,30.0,fire ,1 174 | 22,7,2012,32,48,18,0.0,91.5,44.2,90.1,13.2,44.0,25.4,fire ,1 175 | 23,7,2012,31,71,17,0.0,87.3,46.6,99.0,6.9,46.5,16.3,fire ,1 176 | 24,7,2012,33,63,17,1.1,72.8,20.9,56.6,1.6,21.7,2.5,not fire ,1 177 | 25,7,2012,39,64,9,1.2,73.8,11.7,15.9,1.1,11.4,0.7,not fire ,1 178 | 26,7,2012,35,58,10,0.2,78.3,10.8,19.7,1.6,10.7,1.0,not fire ,1 179 | 27,7,2012,29,87,18,0.0,80.0,11.8,28.3,2.8,11.8,3.2,not fire ,1 180 | 28,7,2012,33,57,16,0.0,87.5,15.7,37.6,6.7,15.7,9.0,fire ,1 181 | 29,7,2012,34,59,16,0.0,88.1,19.5,47.2,7.4,19.5,10.9,fire ,1 182 | 30,7,2012,36,56,16,0.0,88.9,23.8,57.1,8.2,23.8,13.2,fire ,1 183 | 31,7,2012,37,55,15,0.0,89.3,28.3,67.2,8.3,28.3,14.5,fire ,1 184 | 1,8,2012,38,52,14,0.0,78.3,4.4,10.5,2.0,4.4,0.8,not fire ,1 185 | 2,8,2012,40,34,14,0.0,93.3,10.8,21.4,13.8,10.6,13.5,fire ,1 186 | 3,8,2012,39,33,17,0.0,93.7,17.1,32.1,17.2,16.9,19.5,fire ,1 187 | 4,8,2012,38,35,15,0.0,93.8,23.0,42.7,15.7,22.9,20.9,fire ,1 188 | 5,8,2012,34,42,17,0.1,88.3,23.6,52.5,19.0,23.5,12.6,fire ,1 189 | 6,8,2012,30,54,14,3.1,70.5,11.0,9.1,1.3,10.5,0.8,not fire ,1 190 | 7,8,2012,34,63,13,2.9,69.7,7.2,9.8,1.2,6.9,0.6,not fire ,1 191 | 8,8,2012,37,56,11,0.0,87.4,11.2,20.2,5.2,11.0,5.9,fire ,1 192 | 9,8,2012,39,43,12,0.0,91.7,16.5,30.9,9.6,16.4,12.7,fire ,1 193 | 10,8,2012,39,39,15,0.2,89.3,15.8,35.4,8.2,15.8,10.7,fire ,1 194 | 11,8,2012,40,31,15,0.0,94.2,22.5,46.3,16.6,22.4,21.6,fire ,1 195 | 12,8,2012,39,21,17,0.4,93.0,18.4,41.5,15.5,18.4,18.8,fire ,1 196 | 13,8,2012,35,34,16,0.2,88.3,16.9,45.1,7.5,17.5,10.5,fire ,1 197 | 14,8,2012,37,40,13,0.0,91.9,22.3,55.5,10.8,22.3,15.7,fire ,1 198 | 15,8,2012,35,46,13,0.3,83.9,16.9,54.2,3.5,19.0,5.5,fire ,1 199 | 16,8,2012,40,41,10,0.1,92.0,22.6,65.1,9.5,24.2,14.8,fire ,1 200 | 17,8,2012,42,24,9,0.0,96.0,30.3,76.4,15.7,30.4,24.0,fire ,1 201 | 18,8,2012,37,37,14,0.0,94.3,35.9,86.8,16.0,35.9,26.3,fire ,1 202 | 19,8,2012,35,66,15,0.1,82.7,32.7,96.8,3.3,35.5,7.7,fire ,1 203 | 20,8,2012,36,81,15,0.0,83.7,34.4,107.0,3.8,38.1,9.0,fire ,1 204 | 21,8,2012,36,71,15,0.0,86.0,36.9,117.1,5.1,41.3,12.2,fire ,1 205 | 22,8,2012,37,53,14,0.0,89.5,41.1,127.5,8.0,45.5,18.1,fire ,1 206 | 23,8,2012,36,43,16,0.0,91.2,46.1,137.7,11.5,50.2,24.5,fire ,1 207 | 24,8,2012,35,38,15,0.0,92.1,51.3,147.7,12.2,54.9,26.9,fire ,1 208 | 25,8,2012,34,40,18,0.0,92.1,56.3,157.5,14.3,59.5,31.1,fire ,1 209 | 26,8,2012,33,37,16,0.0,92.2,61.3,167.2,13.1,64.0,30.3,fire ,1 210 | 27,8,2012,36,54,14,0.0,91.0,65.9,177.3,10.0,68.0,26.1,fire ,1 211 | 28,8,2012,35,56,14,0.4,79.2,37.0,166.0,2.1,30.6,6.1,not fire ,1 212 | 29,8,2012,35,53,17,0.5,80.2,20.7,149.2,2.7,30.6,5.9,fire ,1 213 | 30,8,2012,34,49,15,0.0,89.2,24.8,159.1,8.1,35.7,16.0,fire ,1 214 | 
31,8,2012,30,59,19,0.0,89.1,27.8,168.2,9.8,39.3,19.4,fire ,1 215 | 1,9,2012,29,86,16,0.0,37.9,0.9,8.2,0.1,1.4,0.0,not fire ,1 216 | 2,9,2012,28,67,19,0.0,75.4,2.9,16.3,2.0,4.0,0.8,not fire ,1 217 | 3,9,2012,28,75,16,0.0,82.2,4.4,24.3,3.3,6.0,2.5,fire ,1 218 | 4,9,2012,30,66,15,0.2,73.5,4.1,26.6,1.5,6.0,0.7,not fire ,1 219 | 5,9,2012,30,58,12,4.1,66.1,4.0,8.4,1.0,3.9,0.4,not fire ,1 220 | 6,9,2012,34,71,14,6.5,64.5,3.3,9.1,1.0,3.5,0.4,not fire ,1 221 | 7,9,2012,31,62,15,0.0,83.3,5.8,17.7,3.8,6.4,3.2,fire ,1 222 | 8,9,2012,30,88,14,0.0,82.5,6.6,26.1,3.0,8.1,2.7,fire ,1 223 | 9,9,2012,30,80,15,0.0,83.1,7.9,34.5,3.5,10.0,3.7,fire ,1 224 | 10,9,2012,29,74,15,1.1,59.5,4.7,8.2,0.8,4.6,0.3,not fire ,1 225 | 11,9,2012,30,73,14,0.0,79.2,6.5,16.6,2.1,6.6,1.2,not fire ,1 226 | 12,9,2012,31,72,14,0.0,84.2,8.3,25.2,3.8,9.1,3.9,fire ,1 227 | 13,9,2012,29,49,19,0.0,88.6,11.5,33.4,9.1,12.4,10.3,fire ,1 228 | 14,9,2012,28,81,15,0.0,84.6,12.6,41.5,4.3,14.3,5.7,fire ,1 229 | 15,9,2012,32,51,13,0.0,88.7,16.0,50.2,6.9,17.8,9.8,fire ,1 230 | 16,9,2012,33,26,13,0.0,93.9,21.2,59.2,14.2,22.4,19.3,fire ,1 231 | 17,9,2012,34,44,12,0.0,92.5,25.2,63.3,11.2,26.2,17.5,fire ,1 232 | 18,9,2012,36,33,13,0.1,90.6,25.8,77.8,9.0,28.2,15.4,fire ,1 233 | 19,9,2012,29,41,8,0.1,83.9,24.9,86.0,2.7,28.9,5.6,fire ,1 234 | 20,9,2012,34,58,13,0.2,79.5,18.7,88.0,2.1,24.4,3.8,not fire ,1 235 | 21,9,2012,35,34,17,0.0,92.2,23.6,97.3,13.8,29.4,21.6,fire ,1 236 | 22,9,2012,33,64,13,0.0,88.9,26.1,106.3,7.1,32.4,13.7,fire ,1 237 | 23,9,2012,35,56,14,0.0,89.0,29.4,115.6,7.5,36.0,15.2,fire ,1 238 | 24,9,2012,26,49,6,2.0,61.3,11.9,28.1,0.6,11.9,0.4,not fire ,1 239 | 25,9,2012,28,70,15,0.0,79.9,13.8,36.1,2.4,14.1,3.0,not fire ,1 240 | 26,9,2012,30,65,14,0.0,85.4,16.0,44.5,4.5,16.9,6.5,fire ,1 241 | 27,9,2012,28,87,15,4.4,41.1,6.5,8.0,0.1,6.2,0.0,not fire ,1 242 | 28,9,2012,27,87,29,0.5,45.9,3.5,7.9,0.4,3.4,0.2,not fire ,1 243 | 29,9,2012,24,54,18,0.1,79.7,4.3,15.2,1.7,5.1,0.7,not fire ,1 244 | 30,9,2012,24,64,15,0.2,67.3,3.8,16.5,1.2,4.8,0.5,not fire ,1 245 | -------------------------------------------------------------------------------- /2-Ridge Lasso And Elasticnet/Algerian_forest_fires_cleaned_dataset.csv: -------------------------------------------------------------------------------- 1 | day,month,year,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI,BUI,FWI,Classes,Region 2 | 1,6,2012,29,57,18,0.0,65.7,3.4,7.6,1.3,3.4,0.5,not fire ,0 3 | 2,6,2012,29,61,13,1.3,64.4,4.1,7.6,1.0,3.9,0.4,not fire ,0 4 | 3,6,2012,26,82,22,13.1,47.1,2.5,7.1,0.3,2.7,0.1,not fire ,0 5 | 4,6,2012,25,89,13,2.5,28.6,1.3,6.9,0.0,1.7,0.0,not fire ,0 6 | 5,6,2012,27,77,16,0.0,64.8,3.0,14.2,1.2,3.9,0.5,not fire ,0 7 | 6,6,2012,31,67,14,0.0,82.6,5.8,22.2,3.1,7.0,2.5,fire ,0 8 | 7,6,2012,33,54,13,0.0,88.2,9.9,30.5,6.4,10.9,7.2,fire ,0 9 | 8,6,2012,30,73,15,0.0,86.6,12.1,38.3,5.6,13.5,7.1,fire ,0 10 | 9,6,2012,25,88,13,0.2,52.9,7.9,38.8,0.4,10.5,0.3,not fire ,0 11 | 10,6,2012,28,79,12,0.0,73.2,9.5,46.3,1.3,12.6,0.9,not fire ,0 12 | 11,6,2012,31,65,14,0.0,84.5,12.5,54.3,4.0,15.8,5.6,fire ,0 13 | 12,6,2012,26,81,19,0.0,84.0,13.8,61.4,4.8,17.7,7.1,fire ,0 14 | 13,6,2012,27,84,21,1.2,50.0,6.7,17.0,0.5,6.7,0.2,not fire ,0 15 | 14,6,2012,30,78,20,0.5,59.0,4.6,7.8,1.0,4.4,0.4,not fire ,0 16 | 15,6,2012,28,80,17,3.1,49.4,3.0,7.4,0.4,3.0,0.1,not fire ,0 17 | 16,6,2012,29,89,13,0.7,36.1,1.7,7.6,0.0,2.2,0.0,not fire ,0 18 | 17,6,2012,30,89,16,0.6,37.3,1.1,7.8,0.0,1.6,0.0,not fire ,0 19 | 18,6,2012,31,78,14,0.3,56.9,1.9,8.0,0.7,2.4,0.2,not fire ,0 20 | 
19,6,2012,31,55,16,0.1,79.9,4.5,16.0,2.5,5.3,1.4,not fire ,0 21 | 20,6,2012,30,80,16,0.4,59.8,3.4,27.1,0.9,5.1,0.4,not fire ,0 22 | 21,6,2012,30,78,14,0.0,81.0,6.3,31.6,2.6,8.4,2.2,fire ,0 23 | 22,6,2012,31,67,17,0.1,79.1,7.0,39.5,2.4,9.7,2.3,not fire ,0 24 | 23,6,2012,32,62,18,0.1,81.4,8.2,47.7,3.3,11.5,3.8,fire ,0 25 | 24,6,2012,32,66,17,0.0,85.9,11.2,55.8,5.6,14.9,7.5,fire ,0 26 | 25,6,2012,31,64,15,0.0,86.7,14.2,63.8,5.7,18.3,8.4,fire ,0 27 | 26,6,2012,31,64,18,0.0,86.8,17.8,71.8,6.7,21.6,10.6,fire ,0 28 | 27,6,2012,34,53,18,0.0,89.0,21.6,80.3,9.2,25.8,15.0,fire ,0 29 | 28,6,2012,32,55,14,0.0,89.1,25.5,88.5,7.6,29.7,13.9,fire ,0 30 | 29,6,2012,32,47,13,0.3,79.9,18.4,84.4,2.2,23.8,3.9,not fire ,0 31 | 30,6,2012,33,50,14,0.0,88.7,22.9,92.8,7.2,28.3,12.9,fire ,0 32 | 1,7,2012,29,68,19,1.0,59.9,2.5,8.6,1.1,2.9,0.4,not fire ,0 33 | 2,7,2012,27,75,19,1.2,55.7,2.4,8.3,0.8,2.8,0.3,not fire ,0 34 | 3,7,2012,32,76,20,0.7,63.1,2.6,9.2,1.3,3.0,0.5,not fire ,0 35 | 4,7,2012,33,78,17,0.0,80.1,4.6,18.5,2.7,5.7,1.7,not fire ,0 36 | 5,7,2012,33,66,14,0.0,85.9,7.6,27.9,4.8,9.1,4.9,fire ,0 37 | 6,7,2012,32,63,14,0.0,87.0,10.9,37.0,5.6,12.5,6.8,fire ,0 38 | 7,7,2012,35,64,18,0.2,80.0,9.7,40.4,2.8,12.1,3.2,not fire ,0 39 | 8,7,2012,33,68,19,0.0,85.6,12.5,49.8,6.0,15.4,8.0,fire ,0 40 | 9,7,2012,32,68,14,1.4,66.6,7.7,9.2,1.1,7.4,0.6,not fire ,0 41 | 10,7,2012,33,69,13,0.7,66.6,6.0,9.3,1.1,5.8,0.5,not fire ,0 42 | 11,7,2012,33,76,14,0.0,81.1,8.1,18.7,2.6,8.1,2.2,not fire ,0 43 | 12,7,2012,31,75,13,0.1,75.1,7.9,27.7,1.5,9.2,0.9,not fire ,0 44 | 13,7,2012,34,81,15,0.0,81.8,9.7,37.2,3.0,11.7,3.4,not fire ,0 45 | 14,7,2012,34,61,13,0.6,73.9,7.8,22.9,1.4,8.4,0.8,not fire ,0 46 | 15,7,2012,30,80,19,0.4,60.7,5.2,17.0,1.1,5.9,0.5,not fire ,0 47 | 16,7,2012,28,76,21,0.0,72.6,7.0,25.5,0.7,8.3,0.4,not fire ,0 48 | 17,7,2012,29,70,14,0.0,82.8,9.4,34.1,3.2,11.1,3.6,fire ,0 49 | 18,7,2012,31,68,14,0.0,85.4,12.1,43.1,4.6,14.2,6.0,fire ,0 50 | 19,7,2012,35,59,17,0.0,88.1,12.0,52.8,7.7,18.2,10.9,fire ,0 51 | 20,7,2012,33,65,15,0.1,81.4,12.3,62.1,2.8,16.5,4.0,fire ,0 52 | 21,7,2012,33,70,17,0.0,85.4,18.5,71.5,5.2,22.4,8.8,fire ,0 53 | 22,7,2012,28,79,18,0.1,73.4,16.4,79.9,1.8,21.7,2.8,not fire ,0 54 | 23,7,2012,27,66,22,0.4,68.2,10.5,71.3,1.8,15.4,2.1,not fire ,0 55 | 24,7,2012,28,78,16,0.1,70.0,9.6,79.7,1.4,14.7,1.3,not fire ,0 56 | 25,7,2012,31,65,18,0.0,84.3,12.5,88.7,4.8,18.5,7.3,fire ,0 57 | 26,7,2012,36,53,19,0.0,89.2,17.1,98.6,10.0,23.9,15.3,fire ,0 58 | 27,7,2012,36,48,13,0.0,90.3,22.2,108.5,8.7,29.4,15.3,fire ,0 59 | 28,7,2012,33,76,15,0.0,86.5,24.4,117.8,5.6,32.1,11.3,fire ,0 60 | 29,7,2012,32,73,15,0.0,86.6,26.7,127.0,5.6,35.0,11.9,fire ,0 61 | 30,7,2012,31,79,15,0.0,85.4,28.5,136.0,4.7,37.4,10.7,fire ,0 62 | 31,7,2012,35,64,17,0.0,87.2,31.9,145.7,6.8,41.2,15.7,fire ,0 63 | 1,8,2012,36,45,14,0.0,78.8,4.8,10.2,2.0,4.7,0.9,not fire ,0 64 | 2,8,2012,35,55,12,0.4,78.0,5.8,10.0,1.7,5.5,0.8,not fire ,0 65 | 3,8,2012,35,63,14,0.3,76.6,5.7,10.0,1.7,5.5,0.8,not fire ,0 66 | 4,8,2012,34,69,13,0.0,85.0,8.2,19.8,4.0,8.2,3.9,fire ,0 67 | 5,8,2012,34,65,13,0.0,86.8,11.1,29.7,5.2,11.5,6.1,fire ,0 68 | 6,8,2012,32,75,14,0.0,86.4,13.0,39.1,5.2,14.2,6.8,fire ,0 69 | 7,8,2012,32,69,16,0.0,86.5,15.5,48.6,5.5,17.2,8.0,fire ,0 70 | 8,8,2012,32,60,18,0.3,77.1,11.3,47.0,2.2,14.1,2.6,not fire ,0 71 | 9,8,2012,35,59,17,0.0,87.4,14.8,57.0,6.9,17.9,9.9,fire ,0 72 | 10,8,2012,35,55,14,0.0,88.9,18.6,67.0,7.4,21.9,11.6,fire ,0 73 | 11,8,2012,35,63,13,0.0,88.9,21.7,77.0,7.1,25.5,12.1,fire ,0 74 | 
12,8,2012,35,51,13,0.3,81.3,15.6,75.1,2.5,20.7,4.2,not fire ,0 75 | 13,8,2012,35,63,15,0.0,87.0,19.0,85.1,5.9,24.4,10.2,fire ,0 76 | 14,8,2012,33,66,14,0.0,87.0,21.7,94.7,5.7,27.2,10.6,fire ,0 77 | 15,8,2012,36,55,13,0.3,82.4,15.6,92.5,3.7,22.0,6.3,fire ,0 78 | 16,8,2012,36,61,18,0.3,80.2,11.7,90.4,2.8,17.6,4.2,fire ,0 79 | 17,8,2012,37,52,18,0.0,89.3,16.0,100.7,9.7,22.9,14.6,fire ,0 80 | 18,8,2012,36,54,18,0.0,89.4,20.0,110.9,9.7,27.5,16.1,fire ,0 81 | 19,8,2012,35,62,19,0.0,89.4,23.2,120.9,9.7,31.3,17.2,fire ,0 82 | 20,8,2012,35,68,19,0.0,88.3,25.9,130.6,8.8,34.7,16.8,fire ,0 83 | 21,8,2012,36,58,19,0.0,88.6,29.6,141.1,9.2,38.8,18.4,fire ,0 84 | 22,8,2012,36,55,18,0.0,89.1,33.5,151.3,9.9,43.1,20.4,fire ,0 85 | 23,8,2012,36,53,16,0.0,89.5,37.6,161.5,10.4,47.5,22.3,fire,0 86 | 24,8,2012,34,64,14,0.0,88.9,40.5,171.3,9.0,50.9,20.9,fire ,0 87 | 25,8,2012,35,60,15,0.0,88.9,43.9,181.3,8.2,54.7,20.3,fire,0 88 | 26,8,2012,31,78,18,0.0,85.8,45.6,190.6,4.7,57.1,13.7,fire ,0 89 | 27,8,2012,33,82,21,0.0,84.9,47.0,200.2,4.4,59.3,13.2,fire,0 90 | 28,8,2012,34,64,16,0.0,89.4,50.2,210.4,7.3,62.9,19.9,fire ,0 91 | 29,8,2012,35,48,18,0.0,90.1,54.2,220.4,12.5,67.4,30.2,fire ,0 92 | 30,8,2012,35,70,17,0.8,72.7,25.2,180.4,1.7,37.4,4.2,not fire ,0 93 | 31,8,2012,28,80,21,16.8,52.5,8.7,8.7,0.6,8.3,0.3,not fire,0 94 | 1,9,2012,25,76,17,7.2,46.0,1.3,7.5,0.2,1.8,0.1,not fire ,0 95 | 2,9,2012,22,86,15,10.1,30.5,0.7,7.0,0.0,1.1,0.0,not fire,0 96 | 3,9,2012,25,78,15,3.8,42.6,1.2,7.5,0.1,1.7,0.0,not fire ,0 97 | 4,9,2012,29,73,17,0.1,68.4,1.9,15.7,1.4,2.9,0.5,not fire ,0 98 | 5,9,2012,29,75,16,0.0,80.8,3.4,24.0,2.8,5.1,1.7,fire ,0 99 | 6,9,2012,29,74,19,0.1,75.8,3.6,32.2,2.1,5.6,0.9,not fire ,0 100 | 7,9,2012,31,71,17,0.3,69.6,3.2,30.1,1.5,5.1,0.6,not fire ,0 101 | 8,9,2012,30,73,17,0.9,62.0,2.6,8.4,1.1,3.0,0.4,not fire ,0 102 | 9,9,2012,30,77,15,1.0,56.1,2.1,8.4,0.7,2.6,0.2,not fire ,0 103 | 10,9,2012,33,73,12,1.8,59.9,2.2,8.9,0.7,2.7,0.3,not fire ,0 104 | 11,9,2012,30,77,21,1.8,58.5,1.9,8.4,1.1,2.4,0.3,not fire ,0 105 | 12,9,2012,29,88,13,0.0,71.0,2.6,16.6,1.2,3.7,0.5,not fire ,0 106 | 13,9,2012,25,86,21,4.6,40.9,1.3,7.5,0.1,1.8,0.0,not fire ,0 107 | 14,9,2012,22,76,26,8.3,47.4,1.1,7.0,0.4,1.6,0.1,not fire ,0 108 | 15,9,2012,24,82,15,0.4,44.9,0.9,7.3,0.2,1.4,0.0,not fire ,0 109 | 16,9,2012,30,65,14,0.0,78.1,3.2,15.7,1.9,4.2,0.8,not fire ,0 110 | 17,9,2012,31,52,14,0.0,87.7,6.4,24.3,6.2,7.7,5.9,fire ,0 111 | 18,9,2012,32,49,11,0.0,89.4,9.8,33.1,6.8,11.3,7.7,fire ,0 112 | 19,9,2012,29,57,14,0.0,89.3,12.5,41.3,7.8,14.2,9.7,fire ,0 113 | 20,9,2012,28,84,18,0.0,83.8,13.5,49.3,4.5,16.0,6.3,fire,0 114 | 21,9,2012,31,55,11,0.0,87.8,16.5,57.9,5.4,19.2,8.3,fire ,0 115 | 22,9,2012,31,50,19,0.6,77.8,10.6,41.4,2.4,12.9,2.8,not fire ,0 116 | 23,9,2012,32,54,11,0.5,73.7,7.9,30.4,1.2,9.6,0.7,not fire ,0 117 | 24,9,2012,29,65,19,0.6,68.3,5.5,15.2,1.5,5.8,0.7,not fire ,0 118 | 25,9,2012,26,81,21,5.8,48.6,3.0,7.7,0.4,3.0,0.1,not fire ,0 119 | 26,9,2012,31,54,11,0.0,82.0,6.0,16.3,2.5,6.2,1.7,not fire ,0 120 | 27,9,2012,31,66,11,0.0,85.7,8.3,24.9,4.0,9.0,4.1,fire ,0 121 | 28,9,2012,32,47,14,0.7,77.5,7.1,8.8,1.8,6.8,0.9,not fire ,0 122 | 29,9,2012,26,80,16,1.8,47.4,2.9,7.7,0.3,3.0,0.1,not fire ,0 123 | 30,9,2012,25,78,14,1.4,45.0,1.9,7.5,0.2,2.4,0.1,not fire ,0 124 | 1,6,2012,32,71,12,0.7,57.1,2.5,8.2,0.6,2.8,0.2,not fire ,1 125 | 2,6,2012,30,73,13,4.0,55.7,2.7,7.8,0.6,2.9,0.2,not fire ,1 126 | 3,6,2012,29,80,14,2.0,48.7,2.2,7.6,0.3,2.6,0.1,not fire ,1 127 | 4,6,2012,30,64,14,0.0,79.4,5.2,15.4,2.2,5.6,1.0,not fire ,1 128 | 
5,6,2012,32,60,14,0.2,77.1,6.0,17.6,1.8,6.5,0.9,not fire ,1 129 | 6,6,2012,35,54,11,0.1,83.7,8.4,26.3,3.1,9.3,3.1,fire ,1 130 | 7,6,2012,35,44,17,0.2,85.6,9.9,28.9,5.4,10.7,6.0,fire ,1 131 | 8,6,2012,28,51,17,1.3,71.4,7.7,7.4,1.5,7.3,0.8,not fire ,1 132 | 9,6,2012,27,59,18,0.1,78.1,8.5,14.7,2.4,8.3,1.9,not fire ,1 133 | 10,6,2012,30,41,15,0.0,89.4,13.3,22.5,8.4,13.1,10.0,fire ,1 134 | 11,6,2012,31,42,21,0.0,90.6,18.2,30.5,13.4,18.0,16.7,fire ,1 135 | 12,6,2012,27,58,17,0.0,88.9,21.3,37.8,8.7,21.2,12.9,fire ,1 136 | 13,6,2012,30,52,15,2.0,72.3,11.4,7.8,1.4,10.9,0.9,not fire ,1 137 | 14,6,2012,27,79,16,0.7,53.4,6.4,7.3,0.5,6.1,0.3,not fire ,1 138 | 15,6,2012,28,90,15,0.0,66.8,7.2,14.7,1.2,7.1,0.6,not fire ,1 139 | 16,6,2012,29,87,15,0.4,47.4,4.2,8.0,0.2,4.1,0.1,not fire ,1 140 | 17,6,2012,31,69,17,4.7,62.2,3.9,8.0,1.1,3.8,0.4,not fire ,1 141 | 18,6,2012,33,62,10,8.7,65.5,4.6,8.3,0.9,4.4,0.4,not fire ,1 142 | 19,6,2012,32,67,14,4.5,64.6,4.4,8.2,1.0,4.2,0.4,not fire ,1 143 | 20,6,2012,31,72,14,0.2,60.2,3.8,8.0,0.8,3.7,0.3,not fire ,1 144 | 21,6,2012,32,55,14,0.0,86.2,8.3,18.4,5.0,8.2,4.9,fire ,1 145 | 22,6,2012,33,46,14,1.1,78.3,8.1,8.3,1.9,7.7,1.2,not fire ,1 146 | 23,6,2012,33,59,16,0.8,74.2,7.0,8.3,1.6,6.7,0.8,not fire ,1 147 | 24,6,2012,35,68,16,0.0,85.3,10.0,17.0,4.9,9.9,5.3,fire ,1 148 | 25,6,2012,34,70,16,0.0,86.0,12.8,25.6,5.4,12.7,6.7,fire ,1 149 | 26,6,2012,36,62,16,0.0,87.8,16.5,34.5,7.0,16.4,9.5,fire ,1 150 | 27,6,2012,36,55,15,0.0,89.1,20.9,43.3,8.0,20.8,12.0,fire ,1 151 | 28,6,2012,37,37,13,0.0,92.5,27.2,52.4,11.7,27.1,18.4,fire ,1 152 | 29,6,2012,37,36,13,0.6,86.2,17.9,36.7,4.8,17.8,7.2,fire ,1 153 | 30,6,2012,34,42,15,1.7,79.7,12.0,8.5,2.2,11.5,2.2,not fire ,1 154 | 1,7,2012,28,58,18,2.2,63.7,3.2,8.5,1.2,3.3,0.5,not fire ,1 155 | 2,7,2012,33,48,16,0.0,87.6,7.9,17.8,6.8,7.8,6.4,fire ,1 156 | 3,7,2012,34,56,17,0.1,84.7,9.7,27.3,4.7,10.3,5.2,fire ,1 157 | 4,7,2012,34,58,18,0.0,88.0,13.6,36.8,8.0,14.1,9.9,fire ,1 158 | 5,7,2012,34,45,18,0.0,90.5,18.7,46.4,11.3,18.7,15.0,fire ,1 159 | 6,7,2012,35,42,15,0.3,84.7,15.5,45.1,4.3,16.7,6.3,fire ,1 160 | 7,7,2012,38,43,13,0.5,85.0,13.0,35.4,4.1,13.7,5.2,fire ,1 161 | 8,7,2012,35,47,18,6.0,80.8,9.8,9.7,3.1,9.4,3.0,fire ,1 162 | 9,7,2012,36,43,15,1.9,82.3,9.4,9.9,3.2,9.0,3.1,fire ,1 163 | 10,7,2012,34,51,16,3.8,77.5,8.0,9.5,2.0,7.7,1.3,not fire ,1 164 | 11,7,2012,34,56,15,2.9,74.8,7.1,9.5,1.6,6.8,0.8,not fire ,1 165 | 12,7,2012,36,44,13,0.0,90.1,12.6,19.4,8.3,12.5,9.6,fire ,1 166 | 13,7,2012,39,45,13,0.6,85.2,11.3,10.4,4.2,10.9,4.7,fire ,1 167 | 15,7,2012,34,45,17,0.0,90.5,18.0,24.1,10.9,17.7,14.1,fire ,1 168 | 16,7,2012,31,83,17,0.0,84.5,19.4,33.1,4.7,19.2,7.3,fire ,1 169 | 17,7,2012,32,81,17,0.0,84.6,21.1,42.3,4.7,20.9,7.7,fire ,1 170 | 18,7,2012,33,68,15,0.0,86.1,23.9,51.6,5.2,23.9,9.1,fire ,1 171 | 19,7,2012,34,58,16,0.0,88.1,27.8,61.1,7.3,27.7,13.0,fire ,1 172 | 20,7,2012,36,50,16,0.0,89.9,32.7,71.0,9.5,32.6,17.3,fire ,1 173 | 21,7,2012,36,29,18,0.0,93.9,39.6,80.6,18.5,39.5,30.0,fire ,1 174 | 22,7,2012,32,48,18,0.0,91.5,44.2,90.1,13.2,44.0,25.4,fire ,1 175 | 23,7,2012,31,71,17,0.0,87.3,46.6,99.0,6.9,46.5,16.3,fire ,1 176 | 24,7,2012,33,63,17,1.1,72.8,20.9,56.6,1.6,21.7,2.5,not fire ,1 177 | 25,7,2012,39,64,9,1.2,73.8,11.7,15.9,1.1,11.4,0.7,not fire ,1 178 | 26,7,2012,35,58,10,0.2,78.3,10.8,19.7,1.6,10.7,1.0,not fire ,1 179 | 27,7,2012,29,87,18,0.0,80.0,11.8,28.3,2.8,11.8,3.2,not fire ,1 180 | 28,7,2012,33,57,16,0.0,87.5,15.7,37.6,6.7,15.7,9.0,fire ,1 181 | 29,7,2012,34,59,16,0.0,88.1,19.5,47.2,7.4,19.5,10.9,fire ,1 182 | 
30,7,2012,36,56,16,0.0,88.9,23.8,57.1,8.2,23.8,13.2,fire ,1 183 | 31,7,2012,37,55,15,0.0,89.3,28.3,67.2,8.3,28.3,14.5,fire ,1 184 | 1,8,2012,38,52,14,0.0,78.3,4.4,10.5,2.0,4.4,0.8,not fire ,1 185 | 2,8,2012,40,34,14,0.0,93.3,10.8,21.4,13.8,10.6,13.5,fire ,1 186 | 3,8,2012,39,33,17,0.0,93.7,17.1,32.1,17.2,16.9,19.5,fire ,1 187 | 4,8,2012,38,35,15,0.0,93.8,23.0,42.7,15.7,22.9,20.9,fire ,1 188 | 5,8,2012,34,42,17,0.1,88.3,23.6,52.5,19.0,23.5,12.6,fire ,1 189 | 6,8,2012,30,54,14,3.1,70.5,11.0,9.1,1.3,10.5,0.8,not fire ,1 190 | 7,8,2012,34,63,13,2.9,69.7,7.2,9.8,1.2,6.9,0.6,not fire ,1 191 | 8,8,2012,37,56,11,0.0,87.4,11.2,20.2,5.2,11.0,5.9,fire ,1 192 | 9,8,2012,39,43,12,0.0,91.7,16.5,30.9,9.6,16.4,12.7,fire ,1 193 | 10,8,2012,39,39,15,0.2,89.3,15.8,35.4,8.2,15.8,10.7,fire ,1 194 | 11,8,2012,40,31,15,0.0,94.2,22.5,46.3,16.6,22.4,21.6,fire ,1 195 | 12,8,2012,39,21,17,0.4,93.0,18.4,41.5,15.5,18.4,18.8,fire ,1 196 | 13,8,2012,35,34,16,0.2,88.3,16.9,45.1,7.5,17.5,10.5,fire ,1 197 | 14,8,2012,37,40,13,0.0,91.9,22.3,55.5,10.8,22.3,15.7,fire ,1 198 | 15,8,2012,35,46,13,0.3,83.9,16.9,54.2,3.5,19.0,5.5,fire ,1 199 | 16,8,2012,40,41,10,0.1,92.0,22.6,65.1,9.5,24.2,14.8,fire ,1 200 | 17,8,2012,42,24,9,0.0,96.0,30.3,76.4,15.7,30.4,24.0,fire ,1 201 | 18,8,2012,37,37,14,0.0,94.3,35.9,86.8,16.0,35.9,26.3,fire ,1 202 | 19,8,2012,35,66,15,0.1,82.7,32.7,96.8,3.3,35.5,7.7,fire ,1 203 | 20,8,2012,36,81,15,0.0,83.7,34.4,107.0,3.8,38.1,9.0,fire ,1 204 | 21,8,2012,36,71,15,0.0,86.0,36.9,117.1,5.1,41.3,12.2,fire ,1 205 | 22,8,2012,37,53,14,0.0,89.5,41.1,127.5,8.0,45.5,18.1,fire ,1 206 | 23,8,2012,36,43,16,0.0,91.2,46.1,137.7,11.5,50.2,24.5,fire ,1 207 | 24,8,2012,35,38,15,0.0,92.1,51.3,147.7,12.2,54.9,26.9,fire ,1 208 | 25,8,2012,34,40,18,0.0,92.1,56.3,157.5,14.3,59.5,31.1,fire ,1 209 | 26,8,2012,33,37,16,0.0,92.2,61.3,167.2,13.1,64.0,30.3,fire ,1 210 | 27,8,2012,36,54,14,0.0,91.0,65.9,177.3,10.0,68.0,26.1,fire ,1 211 | 28,8,2012,35,56,14,0.4,79.2,37.0,166.0,2.1,30.6,6.1,not fire ,1 212 | 29,8,2012,35,53,17,0.5,80.2,20.7,149.2,2.7,30.6,5.9,fire ,1 213 | 30,8,2012,34,49,15,0.0,89.2,24.8,159.1,8.1,35.7,16.0,fire ,1 214 | 31,8,2012,30,59,19,0.0,89.1,27.8,168.2,9.8,39.3,19.4,fire ,1 215 | 1,9,2012,29,86,16,0.0,37.9,0.9,8.2,0.1,1.4,0.0,not fire ,1 216 | 2,9,2012,28,67,19,0.0,75.4,2.9,16.3,2.0,4.0,0.8,not fire ,1 217 | 3,9,2012,28,75,16,0.0,82.2,4.4,24.3,3.3,6.0,2.5,fire ,1 218 | 4,9,2012,30,66,15,0.2,73.5,4.1,26.6,1.5,6.0,0.7,not fire ,1 219 | 5,9,2012,30,58,12,4.1,66.1,4.0,8.4,1.0,3.9,0.4,not fire ,1 220 | 6,9,2012,34,71,14,6.5,64.5,3.3,9.1,1.0,3.5,0.4,not fire ,1 221 | 7,9,2012,31,62,15,0.0,83.3,5.8,17.7,3.8,6.4,3.2,fire ,1 222 | 8,9,2012,30,88,14,0.0,82.5,6.6,26.1,3.0,8.1,2.7,fire ,1 223 | 9,9,2012,30,80,15,0.0,83.1,7.9,34.5,3.5,10.0,3.7,fire ,1 224 | 10,9,2012,29,74,15,1.1,59.5,4.7,8.2,0.8,4.6,0.3,not fire ,1 225 | 11,9,2012,30,73,14,0.0,79.2,6.5,16.6,2.1,6.6,1.2,not fire ,1 226 | 12,9,2012,31,72,14,0.0,84.2,8.3,25.2,3.8,9.1,3.9,fire ,1 227 | 13,9,2012,29,49,19,0.0,88.6,11.5,33.4,9.1,12.4,10.3,fire ,1 228 | 14,9,2012,28,81,15,0.0,84.6,12.6,41.5,4.3,14.3,5.7,fire ,1 229 | 15,9,2012,32,51,13,0.0,88.7,16.0,50.2,6.9,17.8,9.8,fire ,1 230 | 16,9,2012,33,26,13,0.0,93.9,21.2,59.2,14.2,22.4,19.3,fire ,1 231 | 17,9,2012,34,44,12,0.0,92.5,25.2,63.3,11.2,26.2,17.5,fire ,1 232 | 18,9,2012,36,33,13,0.1,90.6,25.8,77.8,9.0,28.2,15.4,fire ,1 233 | 19,9,2012,29,41,8,0.1,83.9,24.9,86.0,2.7,28.9,5.6,fire ,1 234 | 20,9,2012,34,58,13,0.2,79.5,18.7,88.0,2.1,24.4,3.8,not fire ,1 235 | 21,9,2012,35,34,17,0.0,92.2,23.6,97.3,13.8,29.4,21.6,fire 
,1 236 | 22,9,2012,33,64,13,0.0,88.9,26.1,106.3,7.1,32.4,13.7,fire ,1 237 | 23,9,2012,35,56,14,0.0,89.0,29.4,115.6,7.5,36.0,15.2,fire ,1 238 | 24,9,2012,26,49,6,2.0,61.3,11.9,28.1,0.6,11.9,0.4,not fire ,1 239 | 25,9,2012,28,70,15,0.0,79.9,13.8,36.1,2.4,14.1,3.0,not fire ,1 240 | 26,9,2012,30,65,14,0.0,85.4,16.0,44.5,4.5,16.9,6.5,fire ,1 241 | 27,9,2012,28,87,15,4.4,41.1,6.5,8.0,0.1,6.2,0.0,not fire ,1 242 | 28,9,2012,27,87,29,0.5,45.9,3.5,7.9,0.4,3.4,0.2,not fire ,1 243 | 29,9,2012,24,54,18,0.1,79.7,4.3,15.2,1.7,5.1,0.7,not fire ,1 244 | 30,9,2012,24,64,15,0.2,67.3,3.8,16.5,1.2,4.8,0.5,not fire ,1 245 | --------------------------------------------------------------------------------
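A minimal sketch of loading the cleaned CSV shown above (the one that sits alongside the regression material in 1-Complete Linear Regression and 2-Ridge Lasso And Elasticnet). This is illustrative only and assumes pandas is installed and a local copy of the file is in the working directory; the choice of FWI as the example regression target and the exact path are assumptions, not taken from the notebooks themselves.

import pandas as pd

# Assumed local path; adjust to wherever the cleaned CSV lives in your checkout.
df = pd.read_csv("Algerian_forest_fires_cleaned_dataset.csv")

# The raw class labels carry trailing spaces ("fire ", "not fire "), so strip them.
df["Classes"] = df["Classes"].str.strip()

# Example split: weather readings and fire-index columns as features,
# FWI (Fire Weather Index) as an illustrative regression target.
X = df.drop(columns=["FWI", "Classes"])
y = df["FWI"]

print(X.shape, y.shape)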