├── h.js
├── hello.js
├── a.py
├── Problem Statement
    └── Train data
    │   ├── .md
    │   └── AttributeInformation.pdf
├── Spam-Email-detection-system-main
    ├── _config.yml
    ├── spam_model.pkl
    ├── static
    │   ├── img
    │   │   ├── logo-w.png
    │   │   ├── safe.png
    │   │   ├── spam-2.png
    │   │   ├── spam.png
    │   │   ├── insurance-protected.png
    │   │   └── pngkey.com-scam-alert-png-4321853.png
    │   ├── audio
    │   │   ├── safe.mpeg
    │   │   └── warning.mpeg
    │   └── css
    │   │   └── style.css
    ├── app.py
    ├── templates
    │   └── index.html
    └── Untitled2.ipynb
├── a-hands-on-discussion-on-hyperparameter-optimization-techniques.pdf
├── id 3 algorithum (1).ipynb
├── Feature Selection
    ├── Embedded method.ipynb
    ├── Filter method.ipynb
    ├── Wrapper method .ipynb
    └── feature-selection-technique-in-machine-learning.ipynb
└── regularization-in-machine-learning
    └── regularization-in-machine-learning.ipynb


/h.js:
--------------------------------------------------------------------------------
1 | console.log("print")
2 | 


--------------------------------------------------------------------------------
/hello.js:
--------------------------------------------------------------------------------
1 | console.log("hello")
2 | 


--------------------------------------------------------------------------------
/a.py:
--------------------------------------------------------------------------------
1 | print("Hello. Developer")
2 | 


--------------------------------------------------------------------------------
/Problem Statement/Train data/.md:
--------------------------------------------------------------------------------
1 | .md
2 | 


--------------------------------------------------------------------------------
/Spam-Email-detection-system-main/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-cayman


--------------------------------------------------------------------------------
/Spam-Email-detection-system-main/spam_model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mouneshgouda/Machine_Learning/HEAD/Spam-Email-detection-system-main/spam_model.pkl


--------------------------------------------------------------------------------
/Problem Statement/Train data/AttributeInformation.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mouneshgouda/Machine_Learning/HEAD/Problem Statement/Train data/AttributeInformation.pdf


--------------------------------------------------------------------------------
/Spam-Email-detection-system-main/static/img/logo-w.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mouneshgouda/Machine_Learning/HEAD/Spam-Email-detection-system-main/static/img/logo-w.png


--------------------------------------------------------------------------------
/Spam-Email-detection-system-main/static/img/safe.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mouneshgouda/Machine_Learning/HEAD/Spam-Email-detection-system-main/static/img/safe.png


--------------------------------------------------------------------------------
/Spam-Email-detection-system-main/static/img/spam-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mouneshgouda/Machine_Learning/HEAD/Spam-Email-detection-system-main/static/img/spam-2.png


--------------------------------------------------------------------------------
/Spam-Email-detection-system-main/static/img/spam.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mouneshgouda/Machine_Learning/HEAD/Spam-Email-detection-system-main/static/img/spam.png


--------------------------------------------------------------------------------
/Spam-Email-detection-system-main/static/audio/safe.mpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mouneshgouda/Machine_Learning/HEAD/Spam-Email-detection-system-main/static/audio/safe.mpeg


--------------------------------------------------------------------------------
/Spam-Email-detection-system-main/static/audio/warning.mpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mouneshgouda/Machine_Learning/HEAD/Spam-Email-detection-system-main/static/audio/warning.mpeg


--------------------------------------------------------------------------------
/Spam-Email-detection-system-main/static/img/insurance-protected.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mouneshgouda/Machine_Learning/HEAD/Spam-Email-detection-system-main/static/img/insurance-protected.png


--------------------------------------------------------------------------------
/a-hands-on-discussion-on-hyperparameter-optimization-techniques.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mouneshgouda/Machine_Learning/HEAD/a-hands-on-discussion-on-hyperparameter-optimization-techniques.pdf


--------------------------------------------------------------------------------
/Spam-Email-detection-system-main/static/img/pngkey.com-scam-alert-png-4321853.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mouneshgouda/Machine_Learning/HEAD/Spam-Email-detection-system-main/static/img/pngkey.com-scam-alert-png-4321853.png


--------------------------------------------------------------------------------
/Spam-Email-detection-system-main/app.py:
--------------------------------------------------------------------------------
 1 | from flask import Flask,render_template,request,jsonify
 2 | import pandas as pd
 3 | import numpy as np 
 4 | import joblib
 5 | 
 6 | app = Flask(__name__)  # application object; the route below registers on it
 7 | 
 8 | model = joblib.load('spam_model.pkl')  # loaded once at import; relative path resolves against the CWD. NOTE(review): joblib.load unpickles - only ship a trusted file
 9 | 
10 | @app.route('/',methods=['GET', 'POST'])
11 | def index():
12 |   if request.method == 'POST':
13 |     message = request.form.get('message')
14 |     output = model.predict([message])
15 |     if output == [0]:
16 |       result = "This Message is Not a SPAM Message."
17 |     else:
18 |       result = "This Message is a SPAM Message." 
19 |     return render_template('index.html', result=result,message=message)      
20 | 
21 |   else:
22 |     return render_template('index.html')  
23 | 
24 | 
25 | if __name__ == '__main__':
26 |     app.run(debug=True)  # NOTE(review): debug=True turns on Flask's debugger/reloader - development use only


--------------------------------------------------------------------------------
/Spam-Email-detection-system-main/templates/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | 
 4 | <head>
 5 |     <meta charset="UTF-8">
 6 |     <meta http-equiv="X-UA-Compatible" content="IE=edge">
 7 |     <meta name="viewport" content="width=device-width, initial-scale=1.0">
 8 |     <link rel="stylesheet" href="{{url_for('static', filename='css/style.css')}}">
 9 |     <link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet">
10 |     <link rel="shortcut icon" href="static/img/logo-w.png" />
11 |     <title>SPAM Detector Website</title>
12 | </head>
13 | 
14 | <body>
15 |     <div class="container">
16 |         <div class="head-nav">
17 |             <h3 style="font-size:10px; color:yellow; font-weight:bold; font-family:Cambria, Cochin, Georgia, Times, 'Times New Roman', serif;">Disclaimer: Please click on the three dots and then "Add to Home screen" for a better view and quick access</h3>
18 |         </div>
19 |         <div class="logo-heading">
20 |             <img src="{{url_for('static', filename='img/logo-w.png')}}" alt="Spam Detector">
21 |         </div>
22 |         <div class="container-data">
23 |             <form action="/" method="post"> <!-- blank input is rejected client-side via "required" on the textarea -->
24 |                 <textarea class="text-box" id="sentence" name="message" placeholder="Enter a message. Example: Upto 30% off on sale. Buy now!" required></textarea>
25 |                 <button type="submit" class="btn">CHECK</button>
26 |             </form>
27 |         </div>
28 |         <br> {% if message %}
29 |         <div class="show-result">
30 |             <div class="output">
31 |                 <h3 style="color:black; font-size:22px; font-weight:bold; font-family:'Courier New', Courier, monospace; text-transform: uppercase;">{{message}}</h3>
32 |             </div>
33 |             <br> {% if result=='This Message is Not a SPAM Message.' %}
34 |             <div class="output-logo">
35 |                 <img src="{{url_for('static', filename='img/spam-2.png')}}">
36 |             </div>
37 |             {% endif %} {% if result=='This Message is Not a SPAM Message.' %}
38 |             <div class="alert">
39 |                 <audio controls autoplay>
40 |             <source src="{{url_for('static', filename='audio/safe.mpeg')}}" type="audio/mp3">
41 |         </audio>
42 |             </div>
43 |             {% endif %} {% if result=='This Message is a SPAM Message.' %}
44 |             <div class="output-logo">
45 |                 <img src="{{url_for('static', filename='img/spam.png')}}">
46 |             </div>
47 |             {% endif %} {% if result=='This Message is Not a SPAM Message.' %}
48 |             <div class="output-not">{{result}}</div>
49 |             {% endif %} {% if result=='This Message is a SPAM Message.' %}
50 |             <div class="output">{{result}}</div>
51 |             {% endif %} {% if result=='This Message is a SPAM Message.' %}
52 |             <div class="alert">
53 |                 <audio controls autoplay>
54 |             <source src="{{url_for('static', filename='audio/warning.mpeg')}}" type="audio/mp3">
55 |         </audio>
56 |             </div>
57 |             {% endif %}
58 |         </div>
59 |         {% else %}
60 |         <h3 style="color:red; padding:30px;font-size:35px; font-weight:bold; font-family:'Courier New', Courier, monospace; text-transform: uppercase;">Enter a message to check whether it is SPAM or NOT-SPAM.</h3>
61 |         {% endif %}
62 |         <div class="head-git">
63 |         </div>
64 |         <div class="head">
65 |         </div>
66 | </body>
67 | 
68 | </html>


--------------------------------------------------------------------------------
/Spam-Email-detection-system-main/static/css/style.css:
--------------------------------------------------------------------------------
  1 | 
  2 | * {
  3 |     border: 0;
  4 |     box-sizing: border-box;
  5 |     margin: 0;
  6 |   }
  7 |   .container{ /* page wrapper: stacks its children in a centred flex column */
  8 |     height: auto; /* was "content", which is not a valid height value and was ignored */
  9 |     width: 100%;
 10 |     justify-content: center;
 11 |     align-items: center;
 12 |     display: flex;
 13 |     flex-flow: column;
 14 |   }
 15 |   .head-nav {
 16 |     height:30px;
 17 |     width:100%;
 18 |     background-color: green;
 19 |     display:flex;
 20 |     justify-content: center;
 21 |     align-items: center;
 22 |     padding: 8px;
 23 |   }
 24 |   .logo-heading {
 25 |     height: 250px;
 26 |     width: 250px;
 27 |     margin-top: -20px;
 28 |     justify-content: center;
 29 |     align-items: center;
 30 |     display: flex;
 31 |   }
 32 |   .logo-heading img {
 33 |     height: 100%;
 34 |     width: 100%;
 35 |   }
 36 |   .container-data {
 37 |     height: 200px;
 38 |     width: 100%;
 39 |     justify-content: center;
 40 |     display: flex;
 41 |     align-items: center;
 42 |     background-color:rgb(236, 214, 214);
 43 |     flex-flow: column;
 44 |     margin-top: -40px;
 45 |     padding-left: 10px;
 46 |     padding-right: 10px;
 47 |   }
 48 | .text-box {
 49 |     height: 65px;
 50 |     width:90%;
 51 |     border-radius: 5px;
 52 |     border: 2px solid green;
 53 |     background-color: rgb(252, 248, 248);
 54 |     margin-top: 15px;
 55 |     font-size: 14px;
 56 |     font-weight: bold;
 57 | }
 58 | .btn {
 59 |     height: 35px;
 60 |     width:100px;
 61 |     border-radius: 5px;
 62 |     border: 2px solid black;
 63 |     background-color: green;
 64 |     color: white;
 65 |     margin-top: 15px;
 66 |   }
 67 | 
 68 | .btn:hover {
 69 |     color: white;
 70 |     background-color:red;
 71 |     cursor: pointer;
 72 | }
 73 | .show-result { /* result panel: flex column holding the message, icon and verdict */
 74 |     height: auto; /* was "content", which is not a valid height value and was ignored */
 75 |     width: 100%;
 76 |     display: flex;
 77 |     justify-content: center;
 78 |     flex-flow: column wrap;
 79 |     background-color: rgb(252, 243, 243);
 80 |     margin-bottom: 50px;
 81 |     margin-top: -30px; /* negative margin pulls the panel up towards the block above */
 82 |     padding: 35px;
 83 | }
 84 | .output { /* centred red text box (used for the echoed message and the spam verdict) */
 85 |     height: auto; /* was "content", which is not a valid height value and was ignored */
 86 |     width: 100%;
 87 |     display: flex;
 88 |     justify-content: center;
 89 |     align-items: center;
 90 |     font-size: 19px;
 91 |     font-weight: 300;
 92 |     font-family:'Lucida Sans', 'Lucida Sans Regular', 'Lucida Grande', 'Lucida Sans Unicode', Geneva, Verdana, sans-serif;
 93 |     color: red;
 94 |     margin-top: 5px;
 95 |     padding: 30px;
 96 | }
 97 | .output-not { /* green counterpart of .output, used for the not-spam verdict */
 98 |   height: auto; /* was "content", which is not a valid height value and was ignored */
 99 |   width: 100%;
100 |   display: flex;
101 |   justify-content: center;
102 |   align-items: center;
103 |   font-size: 19px;
104 |   font-weight: 300;
105 |   font-family:'Lucida Sans', 'Lucida Sans Regular', 'Lucida Grande', 'Lucida Sans Unicode', Geneva, Verdana, sans-serif;
106 |   color: green;
107 |   margin-top: 3px;
108 |   padding: 30px;
109 | }
110 | .output-logo {
111 |   height: 100px;
112 |   width: 100%;
113 |   margin-top:20px;
114 |   justify-content: center;
115 |   display: flex;
116 |   align-items: center;
117 | }
118 | .output-logo img {
119 |   height: 100%;
120 |   width: 100px;
121 | }
122 | 
123 | .head{
124 |   height: 40px;
125 |   width: 100%;
126 |   justify-content: center;
127 |   align-items: center;
128 |   border-top: 2px solid black;
129 |   background-color: yellow;
130 |   display: flex;
131 |   flex-flow: column;
132 | }
133 | .head h2 {
134 |   font-size: 15px;
135 |   font-family: 'Gill Sans', 'Gill Sans MT', Calibri, 'Trebuchet MS', sans-serif;
136 |   color:black;
137 | }
138 | .head-git{
139 |   height: 40px;
140 |   width: 100%;
141 |   justify-content: center;
142 |   align-items: center;
143 |   background-color: rgb(250, 244, 244);
144 |   display: flex;
145 |   flex-flow: row;
146 |   margin-bottom: 5px;
147 | }
148 | .head-git h2 {
149 |   font-size: 18px;
150 |   font-family: 'Gill Sans', 'Gill Sans MT', Calibri, 'Trebuchet MS', sans-serif;
151 |   color:black;
152 | }
153 | .head-git h2 a { 
154 |   text-decoration: none;
155 |   color:blue;
156 |   font-size: 23px;
157 | }
158 | .head-git h2 a:hover { 
159 |   color:rgb(211, 47, 41);
160 |   background-color: beige;
161 | }
162 | .alert {
163 |   visibility: hidden;
164 | }


--------------------------------------------------------------------------------
/id 3 algorithum (1).ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 2,
  6 |    "id": "bc63810d",
  7 |    "metadata": {},
  8 |    "outputs": [
  9 |     {
 10 |      "name": "stdout",
 11 |      "output_type": "stream",
 12 |      "text": [
 13 |       "Enter Outlook (Sunny/Overcast/Rain): Overcast\n",
 14 |       "Enter Temperature (Hot/Mild/Cool): Mild\n",
 15 |       "Enter Humidity (High/Normal): Normal\n",
 16 |       "Enter Wind (Weak/Strong): Strong\n",
 17 |       "\n",
 18 |       "Predicted PlayTennis for the new instance: No\n"
 19 |      ]
 20 |     }
 21 |    ],
 22 |    "source": [
 23 |     "import pandas as pd\n",
 24 |     "from sklearn.model_selection import train_test_split\n",
 25 |     "from sklearn.tree import DecisionTreeClassifier\n",
 26 |     "from sklearn.metrics import accuracy_score, classification_report\n",
 27 |     "\n",
 28 |     "# Sample dataset: PlayTennis\n",
 29 |     "data = {\n",
 30 |     "    'Outlook': ['Sunny', 'Sunny', 'Overcast', 'Rain', 'Rain', 'Rain', 'Overcast', 'Sunny', 'Sunny', 'Rain'],\n",
 31 |     "    'Temperature': ['Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Cool', 'Mild', 'Cool', 'Mild'],\n",
 32 |     "    'Humidity': ['High', 'High', 'High', 'High', 'Normal', 'Normal', 'Normal', 'High', 'Normal', 'Normal'],\n",
 33 |     "    'Wind': ['Weak', 'Strong', 'Weak', 'Weak', 'Weak', 'Strong', 'Strong', 'Weak', 'Weak', 'Weak'],\n",
 34 |     "    'PlayTennis': ['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes', 'No']\n",
 35 |     "}\n",
 36 |     "\n",
 37 |     "df = pd.DataFrame(data)\n",
 38 |     "\n",
 39 |     "# Extract features and target variable\n",
 40 |     "X = pd.get_dummies(df.drop('PlayTennis', axis=1))  # Convert categorical variables to numerical\n",
 41 |     "y = df['PlayTennis']\n",
 42 |     "\n",
 43 |     "# Get user input for new instance\n",
 44 |     "new_outlook = input(\"Enter Outlook (Sunny/Overcast/Rain): \")\n",
 45 |     "new_temperature = input(\"Enter Temperature (Hot/Mild/Cool): \")\n",
 46 |     "new_humidity = input(\"Enter Humidity (High/Normal): \")\n",
 47 |     "new_wind = input(\"Enter Wind (Weak/Strong): \")\n",
 48 |     "\n",
 49 |     "# One-hot encode the user input, then align its columns to the training matrix X (column order matters to the tree)\n",
 50 |     "new_instance = pd.DataFrame({\n",
 51 |     "    'Outlook_Sunny': [1 if new_outlook == 'Sunny' else 0],\n",
 52 |     "    'Outlook_Overcast': [1 if new_outlook == 'Overcast' else 0],\n",
 53 |     "    'Outlook_Rain': [1 if new_outlook == 'Rain' else 0],\n",
 54 |     "    'Temperature_Hot': [1 if new_temperature == 'Hot' else 0],\n",
 55 |     "    'Temperature_Mild': [1 if new_temperature == 'Mild' else 0],\n",
 56 |     "    'Temperature_Cool': [1 if new_temperature == 'Cool' else 0],\n",
 57 |     "    'Humidity_High': [1 if new_humidity == 'High' else 0],\n",
 58 |     "    'Humidity_Normal': [1 if new_humidity == 'Normal' else 0],\n",
 59 |     "    'Wind_Weak': [1 if new_wind == 'Weak' else 0],\n",
 60 |     "    'Wind_Strong': [1 if new_wind == 'Strong' else 0],\n",
 61 |     "}).reindex(columns=X.columns, fill_value=0)\n",
 62 |     "\n",
 63 |     "# Train-test split\n",
 64 |     "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
 65 |     "\n",
 66 |     "# Build a Decision Tree classifier using scikit-learn\n",
 67 |     "dt_classifier = DecisionTreeClassifier(random_state=42)\n",
 68 |     "dt_classifier.fit(X_train, y_train)\n",
 69 |     "\n",
 70 |     "new_instance_pred1= dt_classifier.predict(X_test)\n",
 71 |     "\n",
 72 |     "# Make predictions on the new instance\n",
 73 |     "new_instance_pred = dt_classifier.predict(new_instance)\n",
 74 |     "print(f\"\\nPredicted PlayTennis for the new instance: {new_instance_pred[0]}\")\n"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "code",
 79 |    "execution_count": null,
 80 |    "id": "0213d788",
 81 |    "metadata": {},
 82 |    "outputs": [],
 83 |    "source": [
 84 |     "# Reference: https://towardsdatascience.com/decision-trees-for-classification-complete-example-d0bc17fcf1c2"
 85 |    ]
 86 |   }
 87 |  ],
 88 |  "metadata": {
 89 |   "kernelspec": {
 90 |    "display_name": "Python 3 (ipykernel)",
 91 |    "language": "python",
 92 |    "name": "python3"
 93 |   },
 94 |   "language_info": {
 95 |    "codemirror_mode": {
 96 |     "name": "ipython",
 97 |     "version": 3
 98 |    },
 99 |    "file_extension": ".py",
100 |    "mimetype": "text/x-python",
101 |    "name": "python",
102 |    "nbconvert_exporter": "python",
103 |    "pygments_lexer": "ipython3",
104 |    "version": "3.9.7"
105 |   }
106 |  },
107 |  "nbformat": 4,
108 |  "nbformat_minor": 5
109 | }
110 | 


--------------------------------------------------------------------------------
/Feature Selection/Embedded method.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "921d9e05",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "<h6><div style=\"font-family: Trebuchet MS; background-color: #1b191d; color: #FFFFFF; padding: 12px; font-size: 35px; line-height: 1.5;text-align: center; line-height: 1.;\">Embedded method\n",
  9 |     "-➖📝</div> </h6>"
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "markdown",
 14 |    "id": "5b983074",
 15 |    "metadata": {},
 16 |    "source": [
 17 |     "    \n",
 18 |     "<h10><div style=\"font-family: Trebuchet MS; background-color:orange; color: #FFFFFF; padding: 10px; line-height: 1.;\">Importing Necessary Libraries 📈:</div></h10>"
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "code",
 23 |    "execution_count": 5,
 24 |    "id": "e7e6b63c",
 25 |    "metadata": {},
 26 |    "outputs": [],
 27 |    "source": [
 28 |     "import numpy as np\n",
 29 |     "from sklearn.datasets import load_breast_cancer\n",
 30 |     "from sklearn.model_selection import train_test_split\n",
 31 |     "from sklearn.linear_model import LogisticRegression\n",
 32 |     "from sklearn.metrics import accuracy_score"
 33 |    ]
 34 |   },
 35 |   {
 36 |    "cell_type": "markdown",
 37 |    "id": "a4a9b75c",
 38 |    "metadata": {},
 39 |    "source": [
 40 |     "    \n",
 41 |     "<h10><div style=\"font-family: Trebuchet MS; background-color:orange; color: #FFFFFF; padding: 10px; line-height: 1.;\">Setting the seed and loading the dataset 📈:</div></h10>"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": 6,
 47 |    "id": "19efbfb2",
 48 |    "metadata": {},
 49 |    "outputs": [],
 50 |    "source": [
 51 |     "# Set seed for reproducibility\n",
 52 |     "seed = 42\n",
 53 |     "np.random.seed(seed)\n",
 54 |     "\n",
 55 |     "# Load the Breast Cancer dataset\n",
 56 |     "cancer = load_breast_cancer()\n",
 57 |     "X = cancer.data\n",
 58 |     "y = cancer.target"
 59 |    ]
 60 |   },
 61 |   {
 62 |    "cell_type": "markdown",
 63 |    "id": "e043d5b1",
 64 |    "metadata": {},
 65 |    "source": [
 66 |     "    \n",
 67 |     "<h10><div style=\"font-family: Trebuchet MS; background-color:orange; color: #FFFFFF; padding: 10px; line-height: 1.;\">Split the dataset into training (80%) and testing (20%) sets:</div></h10>"
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "code",
 72 |    "execution_count": 7,
 73 |    "id": "461f5655",
 74 |    "metadata": {},
 75 |    "outputs": [],
 76 |    "source": [
 77 |     "# Split the dataset into training and testing sets\n",
 78 |     "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)"
 79 |    ]
 80 |   },
 81 |   {
 82 |    "cell_type": "code",
 83 |    "execution_count": 8,
 84 |    "id": "20de70b8",
 85 |    "metadata": {},
 86 |    "outputs": [
 87 |     {
 88 |      "name": "stderr",
 89 |      "output_type": "stream",
 90 |      "text": [
 91 |       "C:\\Users\\LENOVO\\anaconda3\\lib\\site-packages\\sklearn\\svm\\_base.py:985: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
 92 |       "  warnings.warn(\"Liblinear failed to converge, increase \"\n"
 93 |      ]
 94 |     }
 95 |    ],
 96 |    "source": [
 97 |     "# Embedded method with LASSO (Logistic Regression with L1 regularization)\n",
 98 |     "lasso_model = LogisticRegression(penalty='l1', solver='liblinear', random_state=seed)\n",
 99 |     "lasso_model.fit(X_train, y_train)\n",
100 |     "\n",
101 |     "# Extract selected features and their coefficients\n",
102 |     "selected_indices_lasso = np.where(lasso_model.coef_[0] != 0)[0]\n",
103 |     "selected_features_lasso = cancer.feature_names[selected_indices_lasso]\n",
104 |     "coefficients_lasso = lasso_model.coef_[0, selected_indices_lasso]"
105 |    ]
106 |   },
107 |   {
108 |    "cell_type": "markdown",
109 |    "id": "c9d4246f",
110 |    "metadata": {},
111 |    "source": [
112 |     "    \n",
113 |     "<h10><div style=\"font-family: Trebuchet MS; background-color:orange; color: #FFFFFF; padding: 10px; line-height: 1.;\">Selecting Features using Embedded Method as L1</div></h10>"
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "code",
118 |    "execution_count": 9,
119 |    "id": "fa025ee2",
120 |    "metadata": {},
121 |    "outputs": [
122 |     {
123 |      "name": "stdout",
124 |      "output_type": "stream",
125 |      "text": [
126 |       "\n",
127 |       "Embedded Method with LASSO (Logistic Regression with L1 regularization)\n",
128 |       "Selected Features (LASSO): ['mean radius' 'mean texture' 'mean perimeter' 'mean area' 'texture error'\n",
129 |       " 'area error' 'worst radius' 'worst texture' 'worst perimeter'\n",
130 |       " 'worst area' 'worst concavity']\n",
131 |       "Coefficients (LASSO): [ 4.25893726  0.13813487 -0.2624774  -0.01633754  1.69950795 -0.09940568\n",
132 |       "  0.04768624 -0.42417917 -0.02965423 -0.01518975 -3.63866352]\n",
133 |       "Accuracy (LASSO): 0.9561\n"
134 |      ]
135 |     },
136 |     {
137 |      "name": "stderr",
138 |      "output_type": "stream",
139 |      "text": [
140 |       "C:\\Users\\LENOVO\\anaconda3\\lib\\site-packages\\sklearn\\svm\\_base.py:985: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
141 |       "  warnings.warn(\"Liblinear failed to converge, increase \"\n"
142 |      ]
143 |     }
144 |    ],
145 |    "source": [
146 |     "\n",
147 |     "# Function to train and evaluate a model\n",
148 |     "def train_and_evaluate(X_train, X_test, y_train, y_test):\n",
149 |     "    model = LogisticRegression(penalty='l1', solver='liblinear', random_state=seed)\n",
150 |     "    model.fit(X_train, y_train)\n",
151 |     "    y_pred = model.predict(X_test)\n",
152 |     "    accuracy = accuracy_score(y_test, y_pred)\n",
153 |     "    return model, accuracy\n",
154 |     "\n",
155 |     "# Train and evaluate the model with selected features\n",
156 |     "lasso_model, accuracy_lasso = train_and_evaluate(X_train[:, selected_indices_lasso], X_test[:, selected_indices_lasso], y_train, y_test)\n",
157 |     "\n",
158 |     "# Print results\n",
159 |     "print(\"\\nEmbedded Method with LASSO (Logistic Regression with L1 regularization)\")\n",
160 |     "print(f\"Selected Features (LASSO): {selected_features_lasso}\")\n",
161 |     "print(f\"Coefficients (LASSO): {coefficients_lasso}\")\n",
162 |     "print(f\"Accuracy (LASSO): {accuracy_lasso:.4f}\")\n"
163 |    ]
164 |   },
165 |   {
166 |    "cell_type": "code",
167 |    "execution_count": null,
168 |    "id": "66fb004d",
169 |    "metadata": {},
170 |    "outputs": [],
171 |    "source": []
172 |   },
173 |   {
174 |    "cell_type": "code",
175 |    "execution_count": null,
176 |    "id": "216cbea4",
177 |    "metadata": {},
178 |    "outputs": [],
179 |    "source": []
180 |   }
181 |  ],
182 |  "metadata": {
183 |   "kernelspec": {
184 |    "display_name": "Python 3 (ipykernel)",
185 |    "language": "python",
186 |    "name": "python3"
187 |   },
188 |   "language_info": {
189 |    "codemirror_mode": {
190 |     "name": "ipython",
191 |     "version": 3
192 |    },
193 |    "file_extension": ".py",
194 |    "mimetype": "text/x-python",
195 |    "name": "python",
196 |    "nbconvert_exporter": "python",
197 |    "pygments_lexer": "ipython3",
198 |    "version": "3.9.7"
199 |   }
200 |  },
201 |  "nbformat": 4,
202 |  "nbformat_minor": 5
203 | }
204 | 


--------------------------------------------------------------------------------
/Feature Selection/Filter method.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "880546ed",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "<h2><div style=\"font-family: Trebuchet MS; background-color: red; color: #FFFFFF; padding: 12px; line-height: 1.5;\"> Filter Method</div></h2>"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "markdown",
 13 |    "id": "200dfba8",
 14 |    "metadata": {},
 15 |    "source": [
 16 |     "<h10><div style=\"font-family: Trebuchet MS; background-color:Black; color: #FFFFFF; padding: 15px; line-height: 1.;\">Importing Necessary Libraries 📈:</div></h10>"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": 13,
 22 |    "id": "645a17c6",
 23 |    "metadata": {},
 24 |    "outputs": [],
 25 |    "source": [
 26 |     "\n",
 27 |     "import numpy as np\n",
 28 |     "from sklearn.datasets import load_breast_cancer\n",
 29 |     "from sklearn.model_selection import train_test_split\n",
 30 |     "from sklearn.feature_selection import SelectKBest, f_classif, VarianceThreshold\n",
 31 |     "from sklearn.ensemble import RandomForestClassifier\n",
 32 |     "from sklearn.metrics import accuracy_score"
 33 |    ]
 34 |   },
 35 |   {
 36 |    "cell_type": "markdown",
 37 |    "id": "1445759e",
 38 |    "metadata": {},
 39 |    "source": [
 40 |     "    \n",
 41 |     "<h10><div style=\"font-family: Trebuchet MS; background-color:Black; color: #FFFFFF; padding: 15px; line-height: 1.;\">Setting the seed and loading the dataset 📈:</div></h10>"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": 14,
 47 |    "id": "a5251b89",
 48 |    "metadata": {},
 49 |    "outputs": [],
 50 |    "source": [
 51 |     "# Set seed for reproducibility\n",
 52 |     "seed = 42\n",
 53 |     "np.random.seed(seed)\n",
 54 |     "\n",
 55 |     "# Load the Breast Cancer dataset\n",
 56 |     "cancer = load_breast_cancer()\n",
 57 |     "X = cancer.data\n",
 58 |     "y = cancer.target"
 59 |    ]
 60 |   },
 61 |   {
 62 |    "cell_type": "markdown",
 63 |    "id": "da8dd7d2",
 64 |    "metadata": {},
 65 |    "source": [
 66 |     "    \n",
 67 |     "<h10><div style=\"font-family: Trebuchet MS; background-color:Black; color: #FFFFFF; padding: 15px; line-height: 1.;\">Split the dataset into training (80%) and testing (20%) sets:</div></h10>"
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "code",
 72 |    "execution_count": 15,
 73 |    "id": "4817be16",
 74 |    "metadata": {},
 75 |    "outputs": [],
 76 |    "source": [
 77 |     "# Split the dataset into training and testing sets\n",
 78 |     "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)"
 79 |    ]
 80 |   },
 81 |   {
 82 |    "cell_type": "markdown",
 83 |    "id": "329de26e",
 84 |    "metadata": {},
 85 |    "source": [
 86 |     "<h10><div style=\"font-family: Trebuchet MS; background-color:Black; color: #FFFFFF; padding: 15px; line-height: 1.;\">Filter method with ANOVA</div></h10>"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "code",
 91 |    "execution_count": 16,
 92 |    "id": "2046d2f3",
 93 |    "metadata": {},
 94 |    "outputs": [],
 95 |    "source": [
 96 |     "# Filter method with ANOVA\n",
 97 |     "k_best_features = 10\n",
 98 |     "anova_selector = SelectKBest(f_classif, k=k_best_features)\n",
 99 |     "X_train_anova = anova_selector.fit_transform(X_train, y_train)\n",
100 |     "X_test_anova = anova_selector.transform(X_test)"
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "markdown",
105 |    "id": "8632271b",
106 |    "metadata": {},
107 |    "source": [
108 |     "<h10><div style=\"font-family: Trebuchet MS; background-color:Black; color: #FFFFFF; padding: 15px; line-height: 1.;\">Filter method with Variance Threshold</div></h10>"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "code",
113 |    "execution_count": 17,
114 |    "id": "a38a24ca",
115 |    "metadata": {},
116 |    "outputs": [],
117 |    "source": [
118 |     "# Filter method with Variance Threshold\n",
119 |     "variance_threshold_value = 0.01\n",
120 |     "variance_selector = VarianceThreshold(threshold=variance_threshold_value)\n",
121 |     "X_train_filtered = variance_selector.fit_transform(X_train_anova)\n",
122 |     "X_test_filtered = variance_selector.transform(X_test_anova)\n"
123 |    ]
124 |   },
125 |   {
126 |    "cell_type": "markdown",
127 |    "id": "573f43a2",
128 |    "metadata": {},
129 |    "source": [
130 |     "<h10><div style=\"font-family: Trebuchet MS; background-color:Black; color: #FFFFFF; padding: 15px; line-height: 1.;\">Function to train and evaluate a model</div></h10>"
131 |    ]
132 |   },
133 |   {
134 |    "cell_type": "code",
135 |    "execution_count": 18,
136 |    "id": "6b3e9548",
137 |    "metadata": {},
138 |    "outputs": [],
139 |    "source": [
140 |     "# Function to train and evaluate a model\n",
141 |     "def train_and_evaluate(X_train, X_test, y_train, y_test):\n",
142 |     "    model = RandomForestClassifier(random_state=seed)\n",
143 |     "    model.fit(X_train, y_train)\n",
144 |     "    y_pred = model.predict(X_test)\n",
145 |     "    accuracy = accuracy_score(y_test, y_pred)\n",
146 |     "    return accuracy\n",
147 |     "\n",
148 |     "# Train and evaluate the model with ANOVA and Variance Threshold\n",
149 |     "accuracy_anova = train_and_evaluate(X_train_anova, X_test_anova, y_train, y_test)\n",
150 |     "accuracy_variance = train_and_evaluate(X_train_filtered, X_test_filtered, y_train, y_test)\n",
151 |     "\n",
152 |     "# Get selected feature indices\n",
153 |     "selected_indices_variance = np.where(variance_selector.get_support())[0]"
154 |    ]
155 |   },
156 |   {
157 |    "cell_type": "code",
158 |    "execution_count": 19,
159 |    "id": "96ac34ec",
160 |    "metadata": {},
161 |    "outputs": [
162 |     {
163 |      "name": "stdout",
164 |      "output_type": "stream",
165 |      "text": [
166 |       "\n",
167 |       "Filter Method with ANOVA\n",
168 |       "Number of Features Selected (ANOVA): 10\n",
169 |       "Selected Feature Indices (ANOVA): [ 0  2  3  6  7 20 22 23 26 27]\n",
170 |       "Selected Feature Names (ANOVA): ['mean radius' 'mean perimeter' 'mean area' 'mean concavity'\n",
171 |       " 'mean concave points' 'worst radius' 'worst perimeter' 'worst area'\n",
172 |       " 'worst concavity' 'worst concave points']\n",
173 |       "Accuracy (ANOVA): 0.9561\n",
174 |       "\n",
175 |       "Filter Method with Variance Threshold\n",
176 |       "Number of Features Selected (Variance Threshold): 7\n",
177 |       "Selected Feature Indices (Variance Threshold): [0 1 2 5 6 7 8]\n",
178 |       "Selected Feature Names (Variance Threshold): ['mean radius' 'mean texture' 'mean perimeter' 'mean compactness'\n",
179 |       " 'mean concavity' 'mean concave points' 'mean symmetry']\n",
180 |       "Accuracy (Variance Threshold): 0.9737\n"
181 |      ]
182 |     }
183 |    ],
184 |    "source": [
185 |     "# Print results\n",
186 |     "print(\"\\nFilter Method with ANOVA\")\n",
187 |     "print(f\"Number of Features Selected (ANOVA): {k_best_features}\")\n",
188 |     "print(f\"Selected Feature Indices (ANOVA): {np.where(anova_selector.get_support())[0]}\")\n",
189 |     "print(f\"Selected Feature Names (ANOVA): {cancer.feature_names[anova_selector.get_support()]}\")\n",
190 |     "print(f\"Accuracy (ANOVA): {accuracy_anova:.4f}\")\n",
191 |     "\n",
192 |     "print(\"\\nFilter Method with Variance Threshold\")\n",
193 |     "print(f\"Number of Features Selected (Variance Threshold): {len(selected_indices_variance)}\")\n",
194 |     "print(f\"Selected Feature Indices (Variance Threshold): {selected_indices_variance}\")\n",
195 |     "print(f\"Selected Feature Names (Variance Threshold): {cancer.feature_names[selected_indices_variance]}\")\n",
196 |     "print(f\"Accuracy (Variance Threshold): {accuracy_variance:.4f}\")\n"
197 |    ]
198 |   },
199 |   {
200 |    "cell_type": "code",
201 |    "execution_count": null,
202 |    "id": "4403d6f9",
203 |    "metadata": {},
204 |    "outputs": [],
205 |    "source": []
206 |   },
207 |   {
208 |    "cell_type": "code",
209 |    "execution_count": null,
210 |    "id": "fad55cab",
211 |    "metadata": {},
212 |    "outputs": [],
213 |    "source": []
214 |   }
215 |  ],
216 |  "metadata": {
217 |   "kernelspec": {
218 |    "display_name": "Python 3 (ipykernel)",
219 |    "language": "python",
220 |    "name": "python3"
221 |   },
222 |   "language_info": {
223 |    "codemirror_mode": {
224 |     "name": "ipython",
225 |     "version": 3
226 |    },
227 |    "file_extension": ".py",
228 |    "mimetype": "text/x-python",
229 |    "name": "python",
230 |    "nbconvert_exporter": "python",
231 |    "pygments_lexer": "ipython3",
232 |    "version": "3.9.7"
233 |   }
234 |  },
235 |  "nbformat": 4,
236 |  "nbformat_minor": 5
237 | }
238 | 


--------------------------------------------------------------------------------
/Feature Selection/Wrapper method .ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "9b15f1ec",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "<h3><div style=\"font-family: Trebuchet MS; background-color:#176BA0; color: #FFFFFF; padding: 10px; line-height: 1.5;\">1. | Importing Necessary Libraries 🌟 📚</div></h3>"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "code",
 13 |    "execution_count": 5,
 14 |    "id": "1e09463d",
 15 |    "metadata": {},
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "import numpy as np\n",
 19 |     "from sklearn.datasets import load_breast_cancer\n",
 20 |     "from sklearn.model_selection import train_test_split\n",
 21 |     "from sklearn.ensemble import RandomForestClassifier\n",
 22 |     "from sklearn.feature_selection import RFE\n",
 23 |     "from sklearn.metrics import accuracy_score"
 24 |    ]
 25 |   },
 26 |   {
 27 |    "cell_type": "markdown",
 28 |    "id": "82180422",
 29 |    "metadata": {},
 30 |    "source": [
 31 |     "    \n",
 32 |     "<h10><div style=\"font-family: Trebuchet MS; background-color:Green; color: #FFFFFF; padding: 15px; line-height: 1.;\">Setting the random seed and loading the dataset 📈:</div></h10>"
 33 |    ]
 34 |   },
 35 |   {
 36 |    "cell_type": "code",
 37 |    "execution_count": 6,
 38 |    "id": "34dc6a7c",
 39 |    "metadata": {},
 40 |    "outputs": [],
 41 |    "source": [
 42 |     "# Set seed for reproducibility\n",
 43 |     "seed = 42\n",
 44 |     "np.random.seed(seed)\n",
 45 |     "\n",
 46 |     "# Load the Breast Cancer dataset\n",
 47 |     "cancer = load_breast_cancer()\n",
 48 |     "X = cancer.data\n",
 49 |     "y = cancer.target"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "markdown",
 54 |    "id": "3f999bc5",
 55 |    "metadata": {},
 56 |    "source": [
 57 |     "    \n",
 58 |     "<h10><div style=\"font-family: Trebuchet MS; background-color:Green; color: #FFFFFF; padding: 15px; line-height: 1.;\">Split the dataset into training (80%) and testing (20%) sets:</div></h10>"
 59 |    ]
 60 |   },
 61 |   {
 62 |    "cell_type": "code",
 63 |    "execution_count": 11,
 64 |    "id": "1ce7b251",
 65 |    "metadata": {},
 66 |    "outputs": [],
 67 |    "source": [
 68 |     "\n",
 69 |     "# Split the dataset into training and testing sets\n",
 70 |     "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)"
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "markdown",
 75 |    "id": "9d80703c",
 76 |    "metadata": {},
 77 |    "source": [
 78 |     "    \n",
 79 |     "<h10><div style=\"font-family: Trebuchet MS; background-color:Green; color: #FFFFFF; padding: 15px; line-height: 1.;\">Wrapper method using Recursive Feature Elimination (RFE) :</div></h10>"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "code",
 84 |    "execution_count": 15,
 85 |    "id": "43327b06",
 86 |    "metadata": {},
 87 |    "outputs": [],
 88 |    "source": [
 89 |     "\n",
 90 |     "\n",
 91 |     "\n",
 92 |     "# Wrapper method using Recursive Feature Elimination (RFE) with RandomForestClassifier\n",
 93 |     "def wrapper_method_rfe(X_train, X_test, y_train, estimator, num_features, method):\n",
 94 |     "    model = estimator\n",
 95 |     "    if method == \"Forward\":\n",
 96 |     "        selector = RFE(model, n_features_to_select=num_features, step=1)\n",
 97 |     "        selector.fit(X_train, y_train)\n",
 98 |     "    elif method == \"Backward\":\n",
 99 |     "        selector = RFE(model, n_features_to_select=num_features, step=1)\n",
100 |     "        selector.fit(X_train, y_train)\n",
101 |     "        # Since RFE performs backward elimination by default, we need to reverse the selected indices for backward elimination\n",
102 |     "        selected_indices = np.flip(np.where(selector.support_)[0])\n",
103 |     "        X_train_selected = selector.transform(X_train)\n",
104 |     "        X_test_selected = selector.transform(X_test)\n",
105 |     "    else:\n",
106 |     "        raise ValueError(\"Invalid method specified\")\n",
107 |     "\n",
108 |     "    if method != \"Backward\":\n",
109 |     "        X_train_selected = selector.transform(X_train)\n",
110 |     "        X_test_selected = selector.transform(X_test)\n",
111 |     "        selected_indices = np.where(selector.support_)[0]"
112 |    ]
113 |   },
114 |   {
115 |    "cell_type": "markdown",
116 |    "id": "5202b583",
117 |    "metadata": {},
118 |    "source": [
119 |     "    \n",
120 |     "<h10><div style=\"font-family: Trebuchet MS; background-color:Green; color: #FFFFFF; padding: 15px; line-height: 1.;\">Wrapper method using Recursive Feature Elimination (RFE) :</div></h10>"
121 |    ]
122 |   },
123 |   {
124 |    "cell_type": "code",
125 |    "execution_count": 19,
126 |    "id": "fe96f1d4",
127 |    "metadata": {},
128 |    "outputs": [],
129 |    "source": [
130 |     "\n",
131 |     "# Wrapper method using Recursive Feature Elimination (RFE) with RandomForestClassifier\n",
132 |     "def wrapper_method_rfe(X_train, X_test, y_train, estimator, num_features, method):\n",
133 |     "    model = estimator\n",
134 |     "    if method == \"Forward\":\n",
135 |     "        selector = RFE(model, n_features_to_select=num_features, step=1)\n",
136 |     "        selector.fit(X_train, y_train)\n",
137 |     "    elif method == \"Backward\":\n",
138 |     "        selector = RFE(model, n_features_to_select=num_features, step=1)\n",
139 |     "        selector.fit(X_train, y_train)\n",
140 |     "        # Since RFE performs backward elimination by default, we need to reverse the selected indices for backward elimination\n",
141 |     "        selected_indices = np.flip(np.where(selector.support_)[0])\n",
142 |     "        X_train_selected = selector.transform(X_train)\n",
143 |     "        X_test_selected = selector.transform(X_test)\n",
144 |     "    else:\n",
145 |     "        raise ValueError(\"Invalid method specified\")\n",
146 |     "\n",
147 |     "    if method != \"Backward\":\n",
148 |     "        X_train_selected = selector.transform(X_train)\n",
149 |     "        X_test_selected = selector.transform(X_test)\n",
150 |     "        selected_indices = np.where(selector.support_)[0]\n",
151 |     "\n",
152 |     "    # Print results\n",
153 |     "    print(f\"\\nWrapper Method with RFE ({method})\")\n",
154 |     "    print(f\"Selected Features (RFE): {selected_indices}\")\n",
155 |     "    print(f\"Number of Features Selected (RFE): {len(selected_indices)}\")\n",
156 |     "\n",
157 |     "    return X_train_selected, X_test_selected\n",
158 |     "\n"
159 |    ]
160 |   },
161 |   {
162 |    "cell_type": "markdown",
163 |    "id": "3ab76798",
164 |    "metadata": {},
165 |    "source": [
166 |     "    \n",
167 |     "<h10><div style=\"font-family: Trebuchet MS; background-color:Green; color: #FFFFFF; padding: 15px; line-height: 1.;\">Wrapper method using RFE (Forward Selection and Backward Elimination):</div></h10>"
168 |    ]
169 |   },
170 |   {
171 |    "cell_type": "code",
172 |    "execution_count": 20,
173 |    "id": "be46adab",
174 |    "metadata": {},
175 |    "outputs": [
176 |     {
177 |      "name": "stdout",
178 |      "output_type": "stream",
179 |      "text": [
180 |       "\n",
181 |       "Wrapper Method with RFE (Forward)\n",
182 |       "Selected Features (RFE): [ 1  2  6  7 20 21 22 23 26 27]\n",
183 |       "Number of Features Selected (RFE): 10\n",
184 |       "Accuracy (RFE): 0.9649\n",
185 |       "\n",
186 |       "Wrapper Method with RFE (Backward)\n",
187 |       "Selected Features (RFE): [27 26 23 22 21 20  7  6  2  1]\n",
188 |       "Number of Features Selected (RFE): 10\n",
189 |       "Accuracy (RFE): 0.9649\n"
190 |      ]
191 |     }
192 |    ],
193 |    "source": [
194 |     "# Function to train and evaluate a model\n",
195 |     "def train_and_evaluate(X_train, X_test, y_train, y_test):\n",
196 |     "    model = RandomForestClassifier(random_state=seed)\n",
197 |     "    model.fit(X_train, y_train)\n",
198 |     "    y_pred = model.predict(X_test)\n",
199 |     "    accuracy = accuracy_score(y_test, y_pred)\n",
200 |     "\n",
201 |     "    # Print accuracy\n",
202 |     "    print(f\"Accuracy (RFE): {accuracy:.4f}\")\n",
203 |     "\n",
204 |     "    return accuracy\n",
205 |     "\n",
206 |     "# Wrapper method using RFE (Forward Selection)\n",
207 |     "num_features_rfe_forward = 10\n",
208 |     "X_train_rfe_forward, X_test_rfe_forward = wrapper_method_rfe(\n",
209 |     "    X_train, X_test, y_train, RandomForestClassifier(random_state=seed),\n",
210 |     "    num_features_rfe_forward, \"Forward\"\n",
211 |     ")\n",
212 |     "accuracy_rfe_forward = train_and_evaluate(X_train_rfe_forward, X_test_rfe_forward, y_train, y_test)\n",
213 |     "\n",
214 |     "# Wrapper method using RFE (Backward Elimination)\n",
215 |     "num_features_rfe_backward = 10\n",
216 |     "X_train_rfe_backward, X_test_rfe_backward = wrapper_method_rfe(\n",
217 |     "    X_train, X_test, y_train, RandomForestClassifier(random_state=seed),\n",
218 |     "    num_features_rfe_backward, \"Backward\"\n",
219 |     ")\n",
220 |     "accuracy_rfe_backward = train_and_evaluate(X_train_rfe_backward, X_test_rfe_backward, y_train, y_test)\n"
221 |    ]
222 |   },
223 |   {
224 |    "cell_type": "code",
225 |    "execution_count": null,
226 |    "id": "52de39a0",
227 |    "metadata": {},
228 |    "outputs": [],
229 |    "source": []
230 |   }
231 |  ],
232 |  "metadata": {
233 |   "kernelspec": {
234 |    "display_name": "Python 3 (ipykernel)",
235 |    "language": "python",
236 |    "name": "python3"
237 |   },
238 |   "language_info": {
239 |    "codemirror_mode": {
240 |     "name": "ipython",
241 |     "version": 3
242 |    },
243 |    "file_extension": ".py",
244 |    "mimetype": "text/x-python",
245 |    "name": "python",
246 |    "nbconvert_exporter": "python",
247 |    "pygments_lexer": "ipython3",
248 |    "version": "3.9.7"
249 |   }
250 |  },
251 |  "nbformat": 4,
252 |  "nbformat_minor": 5
253 | }
254 | 


--------------------------------------------------------------------------------
/Feature Selection/feature-selection-technique-in-machine-learning.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0",
  7 |     "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a",
  8 |     "collapsed": true
  9 |    },
 10 |    "source": [
 11 |     "<h2><div style=\"font-family: Trebuchet MS; background-color: red; color: #FFFFFF; padding: 12px; line-height: 1.5;\"> Feature Selection</div></h2> \n",
 12 |     "Feature selection is one of the most important techniques for building a great predictive model. It helps us identify the most important features of the data set."
 13 |    ]
 14 |   },
 15 |   {
 16 |    "cell_type": "markdown",
 17 |    "metadata": {
 18 |     "_uuid": "8710c1c227abfd06369844ebc57af6fca32b4632"
 19 |    },
 20 |    "source": [
 21 |     "<h3><div style=\"font-family: Trebuchet MS; background-color:#176BA0;; color: #FFFFFF; padding: 10px; line-height: 1.5;\">1. | I will cover the below points : 🌟 📚</div></h3>\n",
 22 |     "\n",
 23 |     "1. What is Feature Selection?\n",
 24 |     "2. Why is it one of the most important techniques for a Data Scientist to learn?\n",
 25 |     "3. What are the different types of Feature Selection?"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "markdown",
 30 |    "metadata": {
 31 |     "_uuid": "c755f87fc7ca150d89268ee9dba94b2720d69657"
 32 |    },
 33 |    "source": [
 34 |     "<h10><div style=\"font-family: Trebuchet MS; background-color:orange; color: #FFFFFF; padding: 1px; line-height: 1.;\">1.1 | 1.Feature Selection: 🌍:</div></h10>\n",
 35 |     "\n",
 36 |     "Feature selection is the process of selecting a subset of relevant features for use in model construction, which helps to improve the model's predictions and decrease the error rate. \n",
 37 |     "In other words, it is the process of identifying and removing as much irrelevant and redundant information as possible.\n"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "markdown",
 42 |    "metadata": {
 43 |     "_uuid": "8779a7d4886a84ff86204ac95a0f6eba11876b58"
 44 |    },
 45 |    "source": [
 46 |     "<h10><div style=\"font-family: Trebuchet MS; background-color:orange; color: #FFFFFF; padding: 1px; line-height: 1.;\">1.1 | 2. Importance of Feature Selection:🌍:</div></h10>\n",
 47 |     "\n",
 48 |     "* Improve the accuracy of model.\n",
 49 |     "* Reduce overfitting.\n",
 50 |     "* Shorter training time.\n",
 51 |     "* Reduce complexity of model.\n"
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "markdown",
 56 |    "metadata": {
 57 |     "_uuid": "1411d4d61851c8e1f404cc3275aec9fbc46b3ba8",
 58 |     "collapsed": true
 59 |    },
 60 |    "source": [
 61 |     "\n",
 62 |     "<h10><div style=\"font-family: Trebuchet MS; background-color:orange; color: #FFFFFF; padding: 1px; line-height: 1.;\"> Types of Feature Selection </div></h10>\n",
 63 |     "\n",
 64 |     "*         ***Wrapper Method***\n",
 65 |     "*         ***Filter Method***\n",
 66 |     "*         ***Embedded Method***\n"
 67 |    ]
 68 |   },
 69 |   {
 70 |    "cell_type": "markdown",
 71 |    "metadata": {
 72 |     "_uuid": "79ba39eefedbbc4a5ee444d9b88f953d261663fd"
 73 |    },
 74 |    "source": [
 75 |     "\n",
 76 |     "<h3><div style=\"font-family: Trebuchet MS; background-color:#176BA0;; color: #FFFFFF; padding: 10px; line-height: 1.5;\">  Wrapper Method 🌟 📚</div></h3>\n",
 77 |     "\n",
 78 |     "\n",
 79 |     "In this method, a subset of features is selected and a model is trained using them. Based on the inferences that we draw from the previous model, we decide to add or remove features from the subset.\n",
 80 |     "[For in-depth details](https://en.wikipedia.org/wiki/Feature_selection)"
 81 |    ]
 82 |   },
 83 |   {
 84 |    "cell_type": "markdown",
 85 |    "metadata": {
 86 |     "_uuid": "503925da9749631ebb3a0e07be5587a8583060a1"
 87 |    },
 88 |    "source": [
 89 |     "**Image from wiki**\n",
 90 |     "<p><img src=\"https://upload.wikimedia.org/wikipedia/commons/0/04/Feature_selection_Wrapper_Method.png\" alt=\"Feature selection Wrapper Method.png\" height=\"179\" width=\"640\"><br></p>"
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "markdown",
 95 |    "metadata": {
 96 |     "_uuid": "e3a24d030c50a3cc50559c03f586bf2288c221f4",
 97 |     "collapsed": true
 98 |    },
 99 |    "source": [
100 |     "\n",
101 |     "<h10><div style=\"font-family: Trebuchet MS; background-color:orange; color: #FFFFFF; padding: 1px; line-height: 1.;\">  Types of Wrapper Methods</div></h10>\n",
102 |     "\n",
103 |     "\n",
104 |     "* Forward Selection\n",
105 |     "* Backward Elimination\n",
106 |     "* Exhaustive Feature Selection "
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "markdown",
111 |    "metadata": {
112 |     "_uuid": "2fa60b67f8e4805bcd497bf464d925b64b1900b5"
113 |    },
114 |    "source": [
115 |     "<h10><div style=\"font-family: Trebuchet MS; background-color:orange; color: #FFFFFF; padding: 1px; line-height: 1.;\">  Forward Selection</div></h10>\n",
116 |     "\n",
117 |     "It is an iterative method in which we keep adding the feature that best improves our model until adding a new feature no longer improves the model's performance.<br/><br/>\n",
118 |     "<h10><div style=\"font-family: Trebuchet MS; background-color:orange; color: #FFFFFF; padding: 1px; line-height: 1.;\">  Backward Elimination</div></h10>\n",
119 |     "In this method, we start with all features and remove the least significant feature at each iteration, which improves the model's performance. We repeat this until no improvement is observed on removal of a feature.<br><br>\n",
120 |     "\n",
121 |     "<h10><div style=\"font-family: Trebuchet MS; background-color:orange; color: #FFFFFF; padding: 1px; line-height: 1.;\">  Exhaustive Feature Selection</div></h10>\n",
122 |     "\n",
123 |     "In this method, the best subset of features is selected from all possible feature subsets. For example, if a dataset contains 4 features, the algorithm will evaluate all the feature combinations as follows:\n",
124 |     "* All possible combinations of 1  feature\n",
125 |     "* All possible combinations of 2 features\n",
126 |     "* All possible combinations of 3 features\n",
127 |     "* All possible combinations of 4 features\n",
128 |     "            "
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "markdown",
133 |    "metadata": {
134 |     "_uuid": "eafcc42656451003439a83ee28839b1408d274cb"
135 |    },
136 |    "source": [
137 |     "\n",
138 |     "<h10><div style=\"font-family: Trebuchet MS; background-color:orange; color: #FFFFFF; padding: 1px; line-height: 1.;\">  Pros</div></h10>\n",
139 |     "\n",
140 |     "\n",
141 |     "* Aims to find the best possible feature combination.\n",
142 |     "* Better results than filter methods.\n",
143 |     "* Can be used for small datasets with few features."
144 |    ]
145 |   },
146 |   {
147 |    "cell_type": "markdown",
148 |    "metadata": {
149 |     "_uuid": "9fad7d1d69b81bb7e9157860890f028ff8483c82"
150 |    },
151 |    "source": [
152 |     "\n",
153 |     "\n",
154 |     "\n",
155 |     "<h10><div style=\"font-family: Trebuchet MS; background-color:orange; color: #FFFFFF; padding: 1px; line-height: 1.;\">  Cons</div></h10>\n",
156 |     "\n",
157 |     "* Computationally expensive\n",
158 |     "* Often impractical for large datasets with many features."
159 |    ]
160 |   },
161 |   {
162 |    "cell_type": "markdown",
163 |    "metadata": {
164 |     "_uuid": "1c6e339fd12bf741b1d3e00edd2c2ee3c136ea7d"
165 |    },
166 |    "source": [
167 |     "<h3><div style=\"font-family: Trebuchet MS; background-color:#176BA0;; color: #FFFFFF; padding: 10px; line-height: 1.5;\">  Filter Method  📚</div></h3>"
168 |    ]
169 |   },
170 |   {
171 |    "cell_type": "markdown",
172 |    "metadata": {
173 |     "_uuid": "7d104eeabb0bd4ad80a59a9c66d88f5487a88937"
174 |    },
175 |    "source": [
176 |     "Filter methods are generally used as a preprocessing step. The selection of features is independent of any machine learning algorithms. Instead, features are selected on the basis of their scores in various statistical tests for their correlation with the outcome variable."
177 |    ]
178 |   },
179 |   {
180 |    "cell_type": "markdown",
181 |    "metadata": {
182 |     "_uuid": "0913e455d8c335346b3fe9099d00105e8a65c1b8",
183 |     "collapsed": true
184 |    },
185 |    "source": [
186 |     "**Image from wiki**\n",
187 |     "<p><img src=\"https://upload.wikimedia.org/wikipedia/commons/2/2c/Filter_Methode.png\" alt=\"Filter Methode.png\" height=\"63\" width=\"640\"></p>"
188 |    ]
189 |   },
190 |   {
191 |    "cell_type": "markdown",
192 |    "metadata": {
193 |     "_uuid": "211f9335e79a2ae0ca5a30ec96945f39e7481d07"
194 |    },
195 |    "source": [
196 |     "\n",
197 |     "\n",
198 |     "<h10><div style=\"font-family: Trebuchet MS; background-color:red; color: #FFFFFF; padding: 1px; line-height: 1.;\">  Basic Methods</div></h10>\n",
199 |     "\n",
200 |     "We should consider the filter methods below as data pre-processing steps.\n",
201 |     "* Constant features - Constant features are those that show the same value for all the observations of the dataset. Remove constant features from the dataset.\n",
202 |     "* Quasi-constant features  - A column in which 99% of the values are the same is called a quasi-constant column. Remove quasi-constant features from the dataset.\n",
203 |     "* Duplicated features - Remove duplicated features from the dataset."
204 |    ]
205 |   },
206 |   {
207 |    "cell_type": "markdown",
208 |    "metadata": {
209 |     "_uuid": "fce63551ebc4a8d6b671c9a0c4f10ac89d4fdff4"
210 |    },
211 |    "source": [
212 |     "\n",
213 |     "<h10><div style=\"font-family: Trebuchet MS; background-color:red; color: #FFFFFF; padding: 1px; line-height: 1.;\"> Correlation</div></h10>\n",
214 |     "\n",
215 |     "* Correlation is a measure of the linear relationship between 2 or more variables.\n",
216 |     "* Through correlation we can predict one variable from the other.\n",
217 |     "    * Good variables are highly correlated with the target but uncorrelated among themselves.\n",
218 |     "* If two variables are highly correlated with each other, then we should remove one of them.   \n",
219 |     "  "
220 |    ]
221 |   },
222 |   {
223 |    "cell_type": "markdown",
224 |    "metadata": {
225 |     "_uuid": "d007d540b45ed89c084a571db241329c03bd30b3"
226 |    },
227 |    "source": [
228 |     "\n",
229 |     "<h10><div style=\"font-family: Trebuchet MS; background-color:red; color: #FFFFFF; padding: 1px; line-height: 1.;\">  Fisher Score (Chi-square implementation)</div></h10>\n",
230 |     "\n",
231 |     "* Measures the dependence of 2 variables\n",
232 |     "* Suited for categorical variables.\n",
233 |     "* Target should be binary.\n",
234 |     "* Variable values should be non negative, typically Boolean or counts.\n"
235 |    ]
236 |   },
237 |   {
238 |    "cell_type": "markdown",
239 |    "metadata": {
240 |     "_uuid": "59c38c2392896e016e2ab499b2ac829475d6cc19"
241 |    },
242 |    "source": [
243 |     "\n",
244 |     "\n",
245 |     "<h10><div style=\"font-family: Trebuchet MS; background-color:red; color: #FFFFFF; padding: 1px; line-height: 1.;\">  ANOVA (Analysis Of Variance)</div></h10>\n",
246 |     "\n",
247 |     "* Measures the dependency of two variables.\n",
248 |     "* Suited for continuous variables.\n",
249 |     "* Requires a binary target.\n",
250 |     "* Assumes linear relationship between variable and target.\n",
251 |     "* Assumes variables are normally distributed.\n",
252 |     "* Sensitive to sample size\n"
253 |    ]
254 |   },
255 |   {
256 |    "cell_type": "markdown",
257 |    "metadata": {
258 |     "_uuid": "987b7c270f4b43373b3d70fa216de2f866e64c03"
259 |    },
260 |    "source": [
261 |     "<h10><div style=\"font-family: Trebuchet MS; background-color:red; color: #FFFFFF; padding: 1px; line-height: 1.;\">  ROC-AUC / RMSE</div></h10>\n",
262 |     "\n",
263 |     "* Measures the dependency of two variables.\n",
264 |     "* Suited for all types of variables.\n",
265 |     "* Makes no assumption on the distribution of the variables."
266 |    ]
267 |   },
268 |   {
269 |    "cell_type": "markdown",
270 |    "metadata": {
271 |     "_uuid": "d817284e30d6bdb48e9508e3ecdf4fb015d087f2"
272 |    },
273 |    "source": [
274 |     "\n",
275 |     "<h10><div style=\"font-family: Trebuchet MS; background-color:red; color: #FFFFFF; padding: 1px; line-height: 1.;\">  Steps to  select features</div></h10>\n",
276 |     "\n",
277 |     "* Rank features according to a certain criterion (like correlation).\n",
278 |     "    * Each feature is ranked independently of the feature space.\n",
279 |     "* Select highest ranking features.    "
280 |    ]
281 |   },
282 |   {
283 |    "cell_type": "markdown",
284 |    "metadata": {
285 |     "_uuid": "e4ddc34ce1df6366a605729d04ca48094ade7009"
286 |    },
287 |    "source": [
288 |     "\n",
289 |     "<h10><div style=\"font-family: Trebuchet MS; background-color:red; color: #FFFFFF; padding: 1px; line-height: 1.;\">  Basic Pros</div></h10>\n",
290 |     "\n",
291 |     "* Fast computation.\n",
292 |     "* Simple yet powerful way to quickly remove irrelevant and redundant features.\n",
293 |     "* Better choice than wrapper methods for large datasets."
294 |    ]
295 |   },
296 |   {
297 |    "cell_type": "markdown",
298 |    "metadata": {
299 |     "_uuid": "def3fa408a109f6d51bb1663a3bbcf5e66a231fd"
300 |    },
301 |    "source": [
302 |     "<h10><div style=\"font-family: Trebuchet MS; background-color:red; color: #FFFFFF; padding: 1px; line-height: 1.;\">  Basic Cons</div></h10>\n",
303 |     "\n",
304 |     "* They may select redundant variables because they do not consider the relationships between features.\n",
305 |     "* The prediction accuracy is lower than that of wrapper methods."
306 |    ]
307 |   },
308 |   {
309 |    "cell_type": "markdown",
310 |    "metadata": {
311 |     "_uuid": "87d76ab632364c86325f8b7749409bb77bf20432"
312 |    },
313 |    "source": [
314 |     "\n",
315 |     "<h3><div style=\"font-family: Trebuchet MS; background-color:#176BA0;; color: #FFFFFF; padding: 10px; line-height: 1.5;\"> Embedded Method  🌟 📚</div></h3>\n",
316 |     "\n",
317 |     "\n",
318 |     "Embedded methods combine the qualities of filter and wrapper methods. A learning algorithm takes advantage of its own variable selection process and performs feature selection and classification simultaneously."
319 |    ]
320 |   },
321 |   {
322 |    "cell_type": "markdown",
323 |    "metadata": {
324 |     "_uuid": "b61d42f474e71dec29b9ff15752f106a21459c7e"
325 |    },
326 |    "source": [
327 |     "**Image from wiki**\n",
328 |     "<p><img src=\"https://upload.wikimedia.org/wikipedia/commons/b/bf/Feature_selection_Embedded_Method.png\" alt=\"Feature selection Embedded Method.png\" height=\"190\" width=\"640\"></p>"
329 |    ]
330 |   },
331 |   {
332 |    "cell_type": "markdown",
333 |    "metadata": {
334 |     "_uuid": "17807564445dd5b351e939e8309c60501785b693"
335 |    },
336 |    "source": [
337 |     "<h3><div style=\"font-family: Trebuchet MS; background-color:#176BA0;; color: #FFFFFF; padding: 10px; line-height: 1.5;\"> REGULARISATION 🌟 📚</div></h3>\n",
338 |     "\n",
339 |     "\n",
340 |     "\n",
341 |     "Regularization consists of adding a penalty on the different parameters of the model to reduce the freedom of the model. Hence, the model will be less likely to fit the noise of the training data, which improves its generalization ability. For linear models there are, in general, 3 types of regularization:\n",
342 |     "* The L1 regularization (also called Lasso)\n",
343 |     "* The L2 regularization (also called Ridge)\n",
344 |     "* The L1/L2 regularization (also called Elastic net)"
345 |    ]
346 |   },
347 |   {
348 |    "cell_type": "markdown",
349 |    "metadata": {
350 |     "_uuid": "10c4a1d2541d1bfcbc3a6c378624be3f4ca9879a"
351 |    },
352 |    "source": [
353 |     "**Image from Scikit learn**\n",
354 |     "<p><img src=\"http://scikit-learn.org/stable/_images/sphx_glr_plot_sgd_penalties_001.png\"></p>"
355 |    ]
356 |   },
357 |   {
358 |    "cell_type": "markdown",
359 |    "metadata": {
360 |     "_uuid": "9a0bf904c39487ae6b3cc26a9053100526224d72",
361 |     "collapsed": true
362 |    },
363 |    "source": []
364 |   },
365 |   {
366 |    "cell_type": "markdown",
367 |    "metadata": {
368 |     "_uuid": "6675c83145ea22d7780e15a19b074c90884eebd4",
369 |     "collapsed": true
370 |    },
371 |    "source": []
372 |   },
373 |   {
374 |    "cell_type": "code",
375 |    "execution_count": null,
376 |    "metadata": {
377 |     "_uuid": "c1e5227338ce91fb45514340eca51cbd4130c6a4",
378 |     "collapsed": true
379 |    },
380 |    "outputs": [],
381 |    "source": []
382 |   }
383 |  ],
384 |  "metadata": {
385 |   "kernelspec": {
386 |    "display_name": "Python 3 (ipykernel)",
387 |    "language": "python",
388 |    "name": "python3"
389 |   },
390 |   "language_info": {
391 |    "codemirror_mode": {
392 |     "name": "ipython",
393 |     "version": 3
394 |    },
395 |    "file_extension": ".py",
396 |    "mimetype": "text/x-python",
397 |    "name": "python",
398 |    "nbconvert_exporter": "python",
399 |    "pygments_lexer": "ipython3",
400 |    "version": "3.9.7"
401 |   }
402 |  },
403 |  "nbformat": 4,
404 |  "nbformat_minor": 1
405 | }
406 | 


--------------------------------------------------------------------------------
/Spam-Email-detection-system-main/Untitled2.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 2,
  6 |    "metadata": {
  7 |     "executionInfo": {
  8 |      "elapsed": 1401,
  9 |      "status": "ok",
 10 |      "timestamp": 1640778596418,
 11 |      "user": {
 12 |       "displayName": "bibek sah",
 13 |       "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjtY0YqqHlXep2Tt94P9dLVJx_J-lV4Nk1cBRPh7A=s64",
 14 |       "userId": "08792417367200435838"
 15 |      },
 16 |      "user_tz": -345
 17 |     },
 18 |     "id": "-9boQqt09xM6"
 19 |    },
 20 |    "outputs": [],
 21 |    "source": [
 22 |     "import numpy as np\n",
 23 |     "import pandas as pd\n",
 24 |     "import matplotlib.pyplot as plt\n",
 25 |     "import seaborn as sns"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "code",
 30 |    "execution_count": 3,
 31 |    "metadata": {
 32 |     "executionInfo": {
 33 |      "elapsed": 28,
 34 |      "status": "ok",
 35 |      "timestamp": 1640778210470,
 36 |      "user": {
 37 |       "displayName": "bibek sah",
 38 |       "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjtY0YqqHlXep2Tt94P9dLVJx_J-lV4Nk1cBRPh7A=s64",
 39 |       "userId": "08792417367200435838"
 40 |      },
 41 |      "user_tz": -345
 42 |     },
 43 |     "id": "Yzk3k6Y890vh"
 44 |    },
 45 |    "outputs": [],
 46 |    "source": [
 47 |     "df=pd.read_csv('spam.csv')"
 48 |    ]
 49 |   },
 50 |   {
 51 |    "cell_type": "code",
 52 |    "execution_count": 4,
 53 |    "metadata": {
 54 |     "colab": {
 55 |      "base_uri": "https://localhost:8080/",
 56 |      "height": 206
 57 |     },
 58 |     "executionInfo": {
 59 |      "elapsed": 27,
 60 |      "status": "ok",
 61 |      "timestamp": 1640778210471,
 62 |      "user": {
 63 |       "displayName": "bibek sah",
 64 |       "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjtY0YqqHlXep2Tt94P9dLVJx_J-lV4Nk1cBRPh7A=s64",
 65 |       "userId": "08792417367200435838"
 66 |      },
 67 |      "user_tz": -345
 68 |     },
 69 |     "id": "QfNUzF-z-Qzy",
 70 |     "outputId": "51f730e2-032e-49dd-e93d-6902b4ba8ebc"
 71 |    },
 72 |    "outputs": [
 73 |     {
 74 |      "data": {
 75 |       "text/html": [
 76 |        "<div>\n",
 77 |        "<style scoped>\n",
 78 |        "    .dataframe tbody tr th:only-of-type {\n",
 79 |        "        vertical-align: middle;\n",
 80 |        "    }\n",
 81 |        "\n",
 82 |        "    .dataframe tbody tr th {\n",
 83 |        "        vertical-align: top;\n",
 84 |        "    }\n",
 85 |        "\n",
 86 |        "    .dataframe thead th {\n",
 87 |        "        text-align: right;\n",
 88 |        "    }\n",
 89 |        "</style>\n",
 90 |        "<table border=\"1\" class=\"dataframe\">\n",
 91 |        "  <thead>\n",
 92 |        "    <tr style=\"text-align: right;\">\n",
 93 |        "      <th></th>\n",
 94 |        "      <th>Category</th>\n",
 95 |        "      <th>Message</th>\n",
 96 |        "    </tr>\n",
 97 |        "  </thead>\n",
 98 |        "  <tbody>\n",
 99 |        "    <tr>\n",
100 |        "      <th>0</th>\n",
101 |        "      <td>ham</td>\n",
102 |        "      <td>Go until jurong point, crazy.. Available only ...</td>\n",
103 |        "    </tr>\n",
104 |        "    <tr>\n",
105 |        "      <th>1</th>\n",
106 |        "      <td>ham</td>\n",
107 |        "      <td>Ok lar... Joking wif u oni...</td>\n",
108 |        "    </tr>\n",
109 |        "    <tr>\n",
110 |        "      <th>2</th>\n",
111 |        "      <td>spam</td>\n",
112 |        "      <td>Free entry in 2 a wkly comp to win FA Cup fina...</td>\n",
113 |        "    </tr>\n",
114 |        "    <tr>\n",
115 |        "      <th>3</th>\n",
116 |        "      <td>ham</td>\n",
117 |        "      <td>U dun say so early hor... U c already then say...</td>\n",
118 |        "    </tr>\n",
119 |        "    <tr>\n",
120 |        "      <th>4</th>\n",
121 |        "      <td>ham</td>\n",
122 |        "      <td>Nah I don't think he goes to usf, he lives aro...</td>\n",
123 |        "    </tr>\n",
124 |        "  </tbody>\n",
125 |        "</table>\n",
126 |        "</div>"
127 |       ],
128 |       "text/plain": [
129 |        "  Category                                            Message\n",
130 |        "0      ham  Go until jurong point, crazy.. Available only ...\n",
131 |        "1      ham                      Ok lar... Joking wif u oni...\n",
132 |        "2     spam  Free entry in 2 a wkly comp to win FA Cup fina...\n",
133 |        "3      ham  U dun say so early hor... U c already then say...\n",
134 |        "4      ham  Nah I don't think he goes to usf, he lives aro..."
135 |       ]
136 |      },
137 |      "execution_count": 4,
138 |      "metadata": {},
139 |      "output_type": "execute_result"
140 |     }
141 |    ],
142 |    "source": [
143 |     "df.head()"
144 |    ]
145 |   },
146 |   {
147 |    "cell_type": "code",
148 |    "execution_count": 5,
149 |    "metadata": {
150 |     "colab": {
151 |      "base_uri": "https://localhost:8080/"
152 |     },
153 |     "executionInfo": {
154 |      "elapsed": 17,
155 |      "status": "ok",
156 |      "timestamp": 1640778210472,
157 |      "user": {
158 |       "displayName": "bibek sah",
159 |       "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjtY0YqqHlXep2Tt94P9dLVJx_J-lV4Nk1cBRPh7A=s64",
160 |       "userId": "08792417367200435838"
161 |      },
162 |      "user_tz": -345
163 |     },
164 |     "id": "06nXuOFv_cWx",
165 |     "outputId": "019c195a-c7ef-4b65-8c6e-963f4324c3dc"
166 |    },
167 |    "outputs": [
168 |     {
169 |      "data": {
170 |       "text/plain": [
171 |        "array(['ham', 'spam'], dtype=object)"
172 |       ]
173 |      },
174 |      "execution_count": 5,
175 |      "metadata": {},
176 |      "output_type": "execute_result"
177 |     }
178 |    ],
179 |    "source": [
180 |     "df.Category.unique()"
181 |    ]
182 |   },
183 |   {
184 |    "cell_type": "code",
185 |    "execution_count": 6,
186 |    "metadata": {
187 |     "executionInfo": {
188 |      "elapsed": 831,
189 |      "status": "ok",
190 |      "timestamp": 1640778295983,
191 |      "user": {
192 |       "displayName": "bibek sah",
193 |       "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjtY0YqqHlXep2Tt94P9dLVJx_J-lV4Nk1cBRPh7A=s64",
194 |       "userId": "08792417367200435838"
195 |      },
196 |      "user_tz": -345
197 |     },
198 |     "id": "v4a1QjCV_jKC"
199 |    },
200 |    "outputs": [],
201 |    "source": [
202 |     "df['spam']=df['Category'].apply(lambda x: 1 if x=='spam' else 0)"
203 |    ]
204 |   },
205 |   {
206 |    "cell_type": "code",
207 |    "execution_count": 7,
208 |    "metadata": {
209 |     "colab": {
210 |      "base_uri": "https://localhost:8080/",
211 |      "height": 206
212 |     },
213 |     "executionInfo": {
214 |      "elapsed": 722,
215 |      "status": "ok",
216 |      "timestamp": 1640778655207,
217 |      "user": {
218 |       "displayName": "bibek sah",
219 |       "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjtY0YqqHlXep2Tt94P9dLVJx_J-lV4Nk1cBRPh7A=s64",
220 |       "userId": "08792417367200435838"
221 |      },
222 |      "user_tz": -345
223 |     },
224 |     "id": "wFhq-4sBAeTK",
225 |     "outputId": "76771c6a-0589-45fb-8975-32a7a76ea055"
226 |    },
227 |    "outputs": [
228 |     {
229 |      "data": {
230 |       "text/html": [
231 |        "<div>\n",
232 |        "<style scoped>\n",
233 |        "    .dataframe tbody tr th:only-of-type {\n",
234 |        "        vertical-align: middle;\n",
235 |        "    }\n",
236 |        "\n",
237 |        "    .dataframe tbody tr th {\n",
238 |        "        vertical-align: top;\n",
239 |        "    }\n",
240 |        "\n",
241 |        "    .dataframe thead th {\n",
242 |        "        text-align: right;\n",
243 |        "    }\n",
244 |        "</style>\n",
245 |        "<table border=\"1\" class=\"dataframe\">\n",
246 |        "  <thead>\n",
247 |        "    <tr style=\"text-align: right;\">\n",
248 |        "      <th></th>\n",
249 |        "      <th>Category</th>\n",
250 |        "      <th>Message</th>\n",
251 |        "      <th>spam</th>\n",
252 |        "    </tr>\n",
253 |        "  </thead>\n",
254 |        "  <tbody>\n",
255 |        "    <tr>\n",
256 |        "      <th>0</th>\n",
257 |        "      <td>ham</td>\n",
258 |        "      <td>Go until jurong point, crazy.. Available only ...</td>\n",
259 |        "      <td>0</td>\n",
260 |        "    </tr>\n",
261 |        "    <tr>\n",
262 |        "      <th>1</th>\n",
263 |        "      <td>ham</td>\n",
264 |        "      <td>Ok lar... Joking wif u oni...</td>\n",
265 |        "      <td>0</td>\n",
266 |        "    </tr>\n",
267 |        "    <tr>\n",
268 |        "      <th>2</th>\n",
269 |        "      <td>spam</td>\n",
270 |        "      <td>Free entry in 2 a wkly comp to win FA Cup fina...</td>\n",
271 |        "      <td>1</td>\n",
272 |        "    </tr>\n",
273 |        "    <tr>\n",
274 |        "      <th>3</th>\n",
275 |        "      <td>ham</td>\n",
276 |        "      <td>U dun say so early hor... U c already then say...</td>\n",
277 |        "      <td>0</td>\n",
278 |        "    </tr>\n",
279 |        "    <tr>\n",
280 |        "      <th>4</th>\n",
281 |        "      <td>ham</td>\n",
282 |        "      <td>Nah I don't think he goes to usf, he lives aro...</td>\n",
283 |        "      <td>0</td>\n",
284 |        "    </tr>\n",
285 |        "  </tbody>\n",
286 |        "</table>\n",
287 |        "</div>"
288 |       ],
289 |       "text/plain": [
290 |        "  Category                                            Message  spam\n",
291 |        "0      ham  Go until jurong point, crazy.. Available only ...     0\n",
292 |        "1      ham                      Ok lar... Joking wif u oni...     0\n",
293 |        "2     spam  Free entry in 2 a wkly comp to win FA Cup fina...     1\n",
294 |        "3      ham  U dun say so early hor... U c already then say...     0\n",
295 |        "4      ham  Nah I don't think he goes to usf, he lives aro...     0"
296 |       ]
297 |      },
298 |      "execution_count": 7,
299 |      "metadata": {},
300 |      "output_type": "execute_result"
301 |     }
302 |    ],
303 |    "source": [
304 |     "df.head(5)"
305 |    ]
306 |   },
307 |   {
308 |    "cell_type": "code",
309 |    "execution_count": 8,
310 |    "metadata": {
311 |     "executionInfo": {
312 |      "elapsed": 679,
313 |      "status": "ok",
314 |      "timestamp": 1640778804504,
315 |      "user": {
316 |       "displayName": "bibek sah",
317 |       "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjtY0YqqHlXep2Tt94P9dLVJx_J-lV4Nk1cBRPh7A=s64",
318 |       "userId": "08792417367200435838"
319 |      },
320 |      "user_tz": -345
321 |     },
322 |     "id": "K9RIT364B2Bm"
323 |    },
324 |    "outputs": [],
325 |    "source": [
326 |     "from sklearn.model_selection import train_test_split\n",
327 |     "x_train,x_test,y_train,y_test=train_test_split(df.Message,df.spam,test_size=0.2,random_state=42)"
328 |    ]
329 |   },
330 |   {
331 |    "cell_type": "code",
332 |    "execution_count": 9,
333 |    "metadata": {
334 |     "colab": {
335 |      "base_uri": "https://localhost:8080/"
336 |     },
337 |     "executionInfo": {
338 |      "elapsed": 455,
339 |      "status": "ok",
340 |      "timestamp": 1640778875501,
341 |      "user": {
342 |       "displayName": "bibek sah",
343 |       "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjtY0YqqHlXep2Tt94P9dLVJx_J-lV4Nk1cBRPh7A=s64",
344 |       "userId": "08792417367200435838"
345 |      },
346 |      "user_tz": -345
347 |     },
348 |     "id": "GhSkE8R8CafK",
349 |     "outputId": "690bd42a-2be3-4a5a-e8c1-0706cb4ce2ef"
350 |    },
351 |    "outputs": [
352 |     {
353 |      "data": {
354 |       "text/plain": [
355 |        "4457"
356 |       ]
357 |      },
358 |      "execution_count": 9,
359 |      "metadata": {},
360 |      "output_type": "execute_result"
361 |     }
362 |    ],
363 |    "source": [
364 |     "len(x_train)"
365 |    ]
366 |   },
367 |   {
368 |    "cell_type": "code",
369 |    "execution_count": 10,
370 |    "metadata": {
371 |     "colab": {
372 |      "base_uri": "https://localhost:8080/"
373 |     },
374 |     "executionInfo": {
375 |      "elapsed": 693,
376 |      "status": "ok",
377 |      "timestamp": 1640779197055,
378 |      "user": {
379 |       "displayName": "bibek sah",
380 |       "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjtY0YqqHlXep2Tt94P9dLVJx_J-lV4Nk1cBRPh7A=s64",
381 |       "userId": "08792417367200435838"
382 |      },
383 |      "user_tz": -345
384 |     },
385 |     "id": "d8RKpTA4Cr2d",
386 |     "outputId": "4c0deb38-26c5-409e-fee1-27de285b2b0e"
387 |    },
388 |    "outputs": [
389 |     {
390 |      "data": {
391 |       "text/plain": [
392 |        "1115"
393 |       ]
394 |      },
395 |      "execution_count": 10,
396 |      "metadata": {},
397 |      "output_type": "execute_result"
398 |     }
399 |    ],
400 |    "source": [
401 |     "len(x_test)"
402 |    ]
403 |   },
404 |   {
405 |    "cell_type": "code",
406 |    "execution_count": 11,
407 |    "metadata": {
408 |     "colab": {
409 |      "base_uri": "https://localhost:8080/"
410 |     },
411 |     "executionInfo": {
412 |      "elapsed": 723,
413 |      "status": "ok",
414 |      "timestamp": 1640779312565,
415 |      "user": {
416 |       "displayName": "bibek sah",
417 |       "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjtY0YqqHlXep2Tt94P9dLVJx_J-lV4Nk1cBRPh7A=s64",
418 |       "userId": "08792417367200435838"
419 |      },
420 |      "user_tz": -345
421 |     },
422 |     "id": "6Lb2BpL5D6Tw",
423 |     "outputId": "d9c43a76-f4cd-403e-a5b7-d0c99bc48183"
424 |    },
425 |    "outputs": [
426 |     {
427 |      "data": {
428 |       "text/plain": [
429 |        "array([[0, 0, 0, ..., 0, 0, 0],\n",
430 |        "       [0, 0, 0, ..., 0, 0, 0],\n",
431 |        "       [0, 0, 0, ..., 0, 0, 0],\n",
432 |        "       [0, 0, 0, ..., 0, 0, 0],\n",
433 |        "       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)"
434 |       ]
435 |      },
436 |      "execution_count": 11,
437 |      "metadata": {},
438 |      "output_type": "execute_result"
439 |     }
440 |    ],
441 |    "source": [
442 |     "from sklearn.feature_extraction.text import CountVectorizer\n",
443 |     "v=CountVectorizer()\n",
444 |     "cv_messages = v.fit_transform(x_train.values)\n",
445 |     "cv_messages.toarray()[0:5]"
446 |    ]
447 |   },
448 |   {
449 |    "cell_type": "code",
450 |    "execution_count": 12,
451 |    "metadata": {
452 |     "executionInfo": {
453 |      "elapsed": 7,
454 |      "status": "ok",
455 |      "timestamp": 1640779609411,
456 |      "user": {
457 |       "displayName": "bibek sah",
458 |       "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjtY0YqqHlXep2Tt94P9dLVJx_J-lV4Nk1cBRPh7A=s64",
459 |       "userId": "08792417367200435838"
460 |      },
461 |      "user_tz": -345
462 |     },
463 |     "id": "LUFmKWvVEWgO"
464 |    },
465 |    "outputs": [],
466 |    "source": [
467 |     "from sklearn.naive_bayes import MultinomialNB\n",
468 |     "model=MultinomialNB()"
469 |    ]
470 |   },
471 |   {
472 |    "cell_type": "code",
473 |    "execution_count": 13,
474 |    "metadata": {
475 |     "colab": {
476 |      "base_uri": "https://localhost:8080/"
477 |     },
478 |     "executionInfo": {
479 |      "elapsed": 1578,
480 |      "status": "ok",
481 |      "timestamp": 1640779640258,
482 |      "user": {
483 |       "displayName": "bibek sah",
484 |       "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjtY0YqqHlXep2Tt94P9dLVJx_J-lV4Nk1cBRPh7A=s64",
485 |       "userId": "08792417367200435838"
486 |      },
487 |      "user_tz": -345
488 |     },
489 |     "id": "icy7RxTrFfAm",
490 |     "outputId": "0aee8aff-a9c0-4169-da8a-533f5a49e193"
491 |    },
492 |    "outputs": [
493 |     {
494 |      "data": {
495 |       "text/plain": [
496 |        "MultinomialNB()"
497 |       ]
498 |      },
499 |      "execution_count": 13,
500 |      "metadata": {},
501 |      "output_type": "execute_result"
502 |     }
503 |    ],
504 |    "source": [
505 |     "model.fit(cv_messages,y_train)"
506 |    ]
507 |   },
508 |   {
509 |    "cell_type": "code",
510 |    "execution_count": 14,
511 |    "metadata": {
512 |     "colab": {
513 |      "base_uri": "https://localhost:8080/"
514 |     },
515 |     "executionInfo": {
516 |      "elapsed": 772,
517 |      "status": "ok",
518 |      "timestamp": 1640780294984,
519 |      "user": {
520 |       "displayName": "bibek sah",
521 |       "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjtY0YqqHlXep2Tt94P9dLVJx_J-lV4Nk1cBRPh7A=s64",
522 |       "userId": "08792417367200435838"
523 |      },
524 |      "user_tz": -345
525 |     },
526 |     "id": "sfdn1y7PFmSX",
527 |     "outputId": "6dc6eb0e-db56-48cd-8109-3072c098f6a6"
528 |    },
529 |    "outputs": [
530 |     {
531 |      "data": {
532 |       "text/plain": [
533 |        "array([1, 0], dtype=int64)"
534 |       ]
535 |      },
536 |      "execution_count": 14,
537 |      "metadata": {},
538 |      "output_type": "execute_result"
539 |     }
540 |    ],
541 |    "source": [
542 |     "email = [\n",
543 |     "         'Upto 30% discount on parking, exclusive offer just for yoy. Dont miss thi reward!',\n",
544 |     "         'Ok lar...joking wif u oni...'\n",
545 |     "]\n",
546 |     "email_count= v.transform(email)\n",
547 |     "model.predict(email_count)"
548 |    ]
549 |   },
550 |   {
551 |    "cell_type": "code",
552 |    "execution_count": 15,
553 |    "metadata": {
554 |     "colab": {
555 |      "base_uri": "https://localhost:8080/"
556 |     },
557 |     "executionInfo": {
558 |      "elapsed": 731,
559 |      "status": "ok",
560 |      "timestamp": 1640780362896,
561 |      "user": {
562 |       "displayName": "bibek sah",
563 |       "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjtY0YqqHlXep2Tt94P9dLVJx_J-lV4Nk1cBRPh7A=s64",
564 |       "userId": "08792417367200435838"
565 |      },
566 |      "user_tz": -345
567 |     },
568 |     "id": "I0i5fFZ8IGVJ",
569 |     "outputId": "d4c46fa1-af4c-42c7-93d2-11a425b14a48"
570 |    },
571 |    "outputs": [
572 |     {
573 |      "data": {
574 |       "text/plain": [
575 |        "0.9919282511210762"
576 |       ]
577 |      },
578 |      "execution_count": 15,
579 |      "metadata": {},
580 |      "output_type": "execute_result"
581 |     }
582 |    ],
583 |    "source": [
584 |     "x_test_count=v.transform(x_test)\n",
585 |     "model.score(x_test_count,y_test)\n"
586 |    ]
587 |   },
588 |   {
589 |    "cell_type": "code",
590 |    "execution_count": 16,
591 |    "metadata": {
592 |     "executionInfo": {
593 |      "elapsed": 15,
594 |      "status": "ok",
595 |      "timestamp": 1640780413260,
596 |      "user": {
597 |       "displayName": "bibek sah",
598 |       "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjtY0YqqHlXep2Tt94P9dLVJx_J-lV4Nk1cBRPh7A=s64",
599 |       "userId": "08792417367200435838"
600 |      },
601 |      "user_tz": -345
602 |     },
603 |     "id": "v-ArF0cZIW7x"
604 |    },
605 |    "outputs": [],
606 |    "source": [
607 |     "# sklearn pipeline"
608 |    ]
609 |   },
610 |   {
611 |    "cell_type": "code",
612 |    "execution_count": 17,
613 |    "metadata": {
614 |     "colab": {
615 |      "base_uri": "https://localhost:8080/"
616 |     },
617 |     "executionInfo": {
618 |      "elapsed": 494,
619 |      "status": "ok",
620 |      "timestamp": 1640780722337,
621 |      "user": {
622 |       "displayName": "bibek sah",
623 |       "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjtY0YqqHlXep2Tt94P9dLVJx_J-lV4Nk1cBRPh7A=s64",
624 |       "userId": "08792417367200435838"
625 |      },
626 |      "user_tz": -345
627 |     },
628 |     "id": "Sj-eM9hgIjOB",
629 |     "outputId": "160dfbdd-303b-4ecc-c2b9-a4fd8dac9510"
630 |    },
631 |    "outputs": [
632 |     {
633 |      "data": {
634 |       "text/plain": [
635 |        "Pipeline(steps=[('vectorizer', CountVectorizer()), ('nb', MultinomialNB())])"
636 |       ]
637 |      },
638 |      "execution_count": 17,
639 |      "metadata": {},
640 |      "output_type": "execute_result"
641 |     }
642 |    ],
643 |    "source": [
644 |     "from sklearn.pipeline import Pipeline\n",
645 |     "clf = Pipeline([\n",
646 |     "      ('vectorizer', CountVectorizer()),\n",
647 |     "      ('nb', MultinomialNB())          \n",
648 |     "]\n",
649 |     ")\n",
650 |     "clf.fit(x_train,y_train)"
651 |    ]
652 |   },
653 |   {
654 |    "cell_type": "code",
655 |    "execution_count": 18,
656 |    "metadata": {
657 |     "colab": {
658 |      "base_uri": "https://localhost:8080/"
659 |     },
660 |     "executionInfo": {
661 |      "elapsed": 697,
662 |      "status": "ok",
663 |      "timestamp": 1640780793192,
664 |      "user": {
665 |       "displayName": "bibek sah",
666 |       "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjtY0YqqHlXep2Tt94P9dLVJx_J-lV4Nk1cBRPh7A=s64",
667 |       "userId": "08792417367200435838"
668 |      },
669 |      "user_tz": -345
670 |     },
671 |     "id": "c4oNWSVmJuzd",
672 |     "outputId": "4799d618-2345-4c43-f284-fbaca872b976"
673 |    },
674 |    "outputs": [
675 |     {
676 |      "data": {
677 |       "text/plain": [
678 |        "array([1, 0], dtype=int64)"
679 |       ]
680 |      },
681 |      "execution_count": 18,
682 |      "metadata": {},
683 |      "output_type": "execute_result"
684 |     }
685 |    ],
686 |    "source": [
687 |     "email = [\n",
688 |     "        'Upto 30% discount on parking, exclusive offer just for yoy. Dont miss thi reward!',\n",
689 |     "         'Ok lar...joking wif u oni...'  \n",
690 |     "]\n",
691 |     "clf.predict(email)"
692 |    ]
693 |   },
694 |   {
695 |    "cell_type": "code",
696 |    "execution_count": 19,
697 |    "metadata": {
698 |     "colab": {
699 |      "base_uri": "https://localhost:8080/"
700 |     },
701 |     "executionInfo": {
702 |      "elapsed": 1829,
703 |      "status": "ok",
704 |      "timestamp": 1640780963050,
705 |      "user": {
706 |       "displayName": "bibek sah",
707 |       "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjtY0YqqHlXep2Tt94P9dLVJx_J-lV4Nk1cBRPh7A=s64",
708 |       "userId": "08792417367200435838"
709 |      },
710 |      "user_tz": -345
711 |     },
712 |     "id": "o752BL8PJ_-5",
713 |     "outputId": "2361abbd-386a-4078-b145-1e0aab7c3254"
714 |    },
715 |    "outputs": [
716 |     {
717 |      "data": {
718 |       "text/plain": [
719 |        "0.9919282511210762"
720 |       ]
721 |      },
722 |      "execution_count": 19,
723 |      "metadata": {},
724 |      "output_type": "execute_result"
725 |     }
726 |    ],
727 |    "source": [
728 |     "clf.score(x_test,y_test)"
729 |    ]
730 |   },
731 |   {
732 |    "cell_type": "code",
733 |    "execution_count": 20,
734 |    "metadata": {
735 |     "colab": {
736 |      "base_uri": "https://localhost:8080/"
737 |     },
738 |     "executionInfo": {
739 |      "elapsed": 692,
740 |      "status": "ok",
741 |      "timestamp": 1640781005830,
742 |      "user": {
743 |       "displayName": "bibek sah",
744 |       "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjtY0YqqHlXep2Tt94P9dLVJx_J-lV4Nk1cBRPh7A=s64",
745 |       "userId": "08792417367200435838"
746 |      },
747 |      "user_tz": -345
748 |     },
749 |     "id": "Wb5rbwIVKo1S",
750 |     "outputId": "4cb35f90-0e81-49c9-8379-0b477420d462"
751 |    },
752 |    "outputs": [
753 |     {
754 |      "data": {
755 |       "text/plain": [
756 |        "['spam_model.pkl']"
757 |       ]
758 |      },
759 |      "execution_count": 20,
760 |      "metadata": {},
761 |      "output_type": "execute_result"
762 |     }
763 |    ],
764 |    "source": [
765 |     "import joblib\n",
766 |     "joblib.dump(clf,'spam_model.pkl')"
767 |    ]
768 |   },
769 |   {
770 |    "cell_type": "code",
771 |    "execution_count": 21,
772 |    "metadata": {
773 |     "executionInfo": {
774 |      "elapsed": 11,
775 |      "status": "ok",
776 |      "timestamp": 1640781042860,
777 |      "user": {
778 |       "displayName": "bibek sah",
779 |       "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjtY0YqqHlXep2Tt94P9dLVJx_J-lV4Nk1cBRPh7A=s64",
780 |       "userId": "08792417367200435838"
781 |      },
782 |      "user_tz": -345
783 |     },
784 |     "id": "pP9Ji7YHKz4h"
785 |    },
786 |    "outputs": [],
787 |    "source": [
788 |     "# model is completed"
789 |    ]
790 |   },
791 |   {
792 |    "cell_type": "code",
793 |    "execution_count": null,
794 |    "metadata": {
795 |     "id": "DegaoHEFK87R"
796 |    },
797 |    "outputs": [],
798 |    "source": []
799 |   }
800 |  ],
801 |  "metadata": {
802 |   "colab": {
803 |    "authorship_tag": "ABX9TyOu3xR/1JAiPy608KPO62Wq",
804 |    "collapsed_sections": [],
805 |    "mount_file_id": "1XRYrPikxSuVab8l-DhT_L5o44vnz3fDE",
806 |    "name": "Untitled2.ipynb",
807 |    "provenance": []
808 |   },
809 |   "kernelspec": {
810 |    "display_name": "Python 3 (ipykernel)",
811 |    "language": "python",
812 |    "name": "python3"
813 |   },
814 |   "language_info": {
815 |    "codemirror_mode": {
816 |     "name": "ipython",
817 |     "version": 3
818 |    },
819 |    "file_extension": ".py",
820 |    "mimetype": "text/x-python",
821 |    "name": "python",
822 |    "nbconvert_exporter": "python",
823 |    "pygments_lexer": "ipython3",
824 |    "version": "3.9.7"
825 |   }
826 |  },
827 |  "nbformat": 4,
828 |  "nbformat_minor": 1
829 | }
830 | 


--------------------------------------------------------------------------------
/regularization-in-machine-learning/regularization-in-machine-learning.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "markdown",
   5 |    "id": "85a85782",
   6 |    "metadata": {
   7 |     "papermill": {
   8 |      "duration": 0.042361,
   9 |      "end_time": "2021-12-22T19:07:37.877879",
  10 |      "exception": false,
  11 |      "start_time": "2021-12-22T19:07:37.835518",
  12 |      "status": "completed"
  13 |     },
  14 |     "tags": []
  15 |    },
  16 |    "source": [
  17 |     "### Regularization in Machine Learning"
  18 |    ]
  19 |   },
  20 |   {
  21 |    "cell_type": "markdown",
  22 |    "id": "11910490",
  23 |    "metadata": {
  24 |     "papermill": {
  25 |      "duration": 0.035764,
  26 |      "end_time": "2021-12-22T19:07:37.950117",
  27 |      "exception": false,
  28 |      "start_time": "2021-12-22T19:07:37.914353",
  29 |      "status": "completed"
  30 |     },
  31 |     "tags": []
  32 |    },
  33 |    "source": [
  34 |     "# what is regularization in ML\n",
  35 |     "\n",
  36 |     "- a technique to prevent the model from overfitting by adding extra information to it.\n",
  37 |     "- it maintains all variables or features in the model while reducing the magnitude of their coefficients.\n",
  38 |     "- Hence, it maintains accuracy as well as a generalization of the model.\n",
  39 |     "- In simple words, \"In regularization technique, we reduce the magnitude of the features by keeping the same number of features.\"\n",
  40 |     "- mainly regularizes or reduces the coefficient of features toward zero"
  41 |    ]
  42 |   },
  43 |   {
  44 |    "cell_type": "code",
  45 |    "execution_count": 1,
  46 |    "id": "646c8325",
  47 |    "metadata": {
  48 |     "execution": {
  49 |      "iopub.execute_input": "2021-12-22T19:07:38.029150Z",
  50 |      "iopub.status.busy": "2021-12-22T19:07:38.027353Z",
  51 |      "iopub.status.idle": "2021-12-22T19:07:38.032283Z",
  52 |      "shell.execute_reply": "2021-12-22T19:07:38.032799Z"
  53 |     },
  54 |     "papermill": {
  55 |      "duration": 0.046738,
  56 |      "end_time": "2021-12-22T19:07:38.033119",
  57 |      "exception": false,
  58 |      "start_time": "2021-12-22T19:07:37.986381",
  59 |      "status": "completed"
  60 |     },
  61 |     "tags": []
  62 |    },
  63 |    "outputs": [],
  64 |    "source": [
  65 |     "# Basics of regularization"
  66 |    ]
  67 |   },
  68 |   {
  69 |    "cell_type": "markdown",
  70 |    "id": "c3676748",
  71 |    "metadata": {
  72 |     "papermill": {
  73 |      "duration": 0.036146,
  74 |      "end_time": "2021-12-22T19:07:38.108836",
  75 |      "exception": false,
  76 |      "start_time": "2021-12-22T19:07:38.072690",
  77 |      "status": "completed"
  78 |     },
  79 |     "tags": []
  80 |    },
  81 |    "source": [
  82 |     "- a technique to prevent the model from overfitting by adding extra information to it.\n",
  83 |     "-  maintains accuracy as well as a generalization of the model\n",
  84 |     "-  reduces the magnitude of the variables, hence maintains all variables or features\n",
  85 |     "-  In simple words, \"In regularization technique, we reduce the magnitude of the features by keeping the same number of features\"\n",
  86 |     "- by adding a penalty or complexity term to the complex model"
  87 |    ]
  88 |   },
  89 |   {
  90 |    "cell_type": "code",
  91 |    "execution_count": 2,
  92 |    "id": "275c826f",
  93 |    "metadata": {
  94 |     "execution": {
  95 |      "iopub.execute_input": "2021-12-22T19:07:38.185704Z",
  96 |      "iopub.status.busy": "2021-12-22T19:07:38.184715Z",
  97 |      "iopub.status.idle": "2021-12-22T19:07:38.188117Z",
  98 |      "shell.execute_reply": "2021-12-22T19:07:38.188755Z"
  99 |     },
 100 |     "papermill": {
 101 |      "duration": 0.043564,
 102 |      "end_time": "2021-12-22T19:07:38.188919",
 103 |      "exception": false,
 104 |      "start_time": "2021-12-22T19:07:38.145355",
 105 |      "status": "completed"
 106 |     },
 107 |     "tags": []
 108 |    },
 109 |    "outputs": [],
 110 |    "source": [
 111 |     "# How does Regularization Work?"
 112 |    ]
 113 |   },
 114 |   {
 115 |    "cell_type": "markdown",
 116 |    "id": "b8aea070",
 117 |    "metadata": {
 118 |     "papermill": {
 119 |      "duration": 0.036276,
 120 |      "end_time": "2021-12-22T19:07:38.261804",
 121 |      "exception": false,
 122 |      "start_time": "2021-12-22T19:07:38.225528",
 123 |      "status": "completed"
 124 |     },
 125 |     "tags": []
 126 |    },
 127 |    "source": [
 128 |     "Let's consider the simple linear regression equation:\n",
 129 |     "y= β0+β1x1+β2x2+β3x3+⋯+βnxn +b\n",
 130 |     "\n",
 131 |     "Y represents the value to be predicted\n",
 132 |     "X1, X2, …Xn are the features for Y.\n",
 133 |     "\n",
 134 |     "β0, β1, …, βn are the weights or magnitudes\n",
 135 |     "b represents the intercept.\n",
 136 |     "\n",
 137 |     "The loss function for linear regression is called the RSS, or residual sum of squares.\n",
 138 |     "\n",
 139 |     "Techniques of Regularization:\n",
 140 |     "• Ridge Regression\n",
 141 |     "• Lasso Regression"
 142 |    ]
 143 |   },
 144 |   {
 145 |    "cell_type": "code",
 146 |    "execution_count": 3,
 147 |    "id": "dfac9cb6",
 148 |    "metadata": {
 149 |     "execution": {
 150 |      "iopub.execute_input": "2021-12-22T19:07:38.338525Z",
 151 |      "iopub.status.busy": "2021-12-22T19:07:38.337603Z",
 152 |      "iopub.status.idle": "2021-12-22T19:07:38.342423Z",
 153 |      "shell.execute_reply": "2021-12-22T19:07:38.343040Z"
 154 |     },
 155 |     "papermill": {
 156 |      "duration": 0.04495,
 157 |      "end_time": "2021-12-22T19:07:38.343227",
 158 |      "exception": false,
 159 |      "start_time": "2021-12-22T19:07:38.298277",
 160 |      "status": "completed"
 161 |     },
 162 |     "tags": []
 163 |    },
 164 |    "outputs": [],
 165 |    "source": [
 166 |     "# Ridge regression:"
 167 |    ]
 168 |   },
 169 |   {
 170 |    "cell_type": "markdown",
 171 |    "id": "8246b5e3",
 172 |    "metadata": {
 173 |     "papermill": {
 174 |      "duration": 0.036138,
 175 |      "end_time": "2021-12-22T19:07:38.416009",
 176 |      "exception": false,
 177 |      "start_time": "2021-12-22T19:07:38.379871",
 178 |      "status": "completed"
 179 |     },
 180 |     "tags": []
 181 |    },
 182 |    "source": [
 183 |     "- a small amount of bias is added\n",
 184 |     "- reduces the complexity of the model, \n",
 185 |     "- also called L2 regularization\n",
 186 |     "- cost function is altered by adding the penalty term to it\n",
 187 |     "- the amount of bias added to the model is called the Ridge Regression penalty."
 188 |    ]
 189 |   },
 190 |   {
 191 |    "cell_type": "markdown",
 192 |    "id": "c40b82bb",
 193 |    "metadata": {
 194 |     "papermill": {
 195 |      "duration": 0.036398,
 196 |      "end_time": "2021-12-22T19:07:38.488992",
 197 |      "exception": false,
 198 |      "start_time": "2021-12-22T19:07:38.452594",
 199 |      "status": "completed"
 200 |     },
 201 |     "tags": []
 202 |    },
 203 |    "source": [
 204 |     "From the cost function of Ridge Regression we can see that if the value of λ tends to zero, the equation becomes the cost function of the linear regression model.\n",
 205 |     "\n",
 206 |     "A general linear or polynomial regression will fail if there is high collinearity between the independent variables, so to solve such problems, Ridge regression can be used."
 207 |    ]
 208 |   },
 209 |   {
 210 |    "cell_type": "code",
 211 |    "execution_count": 4,
 212 |    "id": "91722b0d",
 213 |    "metadata": {
 214 |     "execution": {
 215 |      "iopub.execute_input": "2021-12-22T19:07:38.566612Z",
 216 |      "iopub.status.busy": "2021-12-22T19:07:38.565649Z",
 217 |      "iopub.status.idle": "2021-12-22T19:07:38.568855Z",
 218 |      "shell.execute_reply": "2021-12-22T19:07:38.569391Z"
 219 |     },
 220 |     "papermill": {
 221 |      "duration": 0.043271,
 222 |      "end_time": "2021-12-22T19:07:38.569551",
 223 |      "exception": false,
 224 |      "start_time": "2021-12-22T19:07:38.526280",
 225 |      "status": "completed"
 226 |     },
 227 |     "tags": []
 228 |    },
 229 |    "outputs": [],
 230 |    "source": [
 231 |     "# Lasso regression"
 232 |    ]
 233 |   },
 234 |   {
 235 |    "cell_type": "markdown",
 236 |    "id": "e18ae18a",
 237 |    "metadata": {
 238 |     "papermill": {
 239 |      "duration": 0.036278,
 240 |      "end_time": "2021-12-22T19:07:38.642318",
 241 |      "exception": false,
 242 |      "start_time": "2021-12-22T19:07:38.606040",
 243 |      "status": "completed"
 244 |     },
 245 |     "tags": []
 246 |    },
 247 |    "source": [
 248 |     "Lasso Regression:\n",
 249 |     "- stands for Least Absolute Shrinkage and Selection Operator\n",
 250 |     "- also called L1 regularization\n",
 251 |     "- reduces the complexity of the model\n",
 252 |     "- similar to the Ridge Regression except that the penalty term contains only the absolute weights instead of a square of weights\n",
 253 |     "- Since it takes absolute values, it can shrink the slope to zero\n",
 254 |     "- whereas Ridge Regression can only shrink it near to 0.\n",
 255 |     "- Some of the features are completely neglected for model evaluation\n",
 256 |     "- hence Lasso helps in reducing overfitting and also in feature selection"
 257 |    ]
 258 |   },
 259 |   {
 260 |    "cell_type": "markdown",
 261 |    "id": "1e22ea3d",
 262 |    "metadata": {
 263 |     "papermill": {
 264 |      "duration": 0.035978,
 265 |      "end_time": "2021-12-22T19:07:38.714585",
 266 |      "exception": false,
 267 |      "start_time": "2021-12-22T19:07:38.678607",
 268 |      "status": "completed"
 269 |     },
 270 |     "tags": []
 271 |    },
 272 |    "source": [
 273 |     "Lasso Regression adds “absolute value of magnitude” of coefficient as penalty term to the loss function(L). \n",
 274 |     "Ridge regression adds “squared magnitude” of coefficient as penalty term to the loss function(L)."
 275 |    ]
 276 |   },
 277 |   {
 278 |    "cell_type": "code",
 279 |    "execution_count": 5,
 280 |    "id": "5dfc333a",
 281 |    "metadata": {
 282 |     "execution": {
 283 |      "iopub.execute_input": "2021-12-22T19:07:38.791141Z",
 284 |      "iopub.status.busy": "2021-12-22T19:07:38.790203Z",
 285 |      "iopub.status.idle": "2021-12-22T19:07:38.793467Z",
 286 |      "shell.execute_reply": "2021-12-22T19:07:38.794295Z"
 287 |     },
 288 |     "papermill": {
 289 |      "duration": 0.043469,
 290 |      "end_time": "2021-12-22T19:07:38.794464",
 291 |      "exception": false,
 292 |      "start_time": "2021-12-22T19:07:38.750995",
 293 |      "status": "completed"
 294 |     },
 295 |     "tags": []
 296 |    },
 297 |    "outputs": [],
 298 |    "source": [
 299 |     "### Implementation of Lasso Regression"
 300 |    ]
 301 |   },
 302 |   {
 303 |    "cell_type": "code",
 304 |    "execution_count": 6,
 305 |    "id": "f1be8d74",
 306 |    "metadata": {
 307 |     "execution": {
 308 |      "iopub.execute_input": "2021-12-22T19:07:38.871418Z",
 309 |      "iopub.status.busy": "2021-12-22T19:07:38.870473Z",
 310 |      "iopub.status.idle": "2021-12-22T19:07:38.879330Z",
 311 |      "shell.execute_reply": "2021-12-22T19:07:38.879908Z"
 312 |     },
 313 |     "papermill": {
 314 |      "duration": 0.048782,
 315 |      "end_time": "2021-12-22T19:07:38.880083",
 316 |      "exception": false,
 317 |      "start_time": "2021-12-22T19:07:38.831301",
 318 |      "status": "completed"
 319 |     },
 320 |     "tags": []
 321 |    },
 322 |    "outputs": [],
 323 |    "source": [
 324 |     "import pandas as pd\n",
 325 |     "import numpy as np\n",
 326 |     "import matplotlib.pyplot as plt"
 327 |    ]
 328 |   },
 329 |   {
 330 |    "cell_type": "markdown",
 331 |    "id": "ab060d3e",
 332 |    "metadata": {
 333 |     "papermill": {
 334 |      "duration": 0.036029,
 335 |      "end_time": "2021-12-22T19:07:38.952464",
 336 |      "exception": false,
 337 |      "start_time": "2021-12-22T19:07:38.916435",
 338 |      "status": "completed"
 339 |     },
 340 |     "tags": []
 341 |    },
 342 |    "source": [
 343 |     "We are going to use the Boston house price dataset, which is a built-in dataset in sklearn"
 344 |    ]
 345 |   },
 346 |   {
 347 |    "cell_type": "code",
 348 |    "execution_count": 7,
 349 |    "id": "e06717e2",
 350 |    "metadata": {
 351 |     "execution": {
 352 |      "iopub.execute_input": "2021-12-22T19:07:39.030301Z",
 353 |      "iopub.status.busy": "2021-12-22T19:07:39.029298Z",
 354 |      "iopub.status.idle": "2021-12-22T19:07:40.031602Z",
 355 |      "shell.execute_reply": "2021-12-22T19:07:40.032128Z"
 356 |     },
 357 |     "papermill": {
 358 |      "duration": 1.043508,
 359 |      "end_time": "2021-12-22T19:07:40.032329",
 360 |      "exception": false,
 361 |      "start_time": "2021-12-22T19:07:38.988821",
 362 |      "status": "completed"
 363 |     },
 364 |     "tags": []
 365 |    },
 366 |    "outputs": [],
 367 |    "source": [
 368 |     "from sklearn.datasets import load_boston\n",
 369 |     "boston=load_boston()"
 370 |    ]
 371 |   },
 372 |   {
 373 |    "cell_type": "code",
 374 |    "execution_count": 8,
 375 |    "id": "9e81cdf1",
 376 |    "metadata": {
 377 |     "execution": {
 378 |      "iopub.execute_input": "2021-12-22T19:07:40.116864Z",
 379 |      "iopub.status.busy": "2021-12-22T19:07:40.116015Z",
 380 |      "iopub.status.idle": "2021-12-22T19:07:40.119134Z",
 381 |      "shell.execute_reply": "2021-12-22T19:07:40.119605Z"
 382 |     },
 383 |     "papermill": {
 384 |      "duration": 0.048494,
 385 |      "end_time": "2021-12-22T19:07:40.119782",
 386 |      "exception": false,
 387 |      "start_time": "2021-12-22T19:07:40.071288",
 388 |      "status": "completed"
 389 |     },
 390 |     "tags": []
 391 |    },
 392 |    "outputs": [
 393 |     {
 394 |      "data": {
 395 |       "text/plain": [
 396 |        "['DESCR', 'data', 'feature_names', 'filename', 'target']"
 397 |       ]
 398 |      },
 399 |      "execution_count": 8,
 400 |      "metadata": {},
 401 |      "output_type": "execute_result"
 402 |     }
 403 |    ],
 404 |    "source": [
 405 |     "# Getting attributes of boston\n",
 406 |     "dir(boston)"
 407 |    ]
 408 |   },
 409 |   {
 410 |    "cell_type": "code",
 411 |    "execution_count": 9,
 412 |    "id": "25ce3d97",
 413 |    "metadata": {
 414 |     "execution": {
 415 |      "iopub.execute_input": "2021-12-22T19:07:40.198755Z",
 416 |      "iopub.status.busy": "2021-12-22T19:07:40.198029Z",
 417 |      "iopub.status.idle": "2021-12-22T19:07:40.202579Z",
 418 |      "shell.execute_reply": "2021-12-22T19:07:40.203158Z"
 419 |     },
 420 |     "papermill": {
 421 |      "duration": 0.045441,
 422 |      "end_time": "2021-12-22T19:07:40.203322",
 423 |      "exception": false,
 424 |      "start_time": "2021-12-22T19:07:40.157881",
 425 |      "status": "completed"
 426 |     },
 427 |     "tags": []
 428 |    },
 429 |    "outputs": [
 430 |     {
 431 |      "data": {
 432 |       "text/plain": [
 433 |        "\".. _boston_dataset:\\n\\nBoston house prices dataset\\n---------------------------\\n\\n**Data Set Characteristics:**  \\n\\n    :Number of Instances: 506 \\n\\n    :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.\\n\\n    :Attribute Information (in order):\\n        - CRIM     per capita crime rate by town\\n        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.\\n        - INDUS    proportion of non-retail business acres per town\\n        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)\\n        - NOX      nitric oxides concentration (parts per 10 million)\\n        - RM       average number of rooms per dwelling\\n        - AGE      proportion of owner-occupied units built prior to 1940\\n        - DIS      weighted distances to five Boston employment centres\\n        - RAD      index of accessibility to radial highways\\n        - TAX      full-value property-tax rate per $10,000\\n        - PTRATIO  pupil-teacher ratio by town\\n        - B        1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town\\n        - LSTAT    % lower status of the population\\n        - MEDV     Median value of owner-occupied homes in $1000's\\n\\n    :Missing Attribute Values: None\\n\\n    :Creator: Harrison, D. and Rubinfeld, D.L.\\n\\nThis is a copy of UCI ML housing dataset.\\nhttps://archive.ics.uci.edu/ml/machine-learning-databases/housing/\\n\\n\\nThis dataset was taken from the StatLib library which is maintained at Carnegie Mellon University.\\n\\nThe Boston house-price data of Harrison, D. and Rubinfeld, D.L. 'Hedonic\\nprices and the demand for clean air', J. Environ. Economics & Management,\\nvol.5, 81-102, 1978.   Used in Belsley, Kuh & Welsch, 'Regression diagnostics\\n...', Wiley, 1980.   N.B. 
Various transformations are used in the table on\\npages 244-261 of the latter.\\n\\nThe Boston house-price data has been used in many machine learning papers that address regression\\nproblems.   \\n     \\n.. topic:: References\\n\\n   - Belsley, Kuh & Welsch, 'Regression diagnostics: Identifying Influential Data and Sources of Collinearity', Wiley, 1980. 244-261.\\n   - Quinlan,R. (1993). Combining Instance-Based and Model-Based Learning. In Proceedings on the Tenth International Conference of Machine Learning, 236-243, University of Massachusetts, Amherst. Morgan Kaufmann.\\n\""
 434 |       ]
 435 |      },
 436 |      "execution_count": 9,
 437 |      "metadata": {},
 438 |      "output_type": "execute_result"
 439 |     }
 440 |    ],
 441 |    "source": [
 442 |     "# printing description\n",
 443 |     "boston.DESCR"
 444 |    ]
 445 |   },
 446 |   {
 447 |    "cell_type": "code",
 448 |    "execution_count": 10,
 449 |    "id": "8ebb5684",
 450 |    "metadata": {
 451 |     "execution": {
 452 |      "iopub.execute_input": "2021-12-22T19:07:40.288364Z",
 453 |      "iopub.status.busy": "2021-12-22T19:07:40.287693Z",
 454 |      "iopub.status.idle": "2021-12-22T19:07:40.290236Z",
 455 |      "shell.execute_reply": "2021-12-22T19:07:40.290736Z"
 456 |     },
 457 |     "papermill": {
 458 |      "duration": 0.048769,
 459 |      "end_time": "2021-12-22T19:07:40.290894",
 460 |      "exception": false,
 461 |      "start_time": "2021-12-22T19:07:40.242125",
 462 |      "status": "completed"
 463 |     },
 464 |     "tags": []
 465 |    },
 466 |    "outputs": [
 467 |     {
 468 |      "data": {
 469 |       "text/plain": [
 470 |        "array([[6.3200e-03, 1.8000e+01, 2.3100e+00, ..., 1.5300e+01, 3.9690e+02,\n",
 471 |        "        4.9800e+00],\n",
 472 |        "       [2.7310e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9690e+02,\n",
 473 |        "        9.1400e+00],\n",
 474 |        "       [2.7290e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9283e+02,\n",
 475 |        "        4.0300e+00],\n",
 476 |        "       ...,\n",
 477 |        "       [6.0760e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,\n",
 478 |        "        5.6400e+00],\n",
 479 |        "       [1.0959e-01, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9345e+02,\n",
 480 |        "        6.4800e+00],\n",
 481 |        "       [4.7410e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,\n",
 482 |        "        7.8800e+00]])"
 483 |       ]
 484 |      },
 485 |      "execution_count": 10,
 486 |      "metadata": {},
 487 |      "output_type": "execute_result"
 488 |     }
 489 |    ],
 490 |    "source": [
 491 |     "# Printing the \"data\" attribute of the dataset, it's our input \n",
 492 |     "boston.data"
 493 |    ]
 494 |   },
 495 |   {
 496 |    "cell_type": "code",
 497 |    "execution_count": 11,
 498 |    "id": "2931de15",
 499 |    "metadata": {
 500 |     "execution": {
 501 |      "iopub.execute_input": "2021-12-22T19:07:40.371336Z",
 502 |      "iopub.status.busy": "2021-12-22T19:07:40.370379Z",
 503 |      "iopub.status.idle": "2021-12-22T19:07:40.376451Z",
 504 |      "shell.execute_reply": "2021-12-22T19:07:40.375892Z"
 505 |     },
 506 |     "papermill": {
 507 |      "duration": 0.047497,
 508 |      "end_time": "2021-12-22T19:07:40.376583",
 509 |      "exception": false,
 510 |      "start_time": "2021-12-22T19:07:40.329086",
 511 |      "status": "completed"
 512 |     },
 513 |     "tags": []
 514 |    },
 515 |    "outputs": [
 516 |     {
 517 |      "data": {
 518 |       "text/plain": [
 519 |        "array(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD',\n",
 520 |        "       'TAX', 'PTRATIO', 'B', 'LSTAT'], dtype='<U7')"
 521 |       ]
 522 |      },
 523 |      "execution_count": 11,
 524 |      "metadata": {},
 525 |      "output_type": "execute_result"
 526 |     }
 527 |    ],
 528 |    "source": [
 529 |     "# Getting features names of the dataset\n",
 530 |     "boston.feature_names"
 531 |    ]
 532 |   },
 533 |   {
 534 |    "cell_type": "code",
 535 |    "execution_count": 12,
 536 |    "id": "83bce137",
 537 |    "metadata": {
 538 |     "execution": {
 539 |      "iopub.execute_input": "2021-12-22T19:07:40.460329Z",
 540 |      "iopub.status.busy": "2021-12-22T19:07:40.459487Z",
 541 |      "iopub.status.idle": "2021-12-22T19:07:40.462647Z",
 542 |      "shell.execute_reply": "2021-12-22T19:07:40.463197Z"
 543 |     },
 544 |     "papermill": {
 545 |      "duration": 0.048007,
 546 |      "end_time": "2021-12-22T19:07:40.463364",
 547 |      "exception": false,
 548 |      "start_time": "2021-12-22T19:07:40.415357",
 549 |      "status": "completed"
 550 |     },
 551 |     "tags": []
 552 |    },
 553 |    "outputs": [
 554 |     {
 555 |      "data": {
 556 |       "text/plain": [
 557 |        "array([24. , 21.6, 34.7, 33.4, 36.2, 28.7, 22.9, 27.1, 16.5, 18.9])"
 558 |       ]
 559 |      },
 560 |      "execution_count": 12,
 561 |      "metadata": {},
 562 |      "output_type": "execute_result"
 563 |     }
 564 |    ],
 565 |    "source": [
 566 |     "# Printing first 10 values of target \n",
 567 |     "boston.target[0:10]"
 568 |    ]
 569 |   },
 570 |   {
 571 |    "cell_type": "code",
 572 |    "execution_count": 13,
 573 |    "id": "301b1ea1",
 574 |    "metadata": {
 575 |     "execution": {
 576 |      "iopub.execute_input": "2021-12-22T19:07:40.545474Z",
 577 |      "iopub.status.busy": "2021-12-22T19:07:40.544525Z",
 578 |      "iopub.status.idle": "2021-12-22T19:07:40.550270Z",
 579 |      "shell.execute_reply": "2021-12-22T19:07:40.550774Z"
 580 |     },
 581 |     "papermill": {
 582 |      "duration": 0.048484,
 583 |      "end_time": "2021-12-22T19:07:40.550956",
 584 |      "exception": false,
 585 |      "start_time": "2021-12-22T19:07:40.502472",
 586 |      "status": "completed"
 587 |     },
 588 |     "tags": []
 589 |    },
 590 |    "outputs": [],
 591 |    "source": [
 592 |     "# Creating a dataframe from the data\n",
 593 |     "df=pd.DataFrame(boston.data,columns=boston.feature_names)"
 594 |    ]
 595 |   },
 596 |   {
 597 |    "cell_type": "code",
 598 |    "execution_count": 14,
 599 |    "id": "e4137336",
 600 |    "metadata": {
 601 |     "execution": {
 602 |      "iopub.execute_input": "2021-12-22T19:07:40.633471Z",
 603 |      "iopub.status.busy": "2021-12-22T19:07:40.632500Z",
 604 |      "iopub.status.idle": "2021-12-22T19:07:40.657106Z",
 605 |      "shell.execute_reply": "2021-12-22T19:07:40.657590Z"
 606 |     },
 607 |     "papermill": {
 608 |      "duration": 0.067285,
 609 |      "end_time": "2021-12-22T19:07:40.657773",
 610 |      "exception": false,
 611 |      "start_time": "2021-12-22T19:07:40.590488",
 612 |      "status": "completed"
 613 |     },
 614 |     "tags": []
 615 |    },
 616 |    "outputs": [
 617 |     {
 618 |      "data": {
 619 |       "text/html": [
 620 |        "<div>\n",
 621 |        "<style scoped>\n",
 622 |        "    .dataframe tbody tr th:only-of-type {\n",
 623 |        "        vertical-align: middle;\n",
 624 |        "    }\n",
 625 |        "\n",
 626 |        "    .dataframe tbody tr th {\n",
 627 |        "        vertical-align: top;\n",
 628 |        "    }\n",
 629 |        "\n",
 630 |        "    .dataframe thead th {\n",
 631 |        "        text-align: right;\n",
 632 |        "    }\n",
 633 |        "</style>\n",
 634 |        "<table border=\"1\" class=\"dataframe\">\n",
 635 |        "  <thead>\n",
 636 |        "    <tr style=\"text-align: right;\">\n",
 637 |        "      <th></th>\n",
 638 |        "      <th>CRIM</th>\n",
 639 |        "      <th>ZN</th>\n",
 640 |        "      <th>INDUS</th>\n",
 641 |        "      <th>CHAS</th>\n",
 642 |        "      <th>NOX</th>\n",
 643 |        "      <th>RM</th>\n",
 644 |        "      <th>AGE</th>\n",
 645 |        "      <th>DIS</th>\n",
 646 |        "      <th>RAD</th>\n",
 647 |        "      <th>TAX</th>\n",
 648 |        "      <th>PTRATIO</th>\n",
 649 |        "      <th>B</th>\n",
 650 |        "      <th>LSTAT</th>\n",
 651 |        "    </tr>\n",
 652 |        "  </thead>\n",
 653 |        "  <tbody>\n",
 654 |        "    <tr>\n",
 655 |        "      <th>0</th>\n",
 656 |        "      <td>0.00632</td>\n",
 657 |        "      <td>18.0</td>\n",
 658 |        "      <td>2.31</td>\n",
 659 |        "      <td>0.0</td>\n",
 660 |        "      <td>0.538</td>\n",
 661 |        "      <td>6.575</td>\n",
 662 |        "      <td>65.2</td>\n",
 663 |        "      <td>4.0900</td>\n",
 664 |        "      <td>1.0</td>\n",
 665 |        "      <td>296.0</td>\n",
 666 |        "      <td>15.3</td>\n",
 667 |        "      <td>396.9</td>\n",
 668 |        "      <td>4.98</td>\n",
 669 |        "    </tr>\n",
 670 |        "    <tr>\n",
 671 |        "      <th>1</th>\n",
 672 |        "      <td>0.02731</td>\n",
 673 |        "      <td>0.0</td>\n",
 674 |        "      <td>7.07</td>\n",
 675 |        "      <td>0.0</td>\n",
 676 |        "      <td>0.469</td>\n",
 677 |        "      <td>6.421</td>\n",
 678 |        "      <td>78.9</td>\n",
 679 |        "      <td>4.9671</td>\n",
 680 |        "      <td>2.0</td>\n",
 681 |        "      <td>242.0</td>\n",
 682 |        "      <td>17.8</td>\n",
 683 |        "      <td>396.9</td>\n",
 684 |        "      <td>9.14</td>\n",
 685 |        "    </tr>\n",
 686 |        "  </tbody>\n",
 687 |        "</table>\n",
 688 |        "</div>"
 689 |       ],
 690 |       "text/plain": [
 691 |        "      CRIM    ZN  INDUS  CHAS    NOX     RM   AGE     DIS  RAD    TAX  \\\n",
 692 |        "0  0.00632  18.0   2.31   0.0  0.538  6.575  65.2  4.0900  1.0  296.0   \n",
 693 |        "1  0.02731   0.0   7.07   0.0  0.469  6.421  78.9  4.9671  2.0  242.0   \n",
 694 |        "\n",
 695 |        "   PTRATIO      B  LSTAT  \n",
 696 |        "0     15.3  396.9   4.98  \n",
 697 |        "1     17.8  396.9   9.14  "
 698 |       ]
 699 |      },
 700 |      "execution_count": 14,
 701 |      "metadata": {},
 702 |      "output_type": "execute_result"
 703 |     }
 704 |    ],
 705 |    "source": [
 706 |     "# Printing first 2 rows of the dataframe 'df'\n",
 707 |     "df.head(2)"
 708 |    ]
 709 |   },
 710 |   {
 711 |    "cell_type": "code",
 712 |    "execution_count": 15,
 713 |    "id": "941a7867",
 714 |    "metadata": {
 715 |     "execution": {
 716 |      "iopub.execute_input": "2021-12-22T19:07:40.746594Z",
 717 |      "iopub.status.busy": "2021-12-22T19:07:40.741140Z",
 718 |      "iopub.status.idle": "2021-12-22T19:07:40.749244Z",
 719 |      "shell.execute_reply": "2021-12-22T19:07:40.749813Z"
 720 |     },
 721 |     "papermill": {
 722 |      "duration": 0.052214,
 723 |      "end_time": "2021-12-22T19:07:40.749987",
 724 |      "exception": false,
 725 |      "start_time": "2021-12-22T19:07:40.697773",
 726 |      "status": "completed"
 727 |     },
 728 |     "tags": []
 729 |    },
 730 |    "outputs": [],
 731 |    "source": [
 732 |     "# adding a new column 'target' from boston.target\n",
 733 |     "df['target']=boston.target"
 734 |    ]
 735 |   },
 736 |   {
 737 |    "cell_type": "code",
 738 |    "execution_count": 16,
 739 |    "id": "313f6168",
 740 |    "metadata": {
 741 |     "execution": {
 742 |      "iopub.execute_input": "2021-12-22T19:07:40.835505Z",
 743 |      "iopub.status.busy": "2021-12-22T19:07:40.834548Z",
 744 |      "iopub.status.idle": "2021-12-22T19:07:40.852884Z",
 745 |      "shell.execute_reply": "2021-12-22T19:07:40.852218Z"
 746 |     },
 747 |     "papermill": {
 748 |      "duration": 0.062957,
 749 |      "end_time": "2021-12-22T19:07:40.853023",
 750 |      "exception": false,
 751 |      "start_time": "2021-12-22T19:07:40.790066",
 752 |      "status": "completed"
 753 |     },
 754 |     "tags": []
 755 |    },
 756 |    "outputs": [
 757 |     {
 758 |      "data": {
 759 |       "text/html": [
 760 |        "<div>\n",
 761 |        "<style scoped>\n",
 762 |        "    .dataframe tbody tr th:only-of-type {\n",
 763 |        "        vertical-align: middle;\n",
 764 |        "    }\n",
 765 |        "\n",
 766 |        "    .dataframe tbody tr th {\n",
 767 |        "        vertical-align: top;\n",
 768 |        "    }\n",
 769 |        "\n",
 770 |        "    .dataframe thead th {\n",
 771 |        "        text-align: right;\n",
 772 |        "    }\n",
 773 |        "</style>\n",
 774 |        "<table border=\"1\" class=\"dataframe\">\n",
 775 |        "  <thead>\n",
 776 |        "    <tr style=\"text-align: right;\">\n",
 777 |        "      <th></th>\n",
 778 |        "      <th>CRIM</th>\n",
 779 |        "      <th>ZN</th>\n",
 780 |        "      <th>INDUS</th>\n",
 781 |        "      <th>CHAS</th>\n",
 782 |        "      <th>NOX</th>\n",
 783 |        "      <th>RM</th>\n",
 784 |        "      <th>AGE</th>\n",
 785 |        "      <th>DIS</th>\n",
 786 |        "      <th>RAD</th>\n",
 787 |        "      <th>TAX</th>\n",
 788 |        "      <th>PTRATIO</th>\n",
 789 |        "      <th>B</th>\n",
 790 |        "      <th>LSTAT</th>\n",
 791 |        "      <th>target</th>\n",
 792 |        "    </tr>\n",
 793 |        "  </thead>\n",
 794 |        "  <tbody>\n",
 795 |        "    <tr>\n",
 796 |        "      <th>0</th>\n",
 797 |        "      <td>0.00632</td>\n",
 798 |        "      <td>18.0</td>\n",
 799 |        "      <td>2.31</td>\n",
 800 |        "      <td>0.0</td>\n",
 801 |        "      <td>0.538</td>\n",
 802 |        "      <td>6.575</td>\n",
 803 |        "      <td>65.2</td>\n",
 804 |        "      <td>4.0900</td>\n",
 805 |        "      <td>1.0</td>\n",
 806 |        "      <td>296.0</td>\n",
 807 |        "      <td>15.3</td>\n",
 808 |        "      <td>396.9</td>\n",
 809 |        "      <td>4.98</td>\n",
 810 |        "      <td>24.0</td>\n",
 811 |        "    </tr>\n",
 812 |        "    <tr>\n",
 813 |        "      <th>1</th>\n",
 814 |        "      <td>0.02731</td>\n",
 815 |        "      <td>0.0</td>\n",
 816 |        "      <td>7.07</td>\n",
 817 |        "      <td>0.0</td>\n",
 818 |        "      <td>0.469</td>\n",
 819 |        "      <td>6.421</td>\n",
 820 |        "      <td>78.9</td>\n",
 821 |        "      <td>4.9671</td>\n",
 822 |        "      <td>2.0</td>\n",
 823 |        "      <td>242.0</td>\n",
 824 |        "      <td>17.8</td>\n",
 825 |        "      <td>396.9</td>\n",
 826 |        "      <td>9.14</td>\n",
 827 |        "      <td>21.6</td>\n",
 828 |        "    </tr>\n",
 829 |        "  </tbody>\n",
 830 |        "</table>\n",
 831 |        "</div>"
 832 |       ],
 833 |       "text/plain": [
 834 |        "      CRIM    ZN  INDUS  CHAS    NOX     RM   AGE     DIS  RAD    TAX  \\\n",
 835 |        "0  0.00632  18.0   2.31   0.0  0.538  6.575  65.2  4.0900  1.0  296.0   \n",
 836 |        "1  0.02731   0.0   7.07   0.0  0.469  6.421  78.9  4.9671  2.0  242.0   \n",
 837 |        "\n",
 838 |        "   PTRATIO      B  LSTAT  target  \n",
 839 |        "0     15.3  396.9   4.98    24.0  \n",
 840 |        "1     17.8  396.9   9.14    21.6  "
 841 |       ]
 842 |      },
 843 |      "execution_count": 16,
 844 |      "metadata": {},
 845 |      "output_type": "execute_result"
 846 |     }
 847 |    ],
 848 |    "source": [
 849 |     "df.head(2)"
 850 |    ]
 851 |   },
 852 |   {
 853 |    "cell_type": "code",
 854 |    "execution_count": 17,
 855 |    "id": "be488e24",
 856 |    "metadata": {
 857 |     "execution": {
 858 |      "iopub.execute_input": "2021-12-22T19:07:40.957263Z",
 859 |      "iopub.status.busy": "2021-12-22T19:07:40.956587Z",
 860 |      "iopub.status.idle": "2021-12-22T19:07:40.959429Z",
 861 |      "shell.execute_reply": "2021-12-22T19:07:40.960162Z"
 862 |     },
 863 |     "papermill": {
 864 |      "duration": 0.065797,
 865 |      "end_time": "2021-12-22T19:07:40.960381",
 866 |      "exception": false,
 867 |      "start_time": "2021-12-22T19:07:40.894584",
 868 |      "status": "completed"
 869 |     },
 870 |     "tags": []
 871 |    },
 872 |    "outputs": [
 873 |     {
 874 |      "name": "stdout",
 875 |      "output_type": "stream",
 876 |      "text": [
 877 |       "<class 'pandas.core.frame.DataFrame'>\n",
 878 |       "RangeIndex: 506 entries, 0 to 505\n",
 879 |       "Data columns (total 14 columns):\n",
 880 |       " #   Column   Non-Null Count  Dtype  \n",
 881 |       "---  ------   --------------  -----  \n",
 882 |       " 0   CRIM     506 non-null    float64\n",
 883 |       " 1   ZN       506 non-null    float64\n",
 884 |       " 2   INDUS    506 non-null    float64\n",
 885 |       " 3   CHAS     506 non-null    float64\n",
 886 |       " 4   NOX      506 non-null    float64\n",
 887 |       " 5   RM       506 non-null    float64\n",
 888 |       " 6   AGE      506 non-null    float64\n",
 889 |       " 7   DIS      506 non-null    float64\n",
 890 |       " 8   RAD      506 non-null    float64\n",
 891 |       " 9   TAX      506 non-null    float64\n",
 892 |       " 10  PTRATIO  506 non-null    float64\n",
 893 |       " 11  B        506 non-null    float64\n",
 894 |       " 12  LSTAT    506 non-null    float64\n",
 895 |       " 13  target   506 non-null    float64\n",
 896 |       "dtypes: float64(14)\n",
 897 |       "memory usage: 55.5 KB\n"
 898 |      ]
 899 |     }
 900 |    ],
 901 |    "source": [
 902 |     "# Printing a concise summary of the dataset\n",
 903 |     "df.info()"
 904 |    ]
 905 |   },
 906 |   {
 907 |    "cell_type": "markdown",
 908 |    "id": "550c0ff9",
 909 |    "metadata": {
 910 |     "papermill": {
 911 |      "duration": 0.04135,
 912 |      "end_time": "2021-12-22T19:07:41.044965",
 913 |      "exception": false,
 914 |      "start_time": "2021-12-22T19:07:41.003615",
 915 |      "status": "completed"
 916 |     },
 917 |     "tags": []
 918 |    },
 919 |    "source": [
 920 |     "- we have 13 independent variables and one dependent variable (house price)"
 921 |    ]
 922 |   },
 923 |   {
 924 |    "cell_type": "code",
 925 |    "execution_count": 18,
 926 |    "id": "5c923e03",
 927 |    "metadata": {
 928 |     "execution": {
 929 |      "iopub.execute_input": "2021-12-22T19:07:41.135908Z",
 930 |      "iopub.status.busy": "2021-12-22T19:07:41.135229Z",
 931 |      "iopub.status.idle": "2021-12-22T19:07:41.137601Z",
 932 |      "shell.execute_reply": "2021-12-22T19:07:41.138114Z"
 933 |     },
 934 |     "papermill": {
 935 |      "duration": 0.050966,
 936 |      "end_time": "2021-12-22T19:07:41.138293",
 937 |      "exception": false,
 938 |      "start_time": "2021-12-22T19:07:41.087327",
 939 |      "status": "completed"
 940 |     },
 941 |     "tags": []
 942 |    },
 943 |    "outputs": [],
 944 |    "source": [
 945 |     "X=df.iloc[:,:-1].values\n",
 946 |     "y=df.iloc[:,-1].values"
 947 |    ]
 948 |   },
 949 |   {
 950 |    "cell_type": "code",
 951 |    "execution_count": 19,
 952 |    "id": "0dd9ce88",
 953 |    "metadata": {
 954 |     "execution": {
 955 |      "iopub.execute_input": "2021-12-22T19:07:41.224776Z",
 956 |      "iopub.status.busy": "2021-12-22T19:07:41.224113Z",
 957 |      "iopub.status.idle": "2021-12-22T19:07:41.275596Z",
 958 |      "shell.execute_reply": "2021-12-22T19:07:41.275042Z"
 959 |     },
 960 |     "papermill": {
 961 |      "duration": 0.095782,
 962 |      "end_time": "2021-12-22T19:07:41.275750",
 963 |      "exception": false,
 964 |      "start_time": "2021-12-22T19:07:41.179968",
 965 |      "status": "completed"
 966 |     },
 967 |     "tags": []
 968 |    },
 969 |    "outputs": [],
 970 |    "source": [
 971 |     "from sklearn.model_selection import train_test_split"
 972 |    ]
 973 |   },
 974 |   {
 975 |    "cell_type": "code",
 976 |    "execution_count": 20,
 977 |    "id": "1126063d",
 978 |    "metadata": {
 979 |     "execution": {
 980 |      "iopub.execute_input": "2021-12-22T19:07:41.361843Z",
 981 |      "iopub.status.busy": "2021-12-22T19:07:41.361257Z",
 982 |      "iopub.status.idle": "2021-12-22T19:07:41.366540Z",
 983 |      "shell.execute_reply": "2021-12-22T19:07:41.367029Z"
 984 |     },
 985 |     "papermill": {
 986 |      "duration": 0.05011,
 987 |      "end_time": "2021-12-22T19:07:41.367189",
 988 |      "exception": false,
 989 |      "start_time": "2021-12-22T19:07:41.317079",
 990 |      "status": "completed"
 991 |     },
 992 |     "tags": []
 993 |    },
 994 |    "outputs": [],
 995 |    "source": [
 996 |     "X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.25)"
 997 |    ]
 998 |   },
 999 |   {
1000 |    "cell_type": "code",
1001 |    "execution_count": 21,
1002 |    "id": "1a913510",
1003 |    "metadata": {
1004 |     "execution": {
1005 |      "iopub.execute_input": "2021-12-22T19:07:41.455019Z",
1006 |      "iopub.status.busy": "2021-12-22T19:07:41.454401Z",
1007 |      "iopub.status.idle": "2021-12-22T19:07:41.459162Z",
1008 |      "shell.execute_reply": "2021-12-22T19:07:41.459617Z"
1009 |     },
1010 |     "papermill": {
1011 |      "duration": 0.051016,
1012 |      "end_time": "2021-12-22T19:07:41.459791",
1013 |      "exception": false,
1014 |      "start_time": "2021-12-22T19:07:41.408775",
1015 |      "status": "completed"
1016 |     },
1017 |     "tags": []
1018 |    },
1019 |    "outputs": [
1020 |     {
1021 |      "name": "stdout",
1022 |      "output_type": "stream",
1023 |      "text": [
1024 |       "(379, 13) (379,)\n"
1025 |      ]
1026 |     }
1027 |    ],
1028 |    "source": [
1029 |     "print(X_train.shape,y_train.shape)"
1030 |    ]
1031 |   },
1032 |   {
1033 |    "cell_type": "code",
1034 |    "execution_count": 22,
1035 |    "id": "821c4385",
1036 |    "metadata": {
1037 |     "execution": {
1038 |      "iopub.execute_input": "2021-12-22T19:07:41.547471Z",
1039 |      "iopub.status.busy": "2021-12-22T19:07:41.546860Z",
1040 |      "iopub.status.idle": "2021-12-22T19:07:41.552895Z",
1041 |      "shell.execute_reply": "2021-12-22T19:07:41.552335Z"
1042 |     },
1043 |     "papermill": {
1044 |      "duration": 0.05103,
1045 |      "end_time": "2021-12-22T19:07:41.553036",
1046 |      "exception": false,
1047 |      "start_time": "2021-12-22T19:07:41.502006",
1048 |      "status": "completed"
1049 |     },
1050 |     "tags": []
1051 |    },
1052 |    "outputs": [
1053 |     {
1054 |      "name": "stdout",
1055 |      "output_type": "stream",
1056 |      "text": [
1057 |       "(127, 13) (127,)\n"
1058 |      ]
1059 |     }
1060 |    ],
1061 |    "source": [
1062 |     "print(X_test.shape,y_test.shape)"
1063 |    ]
1064 |   },
1065 |   {
1066 |    "cell_type": "code",
1067 |    "execution_count": 23,
1068 |    "id": "e1df2e09",
1069 |    "metadata": {
1070 |     "execution": {
1071 |      "iopub.execute_input": "2021-12-22T19:07:41.643135Z",
1072 |      "iopub.status.busy": "2021-12-22T19:07:41.642472Z",
1073 |      "iopub.status.idle": "2021-12-22T19:07:41.720937Z",
1074 |      "shell.execute_reply": "2021-12-22T19:07:41.720254Z"
1075 |     },
1076 |     "papermill": {
1077 |      "duration": 0.125329,
1078 |      "end_time": "2021-12-22T19:07:41.721071",
1079 |      "exception": false,
1080 |      "start_time": "2021-12-22T19:07:41.595742",
1081 |      "status": "completed"
1082 |     },
1083 |     "tags": []
1084 |    },
1085 |    "outputs": [],
1086 |    "source": [
1087 |     "# now we will start training of the model on multiple regression\n",
1088 |     "from sklearn.linear_model import LinearRegression\n",
1089 |     "lr=LinearRegression()"
1090 |    ]
1091 |   },
1092 |   {
1093 |    "cell_type": "code",
1094 |    "execution_count": 24,
1095 |    "id": "0cf58a86",
1096 |    "metadata": {
1097 |     "execution": {
1098 |      "iopub.execute_input": "2021-12-22T19:07:41.811926Z",
1099 |      "iopub.status.busy": "2021-12-22T19:07:41.811262Z",
1100 |      "iopub.status.idle": "2021-12-22T19:07:41.832465Z",
1101 |      "shell.execute_reply": "2021-12-22T19:07:41.831844Z"
1102 |     },
1103 |     "papermill": {
1104 |      "duration": 0.06874,
1105 |      "end_time": "2021-12-22T19:07:41.832601",
1106 |      "exception": false,
1107 |      "start_time": "2021-12-22T19:07:41.763861",
1108 |      "status": "completed"
1109 |     },
1110 |     "tags": []
1111 |    },
1112 |    "outputs": [
1113 |     {
1114 |      "data": {
1115 |       "text/plain": [
1116 |        "LinearRegression()"
1117 |       ]
1118 |      },
1119 |      "execution_count": 24,
1120 |      "metadata": {},
1121 |      "output_type": "execute_result"
1122 |     }
1123 |    ],
1124 |    "source": [
1125 |     "lr.fit(X_train, y_train)"
1126 |    ]
1127 |   },
1128 |   {
1129 |    "cell_type": "code",
1130 |    "execution_count": 25,
1131 |    "id": "9d2d414a",
1132 |    "metadata": {
1133 |     "execution": {
1134 |      "iopub.execute_input": "2021-12-22T19:07:41.924444Z",
1135 |      "iopub.status.busy": "2021-12-22T19:07:41.922358Z",
1136 |      "iopub.status.idle": "2021-12-22T19:07:41.927036Z",
1137 |      "shell.execute_reply": "2021-12-22T19:07:41.926431Z"
1138 |     },
1139 |     "papermill": {
1140 |      "duration": 0.051007,
1141 |      "end_time": "2021-12-22T19:07:41.927167",
1142 |      "exception": false,
1143 |      "start_time": "2021-12-22T19:07:41.876160",
1144 |      "status": "completed"
1145 |     },
1146 |     "tags": []
1147 |    },
1148 |    "outputs": [],
1149 |    "source": [
1150 |     "lr_pred=lr.predict(X_test)"
1151 |    ]
1152 |   },
1153 |   {
1154 |    "cell_type": "code",
1155 |    "execution_count": 26,
1156 |    "id": "ea2fddf5",
1157 |    "metadata": {
1158 |     "execution": {
1159 |      "iopub.execute_input": "2021-12-22T19:07:42.021312Z",
1160 |      "iopub.status.busy": "2021-12-22T19:07:42.020599Z",
1161 |      "iopub.status.idle": "2021-12-22T19:07:42.024191Z",
1162 |      "shell.execute_reply": "2021-12-22T19:07:42.024795Z"
1163 |     },
1164 |     "papermill": {
1165 |      "duration": 0.053355,
1166 |      "end_time": "2021-12-22T19:07:42.024956",
1167 |      "exception": false,
1168 |      "start_time": "2021-12-22T19:07:41.971601",
1169 |      "status": "completed"
1170 |     },
1171 |     "tags": []
1172 |    },
1173 |    "outputs": [
1174 |     {
1175 |      "data": {
1176 |       "text/plain": [
1177 |        "19.73771080470582"
1178 |       ]
1179 |      },
1180 |      "execution_count": 26,
1181 |      "metadata": {},
1182 |      "output_type": "execute_result"
1183 |     }
1184 |    ],
1185 |    "source": [
1186 |     "# calculate the mean squared error\n",
1187 |     "mse=np.mean((lr_pred-y_test)**2)\n",
1188 |     "mse"
1189 |    ]
1190 |   },
1191 |   {
1192 |    "cell_type": "code",
1193 |    "execution_count": 27,
1194 |    "id": "393d46ab",
1195 |    "metadata": {
1196 |     "execution": {
1197 |      "iopub.execute_input": "2021-12-22T19:07:42.116953Z",
1198 |      "iopub.status.busy": "2021-12-22T19:07:42.116337Z",
1199 |      "iopub.status.idle": "2021-12-22T19:07:42.126336Z",
1200 |      "shell.execute_reply": "2021-12-22T19:07:42.126853Z"
1201 |     },
1202 |     "papermill": {
1203 |      "duration": 0.058001,
1204 |      "end_time": "2021-12-22T19:07:42.127035",
1205 |      "exception": false,
1206 |      "start_time": "2021-12-22T19:07:42.069034",
1207 |      "status": "completed"
1208 |     },
1209 |     "tags": []
1210 |    },
1211 |    "outputs": [
1212 |     {
1213 |      "name": "stdout",
1214 |      "output_type": "stream",
1215 |      "text": [
1216 |       "    Columns  Coefficient Values\n",
1217 |       "0      CRIM           -0.066498\n",
1218 |       "1        ZN            0.053051\n",
1219 |       "2     INDUS            0.041127\n",
1220 |       "3      CHAS            3.502430\n",
1221 |       "4       NOX          -18.380600\n",
1222 |       "5        RM            3.456135\n",
1223 |       "6       AGE            0.012149\n",
1224 |       "7       DIS           -1.543379\n",
1225 |       "8       RAD            0.296151\n",
1226 |       "9       TAX           -0.012449\n",
1227 |       "10  PTRATIO           -0.890911\n",
1228 |       "11        B            0.011632\n",
1229 |       "12    LSTAT           -0.606322\n",
1230 |       "13   target                 NaN\n"
1231 |      ]
1232 |     }
1233 |    ],
1234 |    "source": [
1235 |     "# Putting together the coefficient and their columns\n",
1236 |     "\n",
1237 |     "lr_coeff=pd.DataFrame()\n",
1238 |     "lr_coeff['Columns']=df.columns\n",
1239 |     "lr_coeff['Coefficient Values']=pd.Series(lr.coef_)\n",
1240 |     "\n",
1241 |     "print(lr_coeff)"
1242 |    ]
1243 |   },
1244 |   {
1245 |    "cell_type": "markdown",
1246 |    "id": "749d3787",
1247 |    "metadata": {
1248 |     "papermill": {
1249 |      "duration": 0.044005,
1250 |      "end_time": "2021-12-22T19:07:42.215203",
1251 |      "exception": false,
1252 |      "start_time": "2021-12-22T19:07:42.171198",
1253 |      "status": "completed"
1254 |     },
1255 |     "tags": []
1256 |    },
1257 |    "source": [
1258 |     "- We can see that most of the columns do not have significant coefficients and hence do not contribute much to model performance.\n",
1259 |     "- We need to regularize the model."
1260 |    ]
1261 |   },
1262 |   {
1263 |    "cell_type": "code",
1264 |    "execution_count": 28,
1265 |    "id": "2cc429b5",
1266 |    "metadata": {
1267 |     "execution": {
1268 |      "iopub.execute_input": "2021-12-22T19:07:42.308891Z",
1269 |      "iopub.status.busy": "2021-12-22T19:07:42.308216Z",
1270 |      "iopub.status.idle": "2021-12-22T19:07:42.310474Z",
1271 |      "shell.execute_reply": "2021-12-22T19:07:42.311006Z"
1272 |     },
1273 |     "papermill": {
1274 |      "duration": 0.051247,
1275 |      "end_time": "2021-12-22T19:07:42.311167",
1276 |      "exception": false,
1277 |      "start_time": "2021-12-22T19:07:42.259920",
1278 |      "status": "completed"
1279 |     },
1280 |     "tags": []
1281 |    },
1282 |    "outputs": [],
1283 |    "source": [
1284 |     "# Regularizing using ridge regression\n",
1285 |     "from sklearn.linear_model import Ridge"
1286 |    ]
1287 |   },
1288 |   {
1289 |    "cell_type": "code",
1290 |    "execution_count": 29,
1291 |    "id": "4ffa3dfc",
1292 |    "metadata": {
1293 |     "execution": {
1294 |      "iopub.execute_input": "2021-12-22T19:07:42.403533Z",
1295 |      "iopub.status.busy": "2021-12-22T19:07:42.402930Z",
1296 |      "iopub.status.idle": "2021-12-22T19:07:42.405692Z",
1297 |      "shell.execute_reply": "2021-12-22T19:07:42.406297Z"
1298 |     },
1299 |     "papermill": {
1300 |      "duration": 0.050914,
1301 |      "end_time": "2021-12-22T19:07:42.406456",
1302 |      "exception": false,
1303 |      "start_time": "2021-12-22T19:07:42.355542",
1304 |      "status": "completed"
1305 |     },
1306 |     "tags": []
1307 |    },
1308 |    "outputs": [],
1309 |    "source": [
1310 |     "ridge_reg=Ridge(alpha=1)\n",
1311 |     "# here the alpha parameter sets the regularization strength; it must be a non-negative float"
1312 |    ]
1313 |   },
1314 |   {
1315 |    "cell_type": "code",
1316 |    "execution_count": 30,
1317 |    "id": "a4b5a566",
1318 |    "metadata": {
1319 |     "execution": {
1320 |      "iopub.execute_input": "2021-12-22T19:07:42.498921Z",
1321 |      "iopub.status.busy": "2021-12-22T19:07:42.498315Z",
1322 |      "iopub.status.idle": "2021-12-22T19:07:42.508619Z",
1323 |      "shell.execute_reply": "2021-12-22T19:07:42.509115Z"
1324 |     },
1325 |     "papermill": {
1326 |      "duration": 0.058383,
1327 |      "end_time": "2021-12-22T19:07:42.509276",
1328 |      "exception": false,
1329 |      "start_time": "2021-12-22T19:07:42.450893",
1330 |      "status": "completed"
1331 |     },
1332 |     "tags": []
1333 |    },
1334 |    "outputs": [
1335 |     {
1336 |      "data": {
1337 |       "text/plain": [
1338 |        "Ridge(alpha=1)"
1339 |       ]
1340 |      },
1341 |      "execution_count": 30,
1342 |      "metadata": {},
1343 |      "output_type": "execute_result"
1344 |     }
1345 |    ],
1346 |    "source": [
1347 |     "ridge_reg.fit(X_train,y_train)"
1348 |    ]
1349 |   },
1350 |   {
1351 |    "cell_type": "code",
1352 |    "execution_count": 31,
1353 |    "id": "e42249cc",
1354 |    "metadata": {
1355 |     "execution": {
1356 |      "iopub.execute_input": "2021-12-22T19:07:42.602566Z",
1357 |      "iopub.status.busy": "2021-12-22T19:07:42.601969Z",
1358 |      "iopub.status.idle": "2021-12-22T19:07:42.605585Z",
1359 |      "shell.execute_reply": "2021-12-22T19:07:42.606182Z"
1360 |     },
1361 |     "papermill": {
1362 |      "duration": 0.052095,
1363 |      "end_time": "2021-12-22T19:07:42.606354",
1364 |      "exception": false,
1365 |      "start_time": "2021-12-22T19:07:42.554259",
1366 |      "status": "completed"
1367 |     },
1368 |     "tags": []
1369 |    },
1370 |    "outputs": [],
1371 |    "source": [
1372 |     "y_pred=ridge_reg.predict(X_test)"
1373 |    ]
1374 |   },
1375 |   {
1376 |    "cell_type": "code",
1377 |    "execution_count": 32,
1378 |    "id": "07b3d94d",
1379 |    "metadata": {
1380 |     "execution": {
1381 |      "iopub.execute_input": "2021-12-22T19:07:42.701798Z",
1382 |      "iopub.status.busy": "2021-12-22T19:07:42.701151Z",
1383 |      "iopub.status.idle": "2021-12-22T19:07:42.711342Z",
1384 |      "shell.execute_reply": "2021-12-22T19:07:42.711796Z"
1385 |     },
1386 |     "papermill": {
1387 |      "duration": 0.059316,
1388 |      "end_time": "2021-12-22T19:07:42.711960",
1389 |      "exception": false,
1390 |      "start_time": "2021-12-22T19:07:42.652644",
1391 |      "status": "completed"
1392 |     },
1393 |     "tags": []
1394 |    },
1395 |    "outputs": [
1396 |     {
1397 |      "name": "stdout",
1398 |      "output_type": "stream",
1399 |      "text": [
1400 |       "    columns  Coefficient estimates\n",
1401 |       "0      CRIM              -0.059764\n",
1402 |       "1        ZN               0.053677\n",
1403 |       "2     INDUS               0.004674\n",
1404 |       "3      CHAS               3.309944\n",
1405 |       "4       NOX              -9.918291\n",
1406 |       "5        RM               3.558169\n",
1407 |       "6       AGE               0.003945\n",
1408 |       "7       DIS              -1.419434\n",
1409 |       "8       RAD               0.273208\n",
1410 |       "9       TAX              -0.012888\n",
1411 |       "10  PTRATIO              -0.790406\n",
1412 |       "11        B               0.012675\n",
1413 |       "12    LSTAT              -0.614542\n",
1414 |       "13   target                    NaN\n"
1415 |      ]
1416 |     }
1417 |    ],
1418 |    "source": [
1419 |     "ridge_coeff=pd.DataFrame()\n",
1420 |     "ridge_coeff['columns']=df.columns\n",
1421 |     "ridge_coeff['Coefficient estimates']=pd.Series(ridge_reg.coef_)\n",
1422 |     "print(ridge_coeff)"
1423 |    ]
1424 |   },
1425 |   {
1426 |    "cell_type": "markdown",
1427 |    "id": "c0c94de5",
1428 |    "metadata": {
1429 |     "papermill": {
1430 |      "duration": 0.046599,
1431 |      "end_time": "2021-12-22T19:07:42.804542",
1432 |      "exception": false,
1433 |      "start_time": "2021-12-22T19:07:42.757943",
1434 |      "status": "completed"
1435 |     },
1436 |     "tags": []
1437 |    },
1438 |    "source": [
1439 |     "- As we can observe from the above output, alpha helps in regularizing the coefficients and makes them converge faster. \n",
1440 |     "- It may look as if some of the coefficients become zero. In ridge regularization the coefficients are shrunk toward 0 but do not become exactly 0; they are just too small to observe in the above output. "
1441 |    ]
1442 |   },
1443 |   {
1444 |    "cell_type": "markdown",
1445 |    "id": "2547f3b9",
1446 |    "metadata": {
1447 |     "papermill": {
1448 |      "duration": 0.045682,
1449 |      "end_time": "2021-12-22T19:07:42.896858",
1450 |      "exception": false,
1451 |      "start_time": "2021-12-22T19:07:42.851176",
1452 |      "status": "completed"
1453 |     },
1454 |     "tags": []
1455 |    },
1456 |    "source": [
1457 |     "### Implementation of lasso regression using sklearn"
1458 |    ]
1459 |   },
1460 |   {
1461 |    "cell_type": "markdown",
1462 |    "id": "66b93314",
1463 |    "metadata": {
1464 |     "papermill": {
1465 |      "duration": 0.045735,
1466 |      "end_time": "2021-12-22T19:07:42.988699",
1467 |      "exception": false,
1468 |      "start_time": "2021-12-22T19:07:42.942964",
1469 |      "status": "completed"
1470 |     },
1471 |     "tags": []
1472 |    },
1473 |    "source": [
1474 |     "- Lasso adds the sum of the absolute values of the coefficients (L1 penalty) in place of the sum of their squared values (L2 penalty) used by ridge.\n",
1475 |     "- Unlike Ridge Regression, Lasso regression can completely eliminate the variable by reducing its coefficient value to 0."
1476 |    ]
1477 |   },
1478 |   {
1479 |    "cell_type": "code",
1480 |    "execution_count": 33,
1481 |    "id": "3b51c8d7",
1482 |    "metadata": {
1483 |     "execution": {
1484 |      "iopub.execute_input": "2021-12-22T19:07:43.084868Z",
1485 |      "iopub.status.busy": "2021-12-22T19:07:43.084217Z",
1486 |      "iopub.status.idle": "2021-12-22T19:07:43.088349Z",
1487 |      "shell.execute_reply": "2021-12-22T19:07:43.087847Z"
1488 |     },
1489 |     "papermill": {
1490 |      "duration": 0.054006,
1491 |      "end_time": "2021-12-22T19:07:43.088502",
1492 |      "exception": false,
1493 |      "start_time": "2021-12-22T19:07:43.034496",
1494 |      "status": "completed"
1495 |     },
1496 |     "tags": []
1497 |    },
1498 |    "outputs": [],
1499 |    "source": [
1500 |     "from sklearn.linear_model import Lasso\n",
1501 |     "lasso=Lasso(alpha=1)"
1502 |    ]
1503 |   },
1504 |   {
1505 |    "cell_type": "code",
1506 |    "execution_count": 34,
1507 |    "id": "00805f88",
1508 |    "metadata": {
1509 |     "execution": {
1510 |      "iopub.execute_input": "2021-12-22T19:07:43.183361Z",
1511 |      "iopub.status.busy": "2021-12-22T19:07:43.182730Z",
1512 |      "iopub.status.idle": "2021-12-22T19:07:43.190670Z",
1513 |      "shell.execute_reply": "2021-12-22T19:07:43.191217Z"
1514 |     },
1515 |     "papermill": {
1516 |      "duration": 0.057067,
1517 |      "end_time": "2021-12-22T19:07:43.191394",
1518 |      "exception": false,
1519 |      "start_time": "2021-12-22T19:07:43.134327",
1520 |      "status": "completed"
1521 |     },
1522 |     "tags": []
1523 |    },
1524 |    "outputs": [],
1525 |    "source": [
1526 |     "lasso.fit(X_train,y_train)\n",
1527 |     "y_pred1=lasso.predict(X_test)"
1528 |    ]
1529 |   },
1530 |   {
1531 |    "cell_type": "code",
1532 |    "execution_count": 35,
1533 |    "id": "816b5ade",
1534 |    "metadata": {
1535 |     "execution": {
1536 |      "iopub.execute_input": "2021-12-22T19:07:43.286534Z",
1537 |      "iopub.status.busy": "2021-12-22T19:07:43.285930Z",
1538 |      "iopub.status.idle": "2021-12-22T19:07:43.289250Z",
1539 |      "shell.execute_reply": "2021-12-22T19:07:43.289839Z"
1540 |     },
1541 |     "papermill": {
1542 |      "duration": 0.052542,
1543 |      "end_time": "2021-12-22T19:07:43.290000",
1544 |      "exception": false,
1545 |      "start_time": "2021-12-22T19:07:43.237458",
1546 |      "status": "completed"
1547 |     },
1548 |     "tags": []
1549 |    },
1550 |    "outputs": [],
1551 |    "source": [
1552 |     "lasso_mse=np.mean((y_pred1-y_test)**2)"
1553 |    ]
1554 |   },
1555 |   {
1556 |    "cell_type": "code",
1557 |    "execution_count": 36,
1558 |    "id": "b275aa1f",
1559 |    "metadata": {
1560 |     "execution": {
1561 |      "iopub.execute_input": "2021-12-22T19:07:43.385343Z",
1562 |      "iopub.status.busy": "2021-12-22T19:07:43.384744Z",
1563 |      "iopub.status.idle": "2021-12-22T19:07:43.389077Z",
1564 |      "shell.execute_reply": "2021-12-22T19:07:43.389546Z"
1565 |     },
1566 |     "papermill": {
1567 |      "duration": 0.053477,
1568 |      "end_time": "2021-12-22T19:07:43.389750",
1569 |      "exception": false,
1570 |      "start_time": "2021-12-22T19:07:43.336273",
1571 |      "status": "completed"
1572 |     },
1573 |     "tags": []
1574 |    },
1575 |    "outputs": [
1576 |     {
1577 |      "name": "stdout",
1578 |      "output_type": "stream",
1579 |      "text": [
1580 |       "25.283708842642042\n"
1581 |      ]
1582 |     }
1583 |    ],
1584 |    "source": [
1585 |     "print(lasso_mse)"
1586 |    ]
1587 |   },
1588 |   {
1589 |    "cell_type": "code",
1590 |    "execution_count": 37,
1591 |    "id": "ac5b3180",
1592 |    "metadata": {
1593 |     "execution": {
1594 |      "iopub.execute_input": "2021-12-22T19:07:43.487451Z",
1595 |      "iopub.status.busy": "2021-12-22T19:07:43.486800Z",
1596 |      "iopub.status.idle": "2021-12-22T19:07:43.493024Z",
1597 |      "shell.execute_reply": "2021-12-22T19:07:43.493570Z"
1598 |     },
1599 |     "papermill": {
1600 |      "duration": 0.056385,
1601 |      "end_time": "2021-12-22T19:07:43.493760",
1602 |      "exception": false,
1603 |      "start_time": "2021-12-22T19:07:43.437375",
1604 |      "status": "completed"
1605 |     },
1606 |     "tags": []
1607 |    },
1608 |    "outputs": [],
1609 |    "source": [
1610 |     "lasso_coef=pd.DataFrame()\n",
1611 |     "lasso_coef['columns']=df.columns\n",
1612 |     "lasso_coef['coeffienct values']=pd.Series(lasso.coef_)"
1613 |    ]
1614 |   },
1615 |   {
1616 |    "cell_type": "code",
1617 |    "execution_count": 38,
1618 |    "id": "322c1e1a",
1619 |    "metadata": {
1620 |     "execution": {
1621 |      "iopub.execute_input": "2021-12-22T19:07:43.591109Z",
1622 |      "iopub.status.busy": "2021-12-22T19:07:43.590431Z",
1623 |      "iopub.status.idle": "2021-12-22T19:07:43.599134Z",
1624 |      "shell.execute_reply": "2021-12-22T19:07:43.599675Z"
1625 |     },
1626 |     "papermill": {
1627 |      "duration": 0.058995,
1628 |      "end_time": "2021-12-22T19:07:43.599844",
1629 |      "exception": false,
1630 |      "start_time": "2021-12-22T19:07:43.540849",
1631 |      "status": "completed"
1632 |     },
1633 |     "tags": []
1634 |    },
1635 |    "outputs": [
1636 |     {
1637 |      "data": {
1638 |       "text/html": [
1639 |        "<div>\n",
1640 |        "<style scoped>\n",
1641 |        "    .dataframe tbody tr th:only-of-type {\n",
1642 |        "        vertical-align: middle;\n",
1643 |        "    }\n",
1644 |        "\n",
1645 |        "    .dataframe tbody tr th {\n",
1646 |        "        vertical-align: top;\n",
1647 |        "    }\n",
1648 |        "\n",
1649 |        "    .dataframe thead th {\n",
1650 |        "        text-align: right;\n",
1651 |        "    }\n",
1652 |        "</style>\n",
1653 |        "<table border=\"1\" class=\"dataframe\">\n",
1654 |        "  <thead>\n",
1655 |        "    <tr style=\"text-align: right;\">\n",
1656 |        "      <th></th>\n",
1657 |        "      <th>columns</th>\n",
1658 |        "      <th>coeffienct values</th>\n",
1659 |        "    </tr>\n",
1660 |        "  </thead>\n",
1661 |        "  <tbody>\n",
1662 |        "    <tr>\n",
1663 |        "      <th>0</th>\n",
1664 |        "      <td>CRIM</td>\n",
1665 |        "      <td>-0.000000</td>\n",
1666 |        "    </tr>\n",
1667 |        "    <tr>\n",
1668 |        "      <th>1</th>\n",
1669 |        "      <td>ZN</td>\n",
1670 |        "      <td>0.052337</td>\n",
1671 |        "    </tr>\n",
1672 |        "    <tr>\n",
1673 |        "      <th>2</th>\n",
1674 |        "      <td>INDUS</td>\n",
1675 |        "      <td>-0.000000</td>\n",
1676 |        "    </tr>\n",
1677 |        "    <tr>\n",
1678 |        "      <th>3</th>\n",
1679 |        "      <td>CHAS</td>\n",
1680 |        "      <td>0.000000</td>\n",
1681 |        "    </tr>\n",
1682 |        "    <tr>\n",
1683 |        "      <th>4</th>\n",
1684 |        "      <td>NOX</td>\n",
1685 |        "      <td>-0.000000</td>\n",
1686 |        "    </tr>\n",
1687 |        "    <tr>\n",
1688 |        "      <th>5</th>\n",
1689 |        "      <td>RM</td>\n",
1690 |        "      <td>0.905588</td>\n",
1691 |        "    </tr>\n",
1692 |        "    <tr>\n",
1693 |        "      <th>6</th>\n",
1694 |        "      <td>AGE</td>\n",
1695 |        "      <td>0.030446</td>\n",
1696 |        "    </tr>\n",
1697 |        "    <tr>\n",
1698 |        "      <th>7</th>\n",
1699 |        "      <td>DIS</td>\n",
1700 |        "      <td>-0.743750</td>\n",
1701 |        "    </tr>\n",
1702 |        "    <tr>\n",
1703 |        "      <th>8</th>\n",
1704 |        "      <td>RAD</td>\n",
1705 |        "      <td>0.219849</td>\n",
1706 |        "    </tr>\n",
1707 |        "    <tr>\n",
1708 |        "      <th>9</th>\n",
1709 |        "      <td>TAX</td>\n",
1710 |        "      <td>-0.014176</td>\n",
1711 |        "    </tr>\n",
1712 |        "    <tr>\n",
1713 |        "      <th>10</th>\n",
1714 |        "      <td>PTRATIO</td>\n",
1715 |        "      <td>-0.601588</td>\n",
1716 |        "    </tr>\n",
1717 |        "    <tr>\n",
1718 |        "      <th>11</th>\n",
1719 |        "      <td>B</td>\n",
1720 |        "      <td>0.011533</td>\n",
1721 |        "    </tr>\n",
1722 |        "    <tr>\n",
1723 |        "      <th>12</th>\n",
1724 |        "      <td>LSTAT</td>\n",
1725 |        "      <td>-0.831386</td>\n",
1726 |        "    </tr>\n",
1727 |        "    <tr>\n",
1728 |        "      <th>13</th>\n",
1729 |        "      <td>target</td>\n",
1730 |        "      <td>NaN</td>\n",
1731 |        "    </tr>\n",
1732 |        "  </tbody>\n",
1733 |        "</table>\n",
1734 |        "</div>"
1735 |       ],
1736 |       "text/plain": [
1737 |        "    columns  coeffienct values\n",
1738 |        "0      CRIM          -0.000000\n",
1739 |        "1        ZN           0.052337\n",
1740 |        "2     INDUS          -0.000000\n",
1741 |        "3      CHAS           0.000000\n",
1742 |        "4       NOX          -0.000000\n",
1743 |        "5        RM           0.905588\n",
1744 |        "6       AGE           0.030446\n",
1745 |        "7       DIS          -0.743750\n",
1746 |        "8       RAD           0.219849\n",
1747 |        "9       TAX          -0.014176\n",
1748 |        "10  PTRATIO          -0.601588\n",
1749 |        "11        B           0.011533\n",
1750 |        "12    LSTAT          -0.831386\n",
1751 |        "13   target                NaN"
1752 |       ]
1753 |      },
1754 |      "execution_count": 38,
1755 |      "metadata": {},
1756 |      "output_type": "execute_result"
1757 |     }
1758 |    ],
1759 |    "source": [
1760 |     "lasso_coef"
1761 |    ]
1762 |   },
1763 |   {
1764 |    "cell_type": "code",
1765 |    "execution_count": 39,
1766 |    "id": "50658745",
1767 |    "metadata": {
1768 |     "execution": {
1769 |      "iopub.execute_input": "2021-12-22T19:07:43.697914Z",
1770 |      "iopub.status.busy": "2021-12-22T19:07:43.697194Z",
1771 |      "iopub.status.idle": "2021-12-22T19:07:43.701581Z",
1772 |      "shell.execute_reply": "2021-12-22T19:07:43.702185Z"
1773 |     },
1774 |     "papermill": {
1775 |      "duration": 0.054965,
1776 |      "end_time": "2021-12-22T19:07:43.702345",
1777 |      "exception": false,
1778 |      "start_time": "2021-12-22T19:07:43.647380",
1779 |      "status": "completed"
1780 |     },
1781 |     "tags": []
1782 |    },
1783 |    "outputs": [
1784 |     {
1785 |      "data": {
1786 |       "text/plain": [
1787 |        "pandas.core.frame.DataFrame"
1788 |       ]
1789 |      },
1790 |      "execution_count": 39,
1791 |      "metadata": {},
1792 |      "output_type": "execute_result"
1793 |     }
1794 |    ],
1795 |    "source": [
1796 |     "type(lasso_coef)"
1797 |    ]
1798 |   },
1799 |   {
1800 |    "cell_type": "markdown",
1801 |    "id": "5108bea9",
1802 |    "metadata": {
1803 |     "papermill": {
1804 |      "duration": 0.047194,
1805 |      "end_time": "2021-12-22T19:07:43.797052",
1806 |      "exception": false,
1807 |      "start_time": "2021-12-22T19:07:43.749858",
1808 |      "status": "completed"
1809 |     },
1810 |     "tags": []
1811 |    },
1812 |    "source": [
1813 |     "### Python implementation of Elastic Net "
1814 |    ]
1815 |   },
1816 |   {
1817 |    "cell_type": "code",
1818 |    "execution_count": 40,
1819 |    "id": "f9e0ea19",
1820 |    "metadata": {
1821 |     "execution": {
1822 |      "iopub.execute_input": "2021-12-22T19:07:43.896026Z",
1823 |      "iopub.status.busy": "2021-12-22T19:07:43.895359Z",
1824 |      "iopub.status.idle": "2021-12-22T19:07:43.898529Z",
1825 |      "shell.execute_reply": "2021-12-22T19:07:43.899059Z"
1826 |     },
1827 |     "papermill": {
1828 |      "duration": 0.05412,
1829 |      "end_time": "2021-12-22T19:07:43.899277",
1830 |      "exception": false,
1831 |      "start_time": "2021-12-22T19:07:43.845157",
1832 |      "status": "completed"
1833 |     },
1834 |     "tags": []
1835 |    },
1836 |    "outputs": [],
1837 |    "source": [
1838 |     "from sklearn.linear_model import ElasticNet\n",
1839 |     "elastic=ElasticNet(alpha=1)"
1840 |    ]
1841 |   },
1842 |   {
1843 |    "cell_type": "code",
1844 |    "execution_count": 41,
1845 |    "id": "514ea47a",
1846 |    "metadata": {
1847 |     "execution": {
1848 |      "iopub.execute_input": "2021-12-22T19:07:43.998460Z",
1849 |      "iopub.status.busy": "2021-12-22T19:07:43.997850Z",
1850 |      "iopub.status.idle": "2021-12-22T19:07:44.004364Z",
1851 |      "shell.execute_reply": "2021-12-22T19:07:44.004936Z"
1852 |     },
1853 |     "papermill": {
1854 |      "duration": 0.057813,
1855 |      "end_time": "2021-12-22T19:07:44.005105",
1856 |      "exception": false,
1857 |      "start_time": "2021-12-22T19:07:43.947292",
1858 |      "status": "completed"
1859 |     },
1860 |     "tags": []
1861 |    },
1862 |    "outputs": [
1863 |     {
1864 |      "data": {
1865 |       "text/plain": [
1866 |        "ElasticNet(alpha=1)"
1867 |       ]
1868 |      },
1869 |      "execution_count": 41,
1870 |      "metadata": {},
1871 |      "output_type": "execute_result"
1872 |     }
1873 |    ],
1874 |    "source": [
1875 |     "elastic.fit(X_train,y_train)"
1876 |    ]
1877 |   },
1878 |   {
1879 |    "cell_type": "code",
1880 |    "execution_count": 42,
1881 |    "id": "6ca049fb",
1882 |    "metadata": {
1883 |     "execution": {
1884 |      "iopub.execute_input": "2021-12-22T19:07:44.105947Z",
1885 |      "iopub.status.busy": "2021-12-22T19:07:44.105288Z",
1886 |      "iopub.status.idle": "2021-12-22T19:07:44.108773Z",
1887 |      "shell.execute_reply": "2021-12-22T19:07:44.109289Z"
1888 |     },
1889 |     "papermill": {
1890 |      "duration": 0.055931,
1891 |      "end_time": "2021-12-22T19:07:44.109453",
1892 |      "exception": false,
1893 |      "start_time": "2021-12-22T19:07:44.053522",
1894 |      "status": "completed"
1895 |     },
1896 |     "tags": []
1897 |    },
1898 |    "outputs": [],
1899 |    "source": [
1900 |     "y_pred2=elastic.predict(X_test)"
1901 |    ]
1902 |   },
1903 |   {
1904 |    "cell_type": "code",
1905 |    "execution_count": 43,
1906 |    "id": "40aeee4e",
1907 |    "metadata": {
1908 |     "execution": {
1909 |      "iopub.execute_input": "2021-12-22T19:07:44.209854Z",
1910 |      "iopub.status.busy": "2021-12-22T19:07:44.209204Z",
1911 |      "iopub.status.idle": "2021-12-22T19:07:44.214732Z",
1912 |      "shell.execute_reply": "2021-12-22T19:07:44.214113Z"
1913 |     },
1914 |     "papermill": {
1915 |      "duration": 0.056737,
1916 |      "end_time": "2021-12-22T19:07:44.214870",
1917 |      "exception": false,
1918 |      "start_time": "2021-12-22T19:07:44.158133",
1919 |      "status": "completed"
1920 |     },
1921 |     "tags": []
1922 |    },
1923 |    "outputs": [
1924 |     {
1925 |      "name": "stdout",
1926 |      "output_type": "stream",
1927 |      "text": [
1928 |       "24.422988143894155\n"
1929 |      ]
1930 |     }
1931 |    ],
1932 |    "source": [
1933 |     "elastic_mse=np.mean((y_pred2-y_test)**2)\n",
1934 |     "# As a reminder, mean squared error is the mean of the squared differences between the predicted values (y_pred2) and y_test\n",
1935 |     "\n",
1936 |     "print(elastic_mse)"
1937 |    ]
1938 |   },
1939 |   {
1940 |    "cell_type": "code",
1941 |    "execution_count": 44,
1942 |    "id": "76811e85",
1943 |    "metadata": {
1944 |     "execution": {
1945 |      "iopub.execute_input": "2021-12-22T19:07:44.316624Z",
1946 |      "iopub.status.busy": "2021-12-22T19:07:44.315981Z",
1947 |      "iopub.status.idle": "2021-12-22T19:07:44.325120Z",
1948 |      "shell.execute_reply": "2021-12-22T19:07:44.325614Z"
1949 |     },
1950 |     "papermill": {
1951 |      "duration": 0.061949,
1952 |      "end_time": "2021-12-22T19:07:44.325803",
1953 |      "exception": false,
1954 |      "start_time": "2021-12-22T19:07:44.263854",
1955 |      "status": "completed"
1956 |     },
1957 |     "tags": []
1958 |    },
1959 |    "outputs": [
1960 |     {
1961 |      "name": "stdout",
1962 |      "output_type": "stream",
1963 |      "text": [
1964 |       "    columns  coeff values\n",
1965 |       "0      CRIM     -0.022867\n",
1966 |       "1        ZN      0.055481\n",
1967 |       "2     INDUS     -0.000000\n",
1968 |       "3      CHAS      0.000000\n",
1969 |       "4       NOX     -0.000000\n",
1970 |       "5        RM      0.926176\n",
1971 |       "6       AGE      0.029873\n",
1972 |       "7       DIS     -0.802898\n",
1973 |       "8       RAD      0.261508\n",
1974 |       "9       TAX     -0.015532\n",
1975 |       "10  PTRATIO     -0.648044\n",
1976 |       "11        B      0.011629\n",
1977 |       "12    LSTAT     -0.823327\n",
1978 |       "13   target           NaN\n"
1979 |      ]
1980 |     }
1981 |    ],
1982 |    "source": [
1983 |     "# Build a DataFrame pairing each column with its ElasticNet coefficient; df.columns also includes 'target', which has no coefficient, so that row shows NaN\n",
1984 |     "\n",
1985 |     "elastic_coeff=pd.DataFrame()\n",
1986 |     "elastic_coeff['columns']=df.columns\n",
1987 |     "elastic_coeff['coeff values']=pd.Series(elastic.coef_)\n",
1988 |     "\n",
1989 |     "print(elastic_coeff)"
1990 |    ]
1991 |   },
1992 |   {
1993 |    "cell_type": "code",
1994 |    "execution_count": 45,
1995 |    "id": "c45c3e50",
1996 |    "metadata": {
1997 |     "execution": {
1998 |      "iopub.execute_input": "2021-12-22T19:07:44.428261Z",
1999 |      "iopub.status.busy": "2021-12-22T19:07:44.427600Z",
2000 |      "iopub.status.idle": "2021-12-22T19:07:44.431913Z",
2001 |      "shell.execute_reply": "2021-12-22T19:07:44.432475Z"
2002 |     },
2003 |     "papermill": {
2004 |      "duration": 0.057216,
2005 |      "end_time": "2021-12-22T19:07:44.432632",
2006 |      "exception": false,
2007 |      "start_time": "2021-12-22T19:07:44.375416",
2008 |      "status": "completed"
2009 |     },
2010 |     "tags": []
2011 |    },
2012 |    "outputs": [
2013 |     {
2014 |      "data": {
2015 |       "text/plain": [
2016 |        "pandas.core.frame.DataFrame"
2017 |       ]
2018 |      },
2019 |      "execution_count": 45,
2020 |      "metadata": {},
2021 |      "output_type": "execute_result"
2022 |     }
2023 |    ],
2024 |    "source": [
2025 |     "type(elastic_coeff)"
2026 |    ]
2027 |   },
2028 |   {
2029 |    "cell_type": "markdown",
2030 |    "id": "365d0b90",
2031 |    "metadata": {
2032 |     "papermill": {
2033 |      "duration": 0.049713,
2034 |      "end_time": "2021-12-22T19:07:44.532152",
2035 |      "exception": false,
2036 |      "start_time": "2021-12-22T19:07:44.482439",
2037 |      "status": "completed"
2038 |     },
2039 |     "tags": []
2040 |    },
2041 |    "source": [
2042 |     "- Elastic Net combines both of the regularization techniques above: its penalty term contains both the L1 (Lasso) and L2 (Ridge) penalties.\n",
2043 |     "- In many cases it performs better than Ridge or Lasso regression alone, although this depends on the dataset."
2044 |    ]
2045 |   }
2046 |  ],
2047 |  "metadata": {
2048 |   "kernelspec": {
2049 |    "display_name": "Python 3 (ipykernel)",
2050 |    "language": "python",
2051 |    "name": "python3"
2052 |   },
2053 |   "language_info": {
2054 |    "codemirror_mode": {
2055 |     "name": "ipython",
2056 |     "version": 3
2057 |    },
2058 |    "file_extension": ".py",
2059 |    "mimetype": "text/x-python",
2060 |    "name": "python",
2061 |    "nbconvert_exporter": "python",
2062 |    "pygments_lexer": "ipython3",
2063 |    "version": "3.9.7"
2064 |   },
2065 |   "papermill": {
2066 |    "default_parameters": {},
2067 |    "duration": 17.062862,
2068 |    "end_time": "2021-12-22T19:07:45.291877",
2069 |    "environment_variables": {},
2070 |    "exception": null,
2071 |    "input_path": "__notebook__.ipynb",
2072 |    "output_path": "__notebook__.ipynb",
2073 |    "parameters": {},
2074 |    "start_time": "2021-12-22T19:07:28.229015",
2075 |    "version": "2.3.3"
2076 |   }
2077 |  },
2078 |  "nbformat": 4,
2079 |  "nbformat_minor": 5
2080 | }
2081 | 


--------------------------------------------------------------------------------