├── ML Practice ├── LabTask │ ├── Screenshot.png │ └── Home.csv ├── weather.csv ├── LGR_iras.ipynb ├── ML Mid Task │ ├── spam.ipynb │ └── SVM.ipynb ├── Class tasks.ipynb ├── weather.ipynb ├── Ridge_Lasso_diff .ipynb └── spam.ipynb ├── README.md ├── Intro to Machine Learning Kaggle Course ├── exercise-random-forests.ipynb ├── exercise-underfitting-and-overfitting.ipynb ├── exercise-model-validation.ipynb ├── exercise-your-first-machine-learning-model.ipynb ├── exercise-explore-your-data.ipynb └── exercise-machine-learning-competitions.ipynb └── Support Vector Machine └── SVM (1).ipynb /ML Practice/LabTask/Screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shaikh-Yaqoob/Machine-Learning-Playground/HEAD/ML Practice/LabTask/Screenshot.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Machine-Learning-Playground 2 | Welcome to the ML Playground! Dive into ML—explore, experiment, and learn with projects, datasets, and algorithms. Beginners and enthusiasts, join us in the world of data-driven magic. Contribute, learn, and unlock ML's power! 
3 | -------------------------------------------------------------------------------- /ML Practice/LabTask/Home.csv: -------------------------------------------------------------------------------- 1 | Area,Size,Age,Price 2 | 2000,3,3,500000 3 | 4000,4,4,600000 4 | 6000,8,6,700000 5 | 3000,6,7,600000 6 | 2000,3,4,450000 7 | 3000,2,8,650000 8 | 5000,3,6,500000 9 | 2500,4,3,400000 10 | 4000,6,5,350000 11 | 3500,5,5,700000 12 | 5000,4,2,800000 13 | -------------------------------------------------------------------------------- /ML Practice/weather.csv: -------------------------------------------------------------------------------- 1 | outlook,temperature,humidity,windy,play 2 | overcast,hot,high,FALSE,yes 3 | overcast,cool,normal,TRUE,yes 4 | overcast,mild,high,TRUE,yes 5 | overcast,hot,normal,FALSE,yes 6 | rainy,mild,high,FALSE,yes 7 | rainy,cool,normal,FALSE,yes 8 | rainy,cool,normal,TRUE,no 9 | rainy,mild,normal,FALSE,yes 10 | rainy,mild,high,TRUE,no 11 | sunny,hot,high,FALSE,no 12 | sunny,hot,high,TRUE,no 13 | sunny,mild,high,FALSE,no 14 | sunny,cool,normal,FALSE,yes 15 | sunny,mild,normal,TRUE,yes -------------------------------------------------------------------------------- /Intro to Machine Learning Kaggle Course/exercise-random-forests.ipynb: -------------------------------------------------------------------------------- 1 | {"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"name":"python","version":"3.10.12","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"markdown","source":"**This notebook is an exercise in the [Introduction to Machine Learning](https://www.kaggle.com/learn/intro-to-machine-learning) course. 
You can reference the tutorial at [this link](https://www.kaggle.com/dansbecker/random-forests).**\n\n---\n","metadata":{}},{"cell_type":"markdown","source":"## Recap\nHere's the code you've written so far.","metadata":{}},{"cell_type":"code","source":"# Code you have previously used to load data\nimport pandas as pd\nfrom sklearn.metrics import mean_absolute_error\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.tree import DecisionTreeRegressor\n\n\n# Path of the file to read\niowa_file_path = '../input/home-data-for-ml-course/train.csv'\n\nhome_data = pd.read_csv(iowa_file_path)\n# Create target object and call it y\ny = home_data.SalePrice\n# Create X\nfeatures = ['LotArea', 'YearBuilt', '1stFlrSF', '2ndFlrSF', 'FullBath', 'BedroomAbvGr', 'TotRmsAbvGrd']\nX = home_data[features]\n\n# Split into validation and training data\ntrain_X, val_X, train_y, val_y = train_test_split(X, y, random_state=1)\n\n# Specify Model\niowa_model = DecisionTreeRegressor(random_state=1)\n# Fit Model\niowa_model.fit(train_X, train_y)\n\n# Make validation predictions and calculate mean absolute error\nval_predictions = iowa_model.predict(val_X)\nval_mae = mean_absolute_error(val_predictions, val_y)\nprint(\"Validation MAE when not specifying max_leaf_nodes: {:,.0f}\".format(val_mae))\n\n# Using best value for max_leaf_nodes\niowa_model = DecisionTreeRegressor(max_leaf_nodes=100, random_state=1)\niowa_model.fit(train_X, train_y)\nval_predictions = iowa_model.predict(val_X)\nval_mae = mean_absolute_error(val_predictions, val_y)\nprint(\"Validation MAE for best value of max_leaf_nodes: {:,.0f}\".format(val_mae))\n\n\n# Set up code checking\nfrom learntools.core import binder\nbinder.bind(globals())\nfrom learntools.machine_learning.ex6 import *\nprint(\"\\nSetup 
complete\")","metadata":{"execution":{"iopub.status.busy":"2023-10-02T09:51:49.847350Z","iopub.execute_input":"2023-10-02T09:51:49.848190Z","iopub.status.idle":"2023-10-02T09:51:52.341527Z","shell.execute_reply.started":"2023-10-02T09:51:49.848140Z","shell.execute_reply":"2023-10-02T09:51:52.340191Z"},"trusted":true},"execution_count":1,"outputs":[{"name":"stdout","text":"Validation MAE when not specifying max_leaf_nodes: 29,653\nValidation MAE for best value of max_leaf_nodes: 27,283\n\nSetup complete\n","output_type":"stream"}]},{"cell_type":"markdown","source":"# Exercises\nData science isn't always this easy. But replacing the decision tree with a Random Forest is going to be an easy win.","metadata":{}},{"cell_type":"markdown","source":"## Step 1: Use a Random Forest","metadata":{}},{"cell_type":"code","source":"from sklearn.ensemble import RandomForestRegressor\n\n# Define the model. Set random_state to 1\nrf_model = RandomForestRegressor(n_estimators=100, random_state=1)\n\n# fit your model\nrf_model.fit(train_X, train_y)\n\n\n# Calculate the mean absolute error of your Random Forest model on the validation data\npred_y = rf_model.predict(val_X)\nrf_val_mae = mean_absolute_error(val_y, pred_y)\n\nprint(\"Validation MAE for Random Forest Model: {}\".format(rf_val_mae))\n\n# Check your answer\nstep_1.check()","metadata":{"execution":{"iopub.status.busy":"2023-10-02T09:53:10.791095Z","iopub.execute_input":"2023-10-02T09:53:10.791958Z","iopub.status.idle":"2023-10-02T09:53:11.397709Z","shell.execute_reply.started":"2023-10-02T09:53:10.791925Z","shell.execute_reply":"2023-10-02T09:53:11.396451Z"},"trusted":true},"execution_count":2,"outputs":[{"name":"stdout","text":"Validation MAE for Random Forest Model: 21857.15912981083\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"","application/javascript":"parent.postMessage({\"jupyterEvent\": \"custom.exercise_interaction\", \"data\": {\"outcomeType\": 1, \"valueTowardsCompletion\": 1.0, 
\"interactionType\": 1, \"questionType\": 2, \"questionId\": \"1_CheckRfScore\", \"learnToolsVersion\": \"0.3.4\", \"failureMessage\": \"\", \"exceptionClass\": \"\", \"trace\": \"\"}}, \"*\")"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Correct","text/markdown":"Correct"},"metadata":{}}]},{"cell_type":"code","source":"# The lines below will show you a hint or the solution.\n# step_1.hint() \n# step_1.solution()\n","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"So far, you have followed specific instructions at each step of your project. This helped learn key ideas and build your first model, but now you know enough to try things on your own. \n\nMachine Learning competitions are a great way to try your own ideas and learn more as you independently navigate a machine learning project. \n\n# Keep Going\n\nYou are ready for **[Machine Learning Competitions](https://www.kaggle.com/alexisbcook/machine-learning-competitions).**\n","metadata":{}},{"cell_type":"markdown","source":"---\n\n\n\n\n*Have questions or comments? 
Visit the [course discussion forum](https://www.kaggle.com/learn/intro-to-machine-learning/discussion) to chat with other learners.*","metadata":{}}]} -------------------------------------------------------------------------------- /Intro to Machine Learning Kaggle Course/exercise-underfitting-and-overfitting.ipynb: -------------------------------------------------------------------------------- 1 | {"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"name":"python","version":"3.10.12","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"markdown","source":"**This notebook is an exercise in the [Introduction to Machine Learning](https://www.kaggle.com/learn/intro-to-machine-learning) course. You can reference the tutorial at [this link](https://www.kaggle.com/dansbecker/underfitting-and-overfitting).**\n\n---\n","metadata":{}},{"cell_type":"markdown","source":"## Recap\nYou've built your first model, and now it's time to optimize the size of the tree to make better predictions. 
Run this cell to set up your coding environment where the previous step left off.","metadata":{}},{"cell_type":"code","source":"# Code you have previously used to load data\nimport pandas as pd\nfrom sklearn.metrics import mean_absolute_error\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.tree import DecisionTreeRegressor\n\n\n# Path of the file to read\niowa_file_path = '../input/home-data-for-ml-course/train.csv'\n\nhome_data = pd.read_csv(iowa_file_path)\n# Create target object and call it y\ny = home_data.SalePrice\n# Create X\nfeatures = ['LotArea', 'YearBuilt', '1stFlrSF', '2ndFlrSF', 'FullBath', 'BedroomAbvGr', 'TotRmsAbvGrd']\nX = home_data[features]\n\n# Split into validation and training data\ntrain_X, val_X, train_y, val_y = train_test_split(X, y, random_state=1)\n\n# Specify Model\niowa_model = DecisionTreeRegressor(random_state=1)\n# Fit Model\niowa_model.fit(train_X, train_y)\n\n# Make validation predictions and calculate mean absolute error\nval_predictions = iowa_model.predict(val_X)\nval_mae = mean_absolute_error(val_predictions, val_y)\nprint(\"Validation MAE: {:,.0f}\".format(val_mae))\n\n# Set up code checking\nfrom learntools.core import binder\nbinder.bind(globals())\nfrom learntools.machine_learning.ex5 import *\nprint(\"\\nSetup complete\")","metadata":{"execution":{"iopub.status.busy":"2023-10-02T09:48:01.823647Z","iopub.execute_input":"2023-10-02T09:48:01.824221Z","iopub.status.idle":"2023-10-02T09:48:04.192726Z","shell.execute_reply.started":"2023-10-02T09:48:01.824136Z","shell.execute_reply":"2023-10-02T09:48:04.191117Z"},"trusted":true},"execution_count":1,"outputs":[{"name":"stdout","text":"Validation MAE: 29,653\n\nSetup complete\n","output_type":"stream"}]},{"cell_type":"markdown","source":"# Exercises\nYou could write the function `get_mae` yourself. For now, we'll supply it. This is the same function you read about in the previous lesson. 
Just run the cell below.","metadata":{}},{"cell_type":"code","source":"def get_mae(max_leaf_nodes, train_X, val_X, train_y, val_y):\n model = DecisionTreeRegressor(max_leaf_nodes=max_leaf_nodes, random_state=0)\n model.fit(train_X, train_y)\n preds_val = model.predict(val_X)\n mae = mean_absolute_error(val_y, preds_val)\n return(mae)","metadata":{"execution":{"iopub.status.busy":"2023-10-02T09:48:09.633976Z","iopub.execute_input":"2023-10-02T09:48:09.634368Z","iopub.status.idle":"2023-10-02T09:48:09.640161Z","shell.execute_reply.started":"2023-10-02T09:48:09.634340Z","shell.execute_reply":"2023-10-02T09:48:09.638889Z"},"trusted":true},"execution_count":2,"outputs":[]},{"cell_type":"markdown","source":"## Step 1: Compare Different Tree Sizes\nWrite a loop that tries each value of *max_leaf_nodes* from the set of candidate values defined in the cell below.\n\nCall the *get_mae* function on each value of max_leaf_nodes. Store the output in some way that allows you to select the value of `max_leaf_nodes` that gives the most accurate model on your data.","metadata":{}},{"cell_type":"code","source":"candidate_max_leaf_nodes = [5, 25, 50, 100, 250, 500]\n# Write loop to find the ideal tree size from candidate_max_leaf_nodes\nmae_scores = {}\nfor size in candidate_max_leaf_nodes:\n mae_scores[size] = get_mae(size, train_X, val_X, train_y, val_y)\n\n# Store the best value of max_leaf_nodes (it will be either 5, 25, 50, 100, 250 or 500)\nbest_tree_size = min(mae_scores, key=mae_scores.get)\n\n# Check your answer\nstep_1.check()","metadata":{"execution":{"iopub.status.busy":"2023-10-02T09:49:12.853542Z","iopub.execute_input":"2023-10-02T09:49:12.853975Z","iopub.status.idle":"2023-10-02T09:49:12.905004Z","shell.execute_reply.started":"2023-10-02T09:49:12.853943Z","shell.execute_reply":"2023-10-02T09:49:12.903768Z"},"trusted":true},"execution_count":3,"outputs":[{"output_type":"display_data","data":{"text/plain":"","application/javascript":"parent.postMessage({\"jupyterEvent\": 
\"custom.exercise_interaction\", \"data\": {\"outcomeType\": 1, \"valueTowardsCompletion\": 0.5, \"interactionType\": 1, \"questionType\": 1, \"questionId\": \"1_BestTreeSize\", \"learnToolsVersion\": \"0.3.4\", \"failureMessage\": \"\", \"exceptionClass\": \"\", \"trace\": \"\"}}, \"*\")"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Correct","text/markdown":"Correct"},"metadata":{}}]},{"cell_type":"code","source":"# The lines below will show you a hint or the solution.\n# step_1.hint() \n# step_1.solution()","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"## Step 2: Fit Model Using All Data\nYou know the best tree size. If you were going to deploy this model in practice, you would make it even more accurate by using all of the data and keeping that tree size. That is, you don't need to hold out the validation data now that you've made all your modeling decisions.","metadata":{}},{"cell_type":"code","source":"# Fill in argument to make optimal size and uncomment\nfinal_model = DecisionTreeRegressor(max_leaf_nodes=best_tree_size, random_state=1)\n\n# fit the final model and uncomment the next two lines\nfinal_model.fit(X, y)\n\n# Check your answer\nstep_2.check()","metadata":{"execution":{"iopub.status.busy":"2023-10-02T09:49:56.935671Z","iopub.execute_input":"2023-10-02T09:49:56.936036Z","iopub.status.idle":"2023-10-02T09:49:56.954701Z","shell.execute_reply.started":"2023-10-02T09:49:56.936010Z","shell.execute_reply":"2023-10-02T09:49:56.953747Z"},"trusted":true},"execution_count":4,"outputs":[{"output_type":"display_data","data":{"text/plain":"","application/javascript":"parent.postMessage({\"jupyterEvent\": \"custom.exercise_interaction\", \"data\": {\"outcomeType\": 1, \"valueTowardsCompletion\": 0.5, \"interactionType\": 1, \"questionType\": 2, \"questionId\": \"2_FitModelWithAllData\", \"learnToolsVersion\": \"0.3.4\", \"failureMessage\": \"\", \"exceptionClass\": \"\", \"trace\": \"\"}}, 
\"*\")"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Correct","text/markdown":"Correct"},"metadata":{}}]},{"cell_type":"code","source":"# step_2.hint()\n# step_2.solution()","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"You've tuned this model and improved your results. But we are still using Decision Tree models, which are not very sophisticated by modern machine learning standards. In the next step you will learn to use Random Forests to improve your models even more.\n\n# Keep Going\n\nYou are ready for **[Random Forests](https://www.kaggle.com/dansbecker/random-forests).**\n","metadata":{}},{"cell_type":"markdown","source":"---\n\n\n\n\n*Have questions or comments? Visit the [course discussion forum](https://www.kaggle.com/learn/intro-to-machine-learning/discussion) to chat with other learners.*","metadata":{}}]} -------------------------------------------------------------------------------- /ML Practice/LGR_iras.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "21518716", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "from sklearn import datasets" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 3, 16 | "id": "0b115abb", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "iris = datasets.load_iris()\n", 21 | "X = iris.data\n", 22 | "y = iris.target" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 4, 28 | "id": "30e1785e", 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "y_binary = (y == 0).astype(int)" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 5, 38 | "id": "48b85dc1", 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "from sklearn.model_selection import train_test_split\n", 43 | "X_train, X_test, y_train, y_test = train_test_split(X, y, 
test_size=0.2, random_state=42)" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 7, 49 | "id": "97b236f1", 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "from sklearn.linear_model import LogisticRegression\n", 54 | "clf = LogisticRegression()" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 8, 60 | "id": "268cb22e", 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "data": { 65 | "text/html": [ 66 | "
LogisticRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" 67 | ], 68 | "text/plain": [ 69 | "LogisticRegression()" 70 | ] 71 | }, 72 | "execution_count": 8, 73 | "metadata": {}, 74 | "output_type": "execute_result" 75 | } 76 | ], 77 | "source": [ 78 | "clf.fit(X_train, y_train)" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 9, 84 | "id": "ef809e80", 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "y_pred = clf.predict(X_test)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 11, 94 | "id": "8721d73c", 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "from sklearn.metrics import accuracy_score" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 12, 104 | "id": "376062cc", 105 | "metadata": {}, 106 | "outputs": [ 107 | { 108 | "name": "stdout", 109 | "output_type": "stream", 110 | "text": [ 111 | "Accuracy: 1.0\n" 112 | ] 113 | } 114 | ], 115 | "source": [ 116 | "accuracy = accuracy_score(y_test, y_pred)\n", 117 | "print(f'Accuracy: {accuracy}')" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "id": "5c18a65e", 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [] 127 | } 128 | ], 129 | "metadata": { 130 | "kernelspec": { 131 | "display_name": "Python 3 (ipykernel)", 132 | "language": "python", 133 | "name": "python3" 134 | }, 135 | "language_info": { 136 | "codemirror_mode": { 137 | "name": "ipython", 138 | "version": 3 139 | }, 140 | "file_extension": ".py", 141 | "mimetype": "text/x-python", 142 | "name": "python", 143 | "nbconvert_exporter": "python", 144 | "pygments_lexer": "ipython3", 145 | "version": "3.11.4" 146 | } 147 | }, 148 | "nbformat": 4, 149 | "nbformat_minor": 5 150 | } 151 | -------------------------------------------------------------------------------- /Intro to Machine Learning Kaggle Course/exercise-model-validation.ipynb: -------------------------------------------------------------------------------- 1 | 
{"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"name":"python","version":"3.10.12","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"markdown","source":"**This notebook is an exercise in the [Introduction to Machine Learning](https://www.kaggle.com/learn/intro-to-machine-learning) course. You can reference the tutorial at [this link](https://www.kaggle.com/dansbecker/model-validation).**\n\n---\n","metadata":{}},{"cell_type":"markdown","source":"## Recap\nYou've built a model. In this exercise you will test how good your model is.\n\nRun the cell below to set up your coding environment where the previous exercise left off.","metadata":{}},{"cell_type":"code","source":"# Code you have previously used to load data\nimport pandas as pd\nfrom sklearn.tree import DecisionTreeRegressor\n\n# Path of the file to read\niowa_file_path = '../input/home-data-for-ml-course/train.csv'\n\nhome_data = pd.read_csv(iowa_file_path)\ny = home_data.SalePrice\nfeature_columns = ['LotArea', 'YearBuilt', '1stFlrSF', '2ndFlrSF', 'FullBath', 'BedroomAbvGr', 'TotRmsAbvGrd']\nX = home_data[feature_columns]\n\n# Specify Model\niowa_model = DecisionTreeRegressor()\n# Fit Model\niowa_model.fit(X, y)\n\nprint(\"First in-sample predictions:\", iowa_model.predict(X.head()))\nprint(\"Actual target values for those homes:\", y.head().tolist())\n\n# Set up code checking\nfrom learntools.core import binder\nbinder.bind(globals())\nfrom learntools.machine_learning.ex4 import *\nprint(\"Setup 
Complete\")","metadata":{"execution":{"iopub.status.busy":"2023-10-02T09:39:21.273273Z","iopub.execute_input":"2023-10-02T09:39:21.273635Z","iopub.status.idle":"2023-10-02T09:39:23.338620Z","shell.execute_reply.started":"2023-10-02T09:39:21.273608Z","shell.execute_reply":"2023-10-02T09:39:23.337498Z"},"trusted":true},"execution_count":1,"outputs":[{"name":"stdout","text":"First in-sample predictions: [208500. 181500. 223500. 140000. 250000.]\nActual target values for those homes: [208500, 181500, 223500, 140000, 250000]\nSetup Complete\n","output_type":"stream"}]},{"cell_type":"markdown","source":"# Exercises\n\n## Step 1: Split Your Data\nUse the `train_test_split` function to split up your data.\n\nGive it the argument `random_state=1` so the `check` functions know what to expect when verifying your code.\n\nRecall, your features are loaded in the DataFrame **X** and your target is loaded in **y**.\n","metadata":{}},{"cell_type":"code","source":"# Import the train_test_split function and uncomment\nfrom sklearn.model_selection import train_test_split\n\n# fill in and uncomment\ntrain_X, val_X, train_y, val_y = train_test_split(X, y, random_state=1)\n\n# Check your answer\nstep_1.check()","metadata":{"execution":{"iopub.status.busy":"2023-10-02T09:43:07.867846Z","iopub.execute_input":"2023-10-02T09:43:07.868244Z","iopub.status.idle":"2023-10-02T09:43:07.882580Z","shell.execute_reply.started":"2023-10-02T09:43:07.868213Z","shell.execute_reply":"2023-10-02T09:43:07.881487Z"},"trusted":true},"execution_count":2,"outputs":[{"output_type":"display_data","data":{"text/plain":"","application/javascript":"parent.postMessage({\"jupyterEvent\": \"custom.exercise_interaction\", \"data\": {\"outcomeType\": 1, \"valueTowardsCompletion\": 0.25, \"interactionType\": 1, \"questionType\": 2, \"questionId\": \"1_SplitData\", \"learnToolsVersion\": \"0.3.4\", \"failureMessage\": \"\", \"exceptionClass\": \"\", \"trace\": \"\"}}, 
\"*\")"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Correct","text/markdown":"Correct"},"metadata":{}}]},{"cell_type":"code","source":"# The lines below will show you a hint or the solution.\n# step_1.hint() \n# step_1.solution()\n","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"## Step 2: Specify and Fit the Model\n\nCreate a `DecisionTreeRegressor` model and fit it to the relevant data.\nSet `random_state` to 1 again when creating the model.","metadata":{}},{"cell_type":"code","source":"# You imported DecisionTreeRegressor in your last exercise\n# and that code has been copied to the setup code above. So, no need to\n# import it again\n\n# Specify the model\niowa_model = DecisionTreeRegressor(random_state=1)\n\n# Fit iowa_model with the training data.\niowa_model.fit(train_X, train_y)\n\n\n# Check your answer\nstep_2.check()","metadata":{"execution":{"iopub.status.busy":"2023-10-02T09:44:11.438161Z","iopub.execute_input":"2023-10-02T09:44:11.438547Z","iopub.status.idle":"2023-10-02T09:44:11.467147Z","shell.execute_reply.started":"2023-10-02T09:44:11.438520Z","shell.execute_reply":"2023-10-02T09:44:11.466265Z"},"trusted":true},"execution_count":3,"outputs":[{"name":"stdout","text":"[186500. 184000. 130000. 92000. 164500. 220000. 335000. 144152. 215000.\n 262000.]\n[186500. 184000. 130000. 92000. 164500. 220000. 335000. 144152. 
215000.\n 262000.]\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"","application/javascript":"parent.postMessage({\"jupyterEvent\": \"custom.exercise_interaction\", \"data\": {\"outcomeType\": 1, \"valueTowardsCompletion\": 0.25, \"interactionType\": 1, \"questionType\": 2, \"questionId\": \"2_FitModelWithTrain\", \"learnToolsVersion\": \"0.3.4\", \"failureMessage\": \"\", \"exceptionClass\": \"\", \"trace\": \"\"}}, \"*\")"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Correct","text/markdown":"Correct"},"metadata":{}}]},{"cell_type":"code","source":"# step_2.hint()\n# step_2.solution()","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"## Step 3: Make Predictions with Validation data\n","metadata":{}},{"cell_type":"code","source":"# Predict with all validation observations\nval_predictions = iowa_model.predict(val_X)\n\n# Check your answer\nstep_3.check()","metadata":{"execution":{"iopub.status.busy":"2023-10-02T09:44:33.614288Z","iopub.execute_input":"2023-10-02T09:44:33.614704Z","iopub.status.idle":"2023-10-02T09:44:33.626027Z","shell.execute_reply.started":"2023-10-02T09:44:33.614672Z","shell.execute_reply":"2023-10-02T09:44:33.625367Z"},"trusted":true},"execution_count":4,"outputs":[{"output_type":"display_data","data":{"text/plain":"","application/javascript":"parent.postMessage({\"jupyterEvent\": \"custom.exercise_interaction\", \"data\": {\"outcomeType\": 1, \"valueTowardsCompletion\": 0.25, \"interactionType\": 1, \"questionType\": 2, \"questionId\": \"3_ValPreds\", \"learnToolsVersion\": \"0.3.4\", \"failureMessage\": \"\", \"exceptionClass\": \"\", \"trace\": \"\"}}, \"*\")"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Correct","text/markdown":"Correct"},"metadata":{}}]},{"cell_type":"code","source":"# step_3.hint()\n# step_3.solution()","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"Inspect your 
predictions and actual values from validation data.","metadata":{}},{"cell_type":"code","source":"# print the top few validation predictions\nprint(iowa_model.predict(val_X.head()))\n# print the top few actual prices from validation data\nprint(val_y.head().tolist())","metadata":{"execution":{"iopub.status.busy":"2023-10-02T09:45:16.064274Z","iopub.execute_input":"2023-10-02T09:45:16.064763Z","iopub.status.idle":"2023-10-02T09:45:16.077517Z","shell.execute_reply.started":"2023-10-02T09:45:16.064728Z","shell.execute_reply":"2023-10-02T09:45:16.076433Z"},"trusted":true},"execution_count":5,"outputs":[{"name":"stdout","text":"[186500. 184000. 130000. 92000. 164500.]\n[231500, 179500, 122000, 84500, 142000]\n","output_type":"stream"}]},{"cell_type":"markdown","source":"What do you notice that is different from what you saw with in-sample predictions (which are printed after the top code cell on this page)?\n\nDo you remember why validation predictions differ from in-sample (or training) predictions? 
This is an important idea from the last lesson.\n\n## Step 4: Calculate the Mean Absolute Error in Validation Data\n","metadata":{}},{"cell_type":"code","source":"from sklearn.metrics import mean_absolute_error\nval_mae = mean_absolute_error(val_predictions, val_y)\n\n# uncomment following line to see the validation_mae\nprint(val_mae)\n\n# Check your answer\nstep_4.check()","metadata":{"execution":{"iopub.status.busy":"2023-10-02T09:46:05.173568Z","iopub.execute_input":"2023-10-02T09:46:05.173917Z","iopub.status.idle":"2023-10-02T09:46:05.184104Z","shell.execute_reply.started":"2023-10-02T09:46:05.173892Z","shell.execute_reply":"2023-10-02T09:46:05.183040Z"},"trusted":true},"execution_count":6,"outputs":[{"name":"stdout","text":"29652.931506849316\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"","application/javascript":"parent.postMessage({\"jupyterEvent\": \"custom.exercise_interaction\", \"data\": {\"outcomeType\": 1, \"valueTowardsCompletion\": 0.25, \"interactionType\": 1, \"questionType\": 1, \"questionId\": \"4_MAE\", \"learnToolsVersion\": \"0.3.4\", \"failureMessage\": \"\", \"exceptionClass\": \"\", \"trace\": \"\"}}, \"*\")"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Correct","text/markdown":"Correct"},"metadata":{}}]},{"cell_type":"code","source":"# step_4.hint()\n# step_4.solution()","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"Is that MAE good? There isn't a general rule for what values are good that applies across applications. But you'll see how to use (and improve) this number in the next step.\n\n# Keep Going\n\nYou are ready for **[Underfitting and Overfitting](https://www.kaggle.com/dansbecker/underfitting-and-overfitting).**\n","metadata":{}},{"cell_type":"markdown","source":"---\n\n\n\n\n*Have questions or comments? 
Visit the [course discussion forum](https://www.kaggle.com/learn/intro-to-machine-learning/discussion) to chat with other learners.*","metadata":{}}]} -------------------------------------------------------------------------------- /Intro to Machine Learning Kaggle Course/exercise-your-first-machine-learning-model.ipynb: -------------------------------------------------------------------------------- 1 | {"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"name":"python","version":"3.10.12","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"markdown","source":"**This notebook is an exercise in the [Introduction to Machine Learning](https://www.kaggle.com/learn/intro-to-machine-learning) course. You can reference the tutorial at [this link](https://www.kaggle.com/dansbecker/your-first-machine-learning-model).**\n\n---\n","metadata":{}},{"cell_type":"markdown","source":"## Recap\nSo far, you have loaded your data and reviewed it with the following code. 
Run this cell to set up your coding environment where the previous step left off.","metadata":{}},{"cell_type":"code","source":"# Code you have previously used to load data\nimport pandas as pd\n\n# Path of the file to read\niowa_file_path = '../input/home-data-for-ml-course/train.csv'\n\nhome_data = pd.read_csv(iowa_file_path)\n\n# Set up code checking\nfrom learntools.core import binder\nbinder.bind(globals())\nfrom learntools.machine_learning.ex3 import *\n\nprint(\"Setup Complete\")","metadata":{"execution":{"iopub.status.busy":"2023-10-02T09:24:13.843708Z","iopub.execute_input":"2023-10-02T09:24:13.844117Z","iopub.status.idle":"2023-10-02T09:24:13.879729Z","shell.execute_reply.started":"2023-10-02T09:24:13.844073Z","shell.execute_reply":"2023-10-02T09:24:13.878345Z"},"trusted":true},"execution_count":5,"outputs":[{"name":"stdout","text":"Setup Complete\n","output_type":"stream"}]},{"cell_type":"markdown","source":"# Exercises\n\n## Step 1: Specify Prediction Target\nSelect the target variable, which corresponds to the sales price. Save this to a new variable called `y`. 
You'll need to print a list of the columns to find the name of the column you need.\n","metadata":{}},{"cell_type":"code","source":"# print the list of columns in the dataset to find the name of the prediction target\nhome_data.columns","metadata":{"execution":{"iopub.status.busy":"2023-10-02T08:49:08.308347Z","iopub.execute_input":"2023-10-02T08:49:08.308682Z","iopub.status.idle":"2023-10-02T08:49:08.317085Z","shell.execute_reply.started":"2023-10-02T08:49:08.308656Z","shell.execute_reply":"2023-10-02T08:49:08.316279Z"},"trusted":true},"execution_count":4,"outputs":[{"execution_count":4,"output_type":"execute_result","data":{"text/plain":"Index(['Id', 'MSSubClass', 'MSZoning', 'LotFrontage', 'LotArea', 'Street',\n 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig',\n 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType',\n 'HouseStyle', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd',\n 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType',\n 'MasVnrArea', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual',\n 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinSF1',\n 'BsmtFinType2', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', 'Heating',\n 'HeatingQC', 'CentralAir', 'Electrical', '1stFlrSF', '2ndFlrSF',\n 'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath',\n 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'KitchenQual',\n 'TotRmsAbvGrd', 'Functional', 'Fireplaces', 'FireplaceQu', 'GarageType',\n 'GarageYrBlt', 'GarageFinish', 'GarageCars', 'GarageArea', 'GarageQual',\n 'GarageCond', 'PavedDrive', 'WoodDeckSF', 'OpenPorchSF',\n 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'PoolQC',\n 'Fence', 'MiscFeature', 'MiscVal', 'MoSold', 'YrSold', 'SaleType',\n 'SaleCondition', 'SalePrice'],\n dtype='object')"},"metadata":{}}]},{"cell_type":"code","source":"y = home_data.SalePrice\n\n# Check your 
answer\nstep_1.check()","metadata":{"execution":{"iopub.status.busy":"2023-10-02T09:25:17.327120Z","iopub.execute_input":"2023-10-02T09:25:17.327488Z","iopub.status.idle":"2023-10-02T09:25:17.343113Z","shell.execute_reply.started":"2023-10-02T09:25:17.327462Z","shell.execute_reply":"2023-10-02T09:25:17.342270Z"},"trusted":true},"execution_count":6,"outputs":[{"output_type":"display_data","data":{"text/plain":"","application/javascript":"parent.postMessage({\"jupyterEvent\": \"custom.exercise_interaction\", \"data\": {\"outcomeType\": 1, \"valueTowardsCompletion\": 0.25, \"interactionType\": 1, \"questionType\": 2, \"questionId\": \"1_SetTarget\", \"learnToolsVersion\": \"0.3.4\", \"failureMessage\": \"\", \"exceptionClass\": \"\", \"trace\": \"\"}}, \"*\")"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Correct","text/markdown":"Correct"},"metadata":{}}]},{"cell_type":"code","source":"# The lines below will show you a hint or the solution.\n# step_1.hint() \n# step_1.solution()","metadata":{"collapsed":true,"jupyter":{"outputs_hidden":true}},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"## Step 2: Create X\nNow you will create a DataFrame called `X` holding the predictive features.\n\nSince you want only some columns from the original data, you'll first create a list with the names of the columns you want in `X`.\n\nYou'll use just the following columns in the list (you can copy and paste the whole list to save some typing, though you'll still need to add quotes):\n * LotArea\n * YearBuilt\n * 1stFlrSF\n * 2ndFlrSF\n * FullBath\n * BedroomAbvGr\n * TotRmsAbvGrd\n\nAfter you've created that list of features, use it to create the DataFrame that you'll use to fit the model.","metadata":{}},{"cell_type":"code","source":"# Create the list of features below\nfeature_names = ['LotArea', 'YearBuilt', '1stFlrSF', '2ndFlrSF', 'FullBath', 'BedroomAbvGr', 'TotRmsAbvGrd']\n\n# Select data corresponding to features in 
feature_names\nX = home_data[feature_names]\n\n\n# Check your answer\nstep_2.check()","metadata":{"execution":{"iopub.status.busy":"2023-10-02T09:31:00.291238Z","iopub.execute_input":"2023-10-02T09:31:00.291685Z","iopub.status.idle":"2023-10-02T09:31:00.312976Z","shell.execute_reply.started":"2023-10-02T09:31:00.291635Z","shell.execute_reply":"2023-10-02T09:31:00.311922Z"},"trusted":true},"execution_count":7,"outputs":[{"output_type":"display_data","data":{"text/plain":"","application/javascript":"parent.postMessage({\"jupyterEvent\": \"custom.exercise_interaction\", \"data\": {\"outcomeType\": 1, \"valueTowardsCompletion\": 0.25, \"interactionType\": 1, \"questionType\": 2, \"questionId\": \"2_SelectPredictionData\", \"learnToolsVersion\": \"0.3.4\", \"failureMessage\": \"\", \"exceptionClass\": \"\", \"trace\": \"\"}}, \"*\")"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Correct","text/markdown":"Correct"},"metadata":{}}]},{"cell_type":"code","source":"# step_2.hint()\n# step_2.solution()","metadata":{"collapsed":true,"jupyter":{"outputs_hidden":true}},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"## Review Data\nBefore building a model, take a quick look at **X** to verify it looks sensible","metadata":{}},{"cell_type":"code","source":"# Review data\n# print description or statistics from X\nprint(X.describe())\n# print the top few lines\nprint(y.head())","metadata":{"execution":{"iopub.status.busy":"2023-10-02T09:32:15.814219Z","iopub.execute_input":"2023-10-02T09:32:15.814571Z","iopub.status.idle":"2023-10-02T09:32:15.847857Z","shell.execute_reply.started":"2023-10-02T09:32:15.814547Z","shell.execute_reply":"2023-10-02T09:32:15.847105Z"},"trusted":true},"execution_count":8,"outputs":[{"name":"stdout","text":" LotArea YearBuilt 1stFlrSF 2ndFlrSF FullBath \\\ncount 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000 \nmean 10516.828082 1971.267808 1162.626712 346.992466 1.565068 \nstd 9981.264932 30.202904 
386.587738 436.528436 0.550916 \nmin 1300.000000 1872.000000 334.000000 0.000000 0.000000 \n25% 7553.500000 1954.000000 882.000000 0.000000 1.000000 \n50% 9478.500000 1973.000000 1087.000000 0.000000 2.000000 \n75% 11601.500000 2000.000000 1391.250000 728.000000 2.000000 \nmax 215245.000000 2010.000000 4692.000000 2065.000000 3.000000 \n\n BedroomAbvGr TotRmsAbvGrd \ncount 1460.000000 1460.000000 \nmean 2.866438 6.517808 \nstd 0.815778 1.625393 \nmin 0.000000 2.000000 \n25% 2.000000 5.000000 \n50% 3.000000 6.000000 \n75% 3.000000 7.000000 \nmax 8.000000 14.000000 \n0 208500\n1 181500\n2 223500\n3 140000\n4 250000\nName: SalePrice, dtype: int64\n","output_type":"stream"}]},{"cell_type":"markdown","source":"## Step 3: Specify and Fit Model\nCreate a `DecisionTreeRegressor` and save it as `iowa_model`. Ensure you've done the relevant import from sklearn to run this command.\n\nThen fit the model you just created using the data in `X` and `y` that you saved above.","metadata":{}},{"cell_type":"code","source":"from sklearn.tree import DecisionTreeRegressor\n#specify the model. 
\n#For model reproducibility, set a numeric value for random_state when specifying the model\niowa_model = DecisionTreeRegressor(random_state=1)\n\n\n# Fit the model\niowa_model.fit(X,y)\n\n# Check your answer\nstep_3.check()","metadata":{"execution":{"iopub.status.busy":"2023-10-02T09:34:27.660352Z","iopub.execute_input":"2023-10-02T09:34:27.660740Z","iopub.status.idle":"2023-10-02T09:34:27.681978Z","shell.execute_reply.started":"2023-10-02T09:34:27.660715Z","shell.execute_reply":"2023-10-02T09:34:27.681190Z"},"trusted":true},"execution_count":10,"outputs":[{"output_type":"display_data","data":{"text/plain":"","application/javascript":"parent.postMessage({\"jupyterEvent\": \"custom.exercise_interaction\", \"data\": {\"outcomeType\": 1, \"valueTowardsCompletion\": 0.25, \"interactionType\": 1, \"questionType\": 2, \"questionId\": \"3_CreateModel\", \"learnToolsVersion\": \"0.3.4\", \"failureMessage\": \"\", \"exceptionClass\": \"\", \"trace\": \"\"}}, \"*\")"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Correct","text/markdown":"Correct"},"metadata":{}}]},{"cell_type":"code","source":"# step_3.hint()\n# step_3.solution()","metadata":{"collapsed":true,"jupyter":{"outputs_hidden":true}},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"## Step 4: Make Predictions\nMake predictions with the model's `predict` command using `X` as the data. Save the results to a variable called `predictions`.","metadata":{}},{"cell_type":"code","source":"predictions = iowa_model.predict(X)\nprint(predictions)\n\n# Check your answer\nstep_4.check()","metadata":{"execution":{"iopub.status.busy":"2023-10-02T09:35:17.832646Z","iopub.execute_input":"2023-10-02T09:35:17.833055Z","iopub.status.idle":"2023-10-02T09:35:17.848306Z","shell.execute_reply.started":"2023-10-02T09:35:17.833026Z","shell.execute_reply":"2023-10-02T09:35:17.846800Z"},"trusted":true},"execution_count":11,"outputs":[{"name":"stdout","text":"[208500. 181500. 223500. ... 266500. 
142125. 147500.]\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"","application/javascript":"parent.postMessage({\"jupyterEvent\": \"custom.exercise_interaction\", \"data\": {\"outcomeType\": 1, \"valueTowardsCompletion\": 0.25, \"interactionType\": 1, \"questionType\": 2, \"questionId\": \"4_MakePredictions\", \"learnToolsVersion\": \"0.3.4\", \"failureMessage\": \"\", \"exceptionClass\": \"\", \"trace\": \"\"}}, \"*\")"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Correct","text/markdown":"Correct"},"metadata":{}}]},{"cell_type":"code","source":"# step_4.hint()\n# step_4.solution()","metadata":{"collapsed":true,"jupyter":{"outputs_hidden":true}},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"## Think About Your Results\n\nUse the `head` method to compare the top few predictions to the actual home values (in `y`) for those same homes. Anything surprising?\n","metadata":{}},{"cell_type":"code","source":"# You can write code in this cell\nprint(y.head())\nprint(predictions[:10])","metadata":{"execution":{"iopub.status.busy":"2023-10-02T09:35:54.804368Z","iopub.execute_input":"2023-10-02T09:35:54.804764Z","iopub.status.idle":"2023-10-02T09:35:54.811671Z","shell.execute_reply.started":"2023-10-02T09:35:54.804734Z","shell.execute_reply":"2023-10-02T09:35:54.810568Z"},"trusted":true},"execution_count":12,"outputs":[{"name":"stdout","text":"0 208500\n1 181500\n2 223500\n3 140000\n4 250000\nName: SalePrice, dtype: int64\n[208500. 181500. 223500. 140000. 250000. 143000. 307000. 200000. 129900.\n 118000.]\n","output_type":"stream"}]},{"cell_type":"markdown","source":"It's natural to ask how accurate the model's predictions will be and how you can improve that. That will be your next step.\n\n# Keep Going\n\nYou are ready for **[Model Validation](https://www.kaggle.com/dansbecker/model-validation).**\n","metadata":{}},{"cell_type":"markdown","source":"---\n\n\n\n\n*Have questions or comments? 
Visit the [course discussion forum](https://www.kaggle.com/learn/intro-to-machine-learning/discussion) to chat with other learners.*","metadata":{}}]} -------------------------------------------------------------------------------- /Intro to Machine Learning Kaggle Course/exercise-explore-your-data.ipynb: -------------------------------------------------------------------------------- 1 | {"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.10.12","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"markdown","source":"**[Machine Learning Course Home Page](https://www.kaggle.com/learn/machine-learning)**\n\n---\n","metadata":{}},{"cell_type":"markdown","source":"This exercise will test your ability to read a data file and understand statistics about the data.\n\nIn later exercises, you will apply techniques to filter the data, build a machine learning model, and iteratively improve your model.\n\nThe course examples use data from Melbourne. To ensure you can apply these techniques on your own, you will have to apply them to a new dataset (with house prices from Iowa).\n\nThe exercises use a \"notebook\" coding environment. 
In case you are unfamiliar with notebooks, we have a [90-second intro video](https://www.youtube.com/watch?v=4C2qMnaIKL4).\n\n# Exercises\n\nRun the following cell to set up code-checking, which will verify your work as you go.","metadata":{}},{"cell_type":"code","source":"# Set up code checking\nfrom learntools.core import binder\nbinder.bind(globals())\nfrom learntools.machine_learning.ex2 import *\nprint(\"Setup Complete\")","metadata":{"execution":{"iopub.status.busy":"2023-10-02T08:34:02.654056Z","iopub.execute_input":"2023-10-02T08:34:02.654921Z","iopub.status.idle":"2023-10-02T08:34:03.155142Z","shell.execute_reply.started":"2023-10-02T08:34:02.654879Z","shell.execute_reply":"2023-10-02T08:34:03.154009Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"## Step 1: Loading Data\nRead the Iowa data file into a Pandas DataFrame called `home_data`.","metadata":{}},{"cell_type":"code","source":"import pandas as pd\n\n# Path of the file to read\niowa_file_path = '../input/home-data-for-ml-course/train.csv'\n\n# Fill in the line below to read the file into a variable home_data\nhome_data = pd.read_csv(iowa_file_path)\n\n# Call line below with no argument to check that you've loaded the data correctly\nstep_1.check()","metadata":{"execution":{"iopub.status.busy":"2023-10-02T09:17:54.216153Z","iopub.execute_input":"2023-10-02T09:17:54.216592Z","iopub.status.idle":"2023-10-02T09:17:54.255960Z","shell.execute_reply.started":"2023-10-02T09:17:54.216558Z","shell.execute_reply":"2023-10-02T09:17:54.254769Z"},"trusted":true},"execution_count":7,"outputs":[{"output_type":"display_data","data":{"text/plain":"","application/javascript":"parent.postMessage({\"jupyterEvent\": \"custom.exercise_interaction\", \"data\": {\"outcomeType\": 1, \"valueTowardsCompletion\": 0.5, \"interactionType\": 1, \"questionType\": 1, \"questionId\": \"1_LoadHomeData\", \"learnToolsVersion\": \"0.3.4\", \"failureMessage\": \"\", \"exceptionClass\": \"\", 
\"trace\": \"\"}}, \"*\")"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Correct","text/markdown":"Correct"},"metadata":{}}]},{"cell_type":"code","source":"# Lines below will give you a hint or solution code\n#step_1.hint()\n#step_1.solution()","metadata":{"execution":{"iopub.status.busy":"2023-10-02T08:36:50.542021Z","iopub.execute_input":"2023-10-02T08:36:50.542476Z","iopub.status.idle":"2023-10-02T08:36:50.548245Z","shell.execute_reply.started":"2023-10-02T08:36:50.542446Z","shell.execute_reply":"2023-10-02T08:36:50.547078Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"## Step 2: Review The Data\nUse the command you learned to view summary statistics of the data. Then fill in variables to answer the following questions","metadata":{}},{"cell_type":"code","source":"# Print summary statistics in next line\nhome_data.describe()","metadata":{"execution":{"iopub.status.busy":"2023-10-02T09:18:12.618701Z","iopub.execute_input":"2023-10-02T09:18:12.619145Z","iopub.status.idle":"2023-10-02T09:18:12.719941Z","shell.execute_reply.started":"2023-10-02T09:18:12.619113Z","shell.execute_reply":"2023-10-02T09:18:12.718841Z"},"trusted":true},"execution_count":8,"outputs":[{"execution_count":8,"output_type":"execute_result","data":{"text/plain":" Id MSSubClass LotFrontage LotArea OverallQual \\\ncount 1460.000000 1460.000000 1201.000000 1460.000000 1460.000000 \nmean 730.500000 56.897260 70.049958 10516.828082 6.099315 \nstd 421.610009 42.300571 24.284752 9981.264932 1.382997 \nmin 1.000000 20.000000 21.000000 1300.000000 1.000000 \n25% 365.750000 20.000000 59.000000 7553.500000 5.000000 \n50% 730.500000 50.000000 69.000000 9478.500000 6.000000 \n75% 1095.250000 70.000000 80.000000 11601.500000 7.000000 \nmax 1460.000000 190.000000 313.000000 215245.000000 10.000000 \n\n OverallCond YearBuilt YearRemodAdd MasVnrArea BsmtFinSF1 ... \\\ncount 1460.000000 1460.000000 1460.000000 1452.000000 1460.000000 ... 
\nmean 5.575342 1971.267808 1984.865753 103.685262 443.639726 ... \nstd 1.112799 30.202904 20.645407 181.066207 456.098091 ... \nmin 1.000000 1872.000000 1950.000000 0.000000 0.000000 ... \n25% 5.000000 1954.000000 1967.000000 0.000000 0.000000 ... \n50% 5.000000 1973.000000 1994.000000 0.000000 383.500000 ... \n75% 6.000000 2000.000000 2004.000000 166.000000 712.250000 ... \nmax 9.000000 2010.000000 2010.000000 1600.000000 5644.000000 ... \n\n WoodDeckSF OpenPorchSF EnclosedPorch 3SsnPorch ScreenPorch \\\ncount 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000 \nmean 94.244521 46.660274 21.954110 3.409589 15.060959 \nstd 125.338794 66.256028 61.119149 29.317331 55.757415 \nmin 0.000000 0.000000 0.000000 0.000000 0.000000 \n25% 0.000000 0.000000 0.000000 0.000000 0.000000 \n50% 0.000000 25.000000 0.000000 0.000000 0.000000 \n75% 168.000000 68.000000 0.000000 0.000000 0.000000 \nmax 857.000000 547.000000 552.000000 508.000000 480.000000 \n\n PoolArea MiscVal MoSold YrSold SalePrice \ncount 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000 \nmean 2.758904 43.489041 6.321918 2007.815753 180921.195890 \nstd 40.177307 496.123024 2.703626 1.328095 79442.502883 \nmin 0.000000 0.000000 1.000000 2006.000000 34900.000000 \n25% 0.000000 0.000000 5.000000 2007.000000 129975.000000 \n50% 0.000000 0.000000 6.000000 2008.000000 163000.000000 \n75% 0.000000 0.000000 8.000000 2009.000000 214000.000000 \nmax 738.000000 15500.000000 12.000000 2010.000000 755000.000000 \n\n[8 rows x 38 columns]","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
IdMSSubClassLotFrontageLotAreaOverallQualOverallCondYearBuiltYearRemodAddMasVnrAreaBsmtFinSF1...WoodDeckSFOpenPorchSFEnclosedPorch3SsnPorchScreenPorchPoolAreaMiscValMoSoldYrSoldSalePrice
count1460.0000001460.0000001201.0000001460.0000001460.0000001460.0000001460.0000001460.0000001452.0000001460.000000...1460.0000001460.0000001460.0000001460.0000001460.0000001460.0000001460.0000001460.0000001460.0000001460.000000
mean730.50000056.89726070.04995810516.8280826.0993155.5753421971.2678081984.865753103.685262443.639726...94.24452146.66027421.9541103.40958915.0609592.75890443.4890416.3219182007.815753180921.195890
std421.61000942.30057124.2847529981.2649321.3829971.11279930.20290420.645407181.066207456.098091...125.33879466.25602861.11914929.31733155.75741540.177307496.1230242.7036261.32809579442.502883
min1.00000020.00000021.0000001300.0000001.0000001.0000001872.0000001950.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000001.0000002006.00000034900.000000
25%365.75000020.00000059.0000007553.5000005.0000005.0000001954.0000001967.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000005.0000002007.000000129975.000000
50%730.50000050.00000069.0000009478.5000006.0000005.0000001973.0000001994.0000000.000000383.500000...0.00000025.0000000.0000000.0000000.0000000.0000000.0000006.0000002008.000000163000.000000
75%1095.25000070.00000080.00000011601.5000007.0000006.0000002000.0000002004.000000166.000000712.250000...168.00000068.0000000.0000000.0000000.0000000.0000000.0000008.0000002009.000000214000.000000
max1460.000000190.000000313.000000215245.00000010.0000009.0000002010.0000002010.0000001600.0000005644.000000...857.000000547.000000552.000000508.000000480.000000738.00000015500.00000012.0000002010.000000755000.000000
\n

8 rows × 38 columns

\n
"},"metadata":{}}]},{"cell_type":"code","source":"# What is the average lot size (rounded to nearest integer)?\navg_lot_size = round(home_data.LotArea.mean())\n\n# As of today, how old is the newest home (current year - the date in which it was built)\nfrom datetime import datetime\nnewest_home_age = datetime.now().year - home_data.YearBuilt.max()\n\n# Checks your answers\nstep_2.check()","metadata":{"execution":{"iopub.status.busy":"2023-10-02T09:20:09.974476Z","iopub.execute_input":"2023-10-02T09:20:09.974982Z","iopub.status.idle":"2023-10-02T09:20:09.987946Z","shell.execute_reply.started":"2023-10-02T09:20:09.974944Z","shell.execute_reply":"2023-10-02T09:20:09.986548Z"},"trusted":true},"execution_count":11,"outputs":[{"output_type":"display_data","data":{"text/plain":"","application/javascript":"parent.postMessage({\"jupyterEvent\": \"custom.exercise_interaction\", \"data\": {\"outcomeType\": 1, \"valueTowardsCompletion\": 0.5, \"interactionType\": 1, \"questionType\": 1, \"questionId\": \"2_HomeDescription\", \"learnToolsVersion\": \"0.3.4\", \"failureMessage\": \"\", \"exceptionClass\": \"\", \"trace\": \"\"}}, \"*\")"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Correct","text/markdown":"Correct"},"metadata":{}}]},{"cell_type":"code","source":"#step_2.hint()\n#step_2.solution()","metadata":{"collapsed":true,"jupyter":{"outputs_hidden":true}},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"## Think About Your Data\n\nThe newest house in your data isn't that new. A few potential explanations for this:\n1. They haven't built new houses where this data was collected.\n1. The data was collected a long time ago. Houses built after the data publication wouldn't show up.\n\nIf the reason is explanation #1 above, does that affect your trust in the model you build with this data? 
What about if it is reason #2?\n\nHow could you dig into the data to see which explanation is more plausible?\n\nCheck out this **[discussion thread](https://www.kaggle.com/learn-forum/60581)** to see what others think or to add your ideas.\n\n# Keep Going\n\nYou are ready for **[Your First Machine Learning Model](https://www.kaggle.com/dansbecker/your-first-machine-learning-model).**\n","metadata":{}},{"cell_type":"markdown","source":"---\n**[Machine Learning Course Home Page](https://www.kaggle.com/learn/machine-learning)**\n\n","metadata":{}}]} -------------------------------------------------------------------------------- /Intro to Machine Learning Kaggle Course/exercise-machine-learning-competitions.ipynb: -------------------------------------------------------------------------------- 1 | {"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"name":"python","version":"3.10.12","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"markdown","source":"**This notebook is an exercise in the [Introduction to Machine Learning](https://www.kaggle.com/learn/intro-to-machine-learning) course. You can reference the tutorial at [this link](https://www.kaggle.com/alexisbcook/machine-learning-competitions).**\n\n---\n","metadata":{}},{"cell_type":"markdown","source":"# Introduction\n\nIn this exercise, you will create and submit predictions for a Kaggle competition. You can then improve your model (e.g. 
by adding features) to apply what you've learned and move up the leaderboard.\n\nBegin by running the code cell below to set up code checking and the filepaths for the dataset.","metadata":{}},{"cell_type":"code","source":"# Set up code checking\nfrom learntools.core import binder\nbinder.bind(globals())\nfrom learntools.machine_learning.ex7 import *\n\n# Set up filepaths\nimport os\nif not os.path.exists(\"../input/train.csv\"):\n os.symlink(\"../input/home-data-for-ml-course/train.csv\", \"../input/train.csv\") \n os.symlink(\"../input/home-data-for-ml-course/test.csv\", \"../input/test.csv\") ","metadata":{"execution":{"iopub.status.busy":"2023-10-02T09:55:40.160557Z","iopub.execute_input":"2023-10-02T09:55:40.160943Z","iopub.status.idle":"2023-10-02T09:55:40.570562Z","shell.execute_reply.started":"2023-10-02T09:55:40.160912Z","shell.execute_reply":"2023-10-02T09:55:40.569520Z"},"trusted":true},"execution_count":1,"outputs":[]},{"cell_type":"markdown","source":"Here's some of the code you've written so far. 
Start by running it again.","metadata":{}},{"cell_type":"code","source":"# Import helpful libraries\nimport pandas as pd\nfrom sklearn.ensemble import RandomForestRegressor\nfrom sklearn.metrics import mean_absolute_error\nfrom sklearn.model_selection import train_test_split\n\n# Load the data, and separate the target\niowa_file_path = '../input/train.csv'\nhome_data = pd.read_csv(iowa_file_path)\ny = home_data.SalePrice\n\n# Create X (After completing the exercise, you can return to modify this line!)\nfeatures = ['LotArea', 'YearBuilt', '1stFlrSF', '2ndFlrSF', 'FullBath', 'BedroomAbvGr', 'TotRmsAbvGrd']\n\n# Select columns corresponding to features, and preview the data\nX = home_data[features]\nX.head()\n\n# Split into validation and training data\ntrain_X, val_X, train_y, val_y = train_test_split(X, y, random_state=1)\n\n# Define a random forest model\nrf_model = RandomForestRegressor(random_state=1)\nrf_model.fit(train_X, train_y)\nrf_val_predictions = rf_model.predict(val_X)\nrf_val_mae = mean_absolute_error(rf_val_predictions, val_y)\n\nprint(\"Validation MAE for Random Forest Model: {:,.0f}\".format(rf_val_mae))","metadata":{"execution":{"iopub.status.busy":"2023-10-02T09:55:50.347012Z","iopub.execute_input":"2023-10-02T09:55:50.347495Z","iopub.status.idle":"2023-10-02T09:55:51.924153Z","shell.execute_reply.started":"2023-10-02T09:55:50.347466Z","shell.execute_reply":"2023-10-02T09:55:51.923117Z"},"trusted":true},"execution_count":2,"outputs":[{"name":"stdout","text":"Validation MAE for Random Forest Model: 21,857\n","output_type":"stream"}]},{"cell_type":"markdown","source":"# Train a model for the competition\n\nThe code cell above trains a Random Forest model on **`train_X`** and **`train_y`**. 
\n\nUse the code cell below to build a Random Forest model and train it on all of **`X`** and **`y`**.","metadata":{}},{"cell_type":"code","source":"# To improve accuracy, create a new Random Forest model which you will train on all training data\nrf_model_on_full_data = RandomForestRegressor(n_estimators=72, random_state=1)\n\n# fit rf_model_on_full_data on all data from the training data\nrf_model_on_full_data.fit(X, y)\n","metadata":{"execution":{"iopub.status.busy":"2023-10-02T09:57:04.385537Z","iopub.execute_input":"2023-10-02T09:57:04.385918Z","iopub.status.idle":"2023-10-02T09:57:04.838426Z","shell.execute_reply.started":"2023-10-02T09:57:04.385890Z","shell.execute_reply":"2023-10-02T09:57:04.837131Z"},"trusted":true},"execution_count":3,"outputs":[{"execution_count":3,"output_type":"execute_result","data":{"text/plain":"RandomForestRegressor(n_estimators=72, random_state=1)","text/html":"
RandomForestRegressor(n_estimators=72, random_state=1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
"},"metadata":{}}]},{"cell_type":"markdown","source":"Now, read the file of \"test\" data, and apply your model to make predictions.","metadata":{}},{"cell_type":"code","source":"# path to file you will use for predictions\ntest_data_path = '../input/test.csv'\n\n# read test data file using pandas\ntest_data = pd.read_csv(test_data_path)\n\n# create test_X which comes from test_data but includes only the columns you used for prediction.\n# The list of columns is stored in a variable called features\ntest_X = test_data[features]\n\n# make predictions which we will submit. \ntest_preds = rf_model_on_full_data.predict(test_X)\n","metadata":{"execution":{"iopub.status.busy":"2023-10-02T09:58:10.938046Z","iopub.execute_input":"2023-10-02T09:58:10.938468Z","iopub.status.idle":"2023-10-02T09:58:10.995101Z","shell.execute_reply.started":"2023-10-02T09:58:10.938436Z","shell.execute_reply":"2023-10-02T09:58:10.994000Z"},"trusted":true},"execution_count":4,"outputs":[]},{"cell_type":"markdown","source":"Before submitting, run a check to make sure your `test_preds` have the right format.","metadata":{}},{"cell_type":"code","source":"# Check your answer (To get credit for completing the exercise, you must get a \"Correct\" result!)\nstep_1.check()\n# step_1.solution()","metadata":{"execution":{"iopub.status.busy":"2023-10-02T09:59:03.773061Z","iopub.execute_input":"2023-10-02T09:59:03.773932Z","iopub.status.idle":"2023-10-02T09:59:03.784290Z","shell.execute_reply.started":"2023-10-02T09:59:03.773891Z","shell.execute_reply":"2023-10-02T09:59:03.782972Z"},"trusted":true},"execution_count":5,"outputs":[{"output_type":"display_data","data":{"text/plain":"","application/javascript":"parent.postMessage({\"jupyterEvent\": \"custom.exercise_interaction\", \"data\": {\"outcomeType\": 1, \"valueTowardsCompletion\": 1.0, \"interactionType\": 1, \"questionType\": 2, \"questionId\": \"1_CheckSubmittablePreds\", \"learnToolsVersion\": \"0.3.4\", \"failureMessage\": \"\", \"exceptionClass\": 
\"\", \"trace\": \"\"}}, \"*\")"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Correct","text/markdown":"Correct"},"metadata":{}}]},{"cell_type":"markdown","source":"# Generate a submission\n\nRun the code cell below to generate a CSV file with your predictions that you can use to submit to the competition.","metadata":{}},{"cell_type":"code","source":"# Run the code to save predictions in the format used for competition scoring\n\noutput = pd.DataFrame({'Id': test_data.Id,\n 'SalePrice': test_preds})\noutput.to_csv('submission.csv', index=False)","metadata":{"execution":{"iopub.status.busy":"2023-10-02T09:59:09.610935Z","iopub.execute_input":"2023-10-02T09:59:09.611302Z","iopub.status.idle":"2023-10-02T09:59:09.625670Z","shell.execute_reply.started":"2023-10-02T09:59:09.611277Z","shell.execute_reply":"2023-10-02T09:59:09.624520Z"},"trusted":true},"execution_count":6,"outputs":[]},{"cell_type":"markdown","source":"# Submit to the competition\n\nTo test your results, you'll need to join the competition (if you haven't already). So open a new window by clicking on **[this link](https://www.kaggle.com/c/home-data-for-ml-course)**. Then click on the **Join Competition** button.\n\n![join competition image](https://storage.googleapis.com/kaggle-media/learn/images/axBzctl.png)\n\nNext, follow the instructions below:\n1. Begin by clicking on the **Save Version** button in the top right corner of the window. This will generate a pop-up window. \n2. Ensure that the **Save and Run All** option is selected, and then click on the **Save** button.\n3. This generates a window in the bottom left corner of the notebook. After it has finished running, click on the number to the right of the **Save Version** button. This pulls up a list of versions on the right of the screen. Click on the ellipsis **(...)** to the right of the most recent version, and select **Open in Viewer**. This brings you into view mode of the same page. 
You will need to scroll down to get back to these instructions.\n4. Click on the **Data** tab near the top of the screen. Then, click on the file you would like to submit, and click on the **Submit** button to submit your results to the leaderboard.\n\nYou have now successfully submitted to the competition!\n\nIf you want to keep working to improve your performance, select the **Edit** button in the top right of the screen. Then you can change your code and repeat the process. There's a lot of room to improve, and you will climb up the leaderboard as you work.\n\n\n# Continue Your Progress\nThere are many ways to improve your model, and **experimenting is a great way to learn at this point.**\n\nThe best way to improve your model is to add features. To add more features to the data, revisit the first code cell, and change this line of code to include more column names:\n```python\nfeatures = ['LotArea', 'YearBuilt', '1stFlrSF', '2ndFlrSF', 'FullBath', 'BedroomAbvGr', 'TotRmsAbvGrd']\n```\n\nSome features will cause errors because of issues like missing values or non-numeric data types. Here is a complete list of potential columns that you might like to use, and that won't throw errors:\n- 'MSSubClass'\n- 'LotArea'\n- 'OverallQual' \n- 'OverallCond' \n- 'YearBuilt'\n- 'YearRemodAdd' \n- '1stFlrSF'\n- '2ndFlrSF' \n- 'LowQualFinSF' \n- 'GrLivArea'\n- 'FullBath'\n- 'HalfBath'\n- 'BedroomAbvGr' \n- 'KitchenAbvGr' \n- 'TotRmsAbvGrd' \n- 'Fireplaces' \n- 'WoodDeckSF' \n- 'OpenPorchSF'\n- 'EnclosedPorch' \n- '3SsnPorch' \n- 'ScreenPorch' \n- 'PoolArea' \n- 'MiscVal' \n- 'MoSold' \n- 'YrSold'\n\nLook at the list of columns and think about what might affect home prices. 
To learn more about each of these features, take a look at the data description on the **[competition page](https://www.kaggle.com/c/home-data-for-ml-course/data)**.\n\nAfter updating the code cell above that defines the features, re-run all of the code cells to evaluate the model and generate a new submission file. \n\n\n# What's next?\n\nAs mentioned above, some of the features will throw an error if you try to use them to train your model. The **[Intermediate Machine Learning](https://www.kaggle.com/learn/intermediate-machine-learning)** course will teach you how to handle these types of features. You will also learn to use **xgboost**, a technique giving even better accuracy than Random Forest.\n\nThe **[Pandas](https://kaggle.com/Learn/Pandas)** course will give you the data manipulation skills to quickly go from conceptual idea to implementation in your data science projects. \n\nYou are also ready for the **[Deep Learning](https://kaggle.com/Learn/intro-to-Deep-Learning)** course, where you will build models with better-than-human level performance at computer vision tasks.","metadata":{}},{"cell_type":"markdown","source":"---\n\n\n\n\n*Have questions or comments? Visit the [course discussion forum](https://www.kaggle.com/learn/intro-to-machine-learning/discussion) to chat with other learners.*","metadata":{}}]} -------------------------------------------------------------------------------- /ML Practice/ML Mid Task/spam.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 76, 6 | "id": "fbec0516", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import pandas as pd" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 77, 16 | "id": "aa630f42", 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "data": { 21 | "text/html": [ 22 | "
\n", 23 | "\n", 36 | "\n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | "
CategoryMessage
0hamGo until jurong point, crazy.. Available only ...
1hamOk lar... Joking wif u oni...
2spamFree entry in 2 a wkly comp to win FA Cup fina...
3hamU dun say so early hor... U c already then say...
4hamNah I don't think he goes to usf, he lives aro...
.........
5567spamThis is the 2nd time we have tried 2 contact u...
5568hamWill ü b going to esplanade fr home?
5569hamPity, * was in mood for that. So...any other s...
5570hamThe guy did some bitching but I acted like i'd...
5571hamRofl. Its true to its name
\n", 102 | "

5572 rows × 2 columns

\n", 103 | "
" 104 | ], 105 | "text/plain": [ 106 | " Category Message\n", 107 | "0 ham Go until jurong point, crazy.. Available only ...\n", 108 | "1 ham Ok lar... Joking wif u oni...\n", 109 | "2 spam Free entry in 2 a wkly comp to win FA Cup fina...\n", 110 | "3 ham U dun say so early hor... U c already then say...\n", 111 | "4 ham Nah I don't think he goes to usf, he lives aro...\n", 112 | "... ... ...\n", 113 | "5567 spam This is the 2nd time we have tried 2 contact u...\n", 114 | "5568 ham Will ü b going to esplanade fr home?\n", 115 | "5569 ham Pity, * was in mood for that. So...any other s...\n", 116 | "5570 ham The guy did some bitching but I acted like i'd...\n", 117 | "5571 ham Rofl. Its true to its name\n", 118 | "\n", 119 | "[5572 rows x 2 columns]" 120 | ] 121 | }, 122 | "execution_count": 77, 123 | "metadata": {}, 124 | "output_type": "execute_result" 125 | } 126 | ], 127 | "source": [ 128 | "data = pd.read_csv(\"spam.csv\")\n", 129 | "data" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 78, 135 | "id": "8d639b7f", 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "data": { 140 | "text/html": [ 141 | "
\n", 142 | "\n", 155 | "\n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | "
CategoryMessagespam
0hamGo until jurong point, crazy.. Available only ...0
1hamOk lar... Joking wif u oni...0
2spamFree entry in 2 a wkly comp to win FA Cup fina...1
3hamU dun say so early hor... U c already then say...0
4hamNah I don't think he goes to usf, he lives aro...0
\n", 197 | "
" 198 | ], 199 | "text/plain": [ 200 | " Category Message spam\n", 201 | "0 ham Go until jurong point, crazy.. Available only ... 0\n", 202 | "1 ham Ok lar... Joking wif u oni... 0\n", 203 | "2 spam Free entry in 2 a wkly comp to win FA Cup fina... 1\n", 204 | "3 ham U dun say so early hor... U c already then say... 0\n", 205 | "4 ham Nah I don't think he goes to usf, he lives aro... 0" 206 | ] 207 | }, 208 | "execution_count": 78, 209 | "metadata": {}, 210 | "output_type": "execute_result" 211 | } 212 | ], 213 | "source": [ 214 | "data[\"spam\"] = data['Category'].apply(lambda x: 1 if x=='spam' else 0)\n", 215 | "data.head()" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 79, 221 | "id": "e48df089", 222 | "metadata": {}, 223 | "outputs": [], 224 | "source": [ 225 | "from sklearn.model_selection import train_test_split" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": 80, 231 | "id": "d97be7ec", 232 | "metadata": {}, 233 | "outputs": [], 234 | "source": [ 235 | "x_train, x_test, y_train, y_test = train_test_split(data.Message, data.spam, test_size=0.2, random_state=42)  # fixed seed: the same 80/20 split on every Restart & Run All" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 81, 241 | "id": "4abc65a5", 242 | "metadata": {}, 243 | "outputs": [], 244 | "source": [ 245 | "from sklearn.feature_extraction.text import CountVectorizer" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 82, 251 | "id": "c20ee513", 252 | "metadata": {}, 253 | "outputs": [], 254 | "source": [ 255 | "v = CountVectorizer()\n", 256 | "x_train_count = v.fit_transform(x_train.values)" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": 83, 262 | "id": "a3952a95", 263 | "metadata": {}, 264 | "outputs": [ 265 | { 266 | "data": { 267 | "text/html": [ 268 | "
MultinomialNB()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" 269 | ], 270 | "text/plain": [ 271 | "MultinomialNB()" 272 | ] 273 | }, 274 | "execution_count": 83, 275 | "metadata": {}, 276 | "output_type": "execute_result" 277 | } 278 | ], 279 | "source": [ 280 | "from sklearn.naive_bayes import MultinomialNB\n", 281 | "model = MultinomialNB()\n", 282 | "model.fit(x_train_count,y_train)" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": 84, 288 | "id": "b58eb592", 289 | "metadata": {}, 290 | "outputs": [ 291 | { 292 | "data": { 293 | "text/plain": [ 294 | "array([0, 1], dtype=int64)" 295 | ] 296 | }, 297 | "execution_count": 84, 298 | "metadata": {}, 299 | "output_type": "execute_result" 300 | } 301 | ], 302 | "source": [ 303 | "emails = [\n", 304 | " 'Hey mohan, can we get together to watch footbal game tomorrow?',\n", 305 | " 'Upto 20% discount on parking, exclusive offer just for you. Dont miss this reward!'\n", 306 | "]\n", 307 | "emails_count = v.transform(emails)\n", 308 | "model.predict(emails_count)" 309 | ] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": 85, 314 | "id": "22f14717", 315 | "metadata": {}, 316 | "outputs": [], 317 | "source": [ 318 | "x_test_count = v.transform(x_test)\n", 319 | "y_pred = model.predict(x_test_count)" 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": 99, 325 | "id": "7d21d464", 326 | "metadata": {}, 327 | "outputs": [ 328 | { 329 | "name": "stdout", 330 | "output_type": "stream", 331 | "text": [ 332 | "Total number of messages falsely predicted as spam but were ham: 0\n" 333 | ] 334 | } 335 | ], 336 | "source": [ 337 | "result_df = pd.DataFrame({'True_Label': y_test, 'Predicted_Label': y_pred, 'Email_Text': x_test})\n", 338 | "false_positives = result_df[(result_df['True_Label'] == 0) & (result_df['Predicted_Label'] == 1)]  # labels are the 0/1 ints from data.spam (0 = ham, 1 = spam), not the strings 'ham'/'spam'\n", 339 | "total_false_positives = len(false_positives)\n", 340 | "print(f'Total number of messages falsely predicted as spam but were ham: {total_false_positives}')" 341 | ] 342 | }, 
343 | { 344 | "cell_type": "code", 345 | "execution_count": null, 346 | "id": "064f1268", 347 | "metadata": {}, 348 | "outputs": [], 349 | "source": [] 350 | } 351 | ], 352 | "metadata": { 353 | "kernelspec": { 354 | "display_name": "Python 3 (ipykernel)", 355 | "language": "python", 356 | "name": "python3" 357 | }, 358 | "language_info": { 359 | "codemirror_mode": { 360 | "name": "ipython", 361 | "version": 3 362 | }, 363 | "file_extension": ".py", 364 | "mimetype": "text/x-python", 365 | "name": "python", 366 | "nbconvert_exporter": "python", 367 | "pygments_lexer": "ipython3", 368 | "version": "3.11.4" 369 | } 370 | }, 371 | "nbformat": 4, 372 | "nbformat_minor": 5 373 | } 374 | -------------------------------------------------------------------------------- /ML Practice/Class tasks.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "4b8905df", 6 | "metadata": {}, 7 | "source": [ 8 | "Task 1: Logistic Regression\n", 9 | "Task Description: Using the famous Iris dataset, train a logistic regression model to classify flowers into the species 'setosa' and 'not-setosa'." 
10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 22, 15 | "id": "cd48c076", 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "name": "stdout", 20 | "output_type": "stream", 21 | "text": [ 22 | "Accuracy: 1.0\n" 23 | ] 24 | } 25 | ], 26 | "source": [ 27 | "from sklearn.datasets import load_iris\n", 28 | "from sklearn.linear_model import LogisticRegression\n", 29 | "from sklearn.model_selection import train_test_split\n", 30 | "from sklearn.metrics import accuracy_score\n", 31 | "\n", 32 | "# Load the Iris dataset\n", 33 | "\n", 34 | "iris = load_iris()\n", 35 | "X = iris.data\n", 36 | "y = iris.target\n", 37 | "\n", 38 | "# Convert the target to binary: 'setosa' or 'not-setosa'\n", 39 | "\n", 40 | "\n", 41 | "y_binary = (y == 0).astype(int)\n", 42 | "\n", 43 | "\n", 44 | "\n", 45 | "\n", 46 | "# Split the data\n", 47 | "\n", 48 | "X_train, X_test, y_train, y_test = train_test_split(X, y_binary, test_size=0.2, random_state=42)\n", 49 | "\n", 50 | "\n", 51 | "# Initialize and train a logistic regression classifier\n", 52 | "\n", 53 | "classifier = LogisticRegression()\n", 54 | "\n", 55 | "\n", 56 | "# TODO: Fit the classifier to the training data\n", 57 | "\n", 58 | "classifier.fit(X_train, y_train)\n", 59 | " \n", 60 | "\n", 61 | "# TODO: Predict on the test data\n", 62 | "\n", 63 | "y_pred = classifier.predict(X_test)\n", 64 | "\n", 65 | "# TODO: Calculate and print the accuracy\n", 66 | "\n", 67 | "accuracy = accuracy_score(y_test, y_pred)\n", 68 | "print(\"Accuracy:\", accuracy)" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 3, 74 | "id": "7101860d", 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "ename": "NameError", 79 | "evalue": "name 'y_binary' is not defined", 80 | "output_type": "error", 81 | "traceback": [ 82 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 83 | "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", 84 | "Cell 
\u001b[1;32mIn[3], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m y_binary\n", 85 | "\u001b[1;31mNameError\u001b[0m: name 'y_binary' is not defined" 86 | ] 87 | } 88 | ], 89 | "source": [ 90 | "y_binary" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "id": "9187fa31", 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 20, 104 | "id": "8679c4de", 105 | "metadata": {}, 106 | "outputs": [ 107 | { 108 | "data": { 109 | "text/plain": [ 110 | "array([[5.1, 3.5, 1.4, 0.2],\n", 111 | " [4.9, 3. , 1.4, 0.2],\n", 112 | " [4.7, 3.2, 1.3, 0.2],\n", 113 | " [4.6, 3.1, 1.5, 0.2],\n", 114 | " [5. , 3.6, 1.4, 0.2],\n", 115 | " [5.4, 3.9, 1.7, 0.4],\n", 116 | " [4.6, 3.4, 1.4, 0.3],\n", 117 | " [5. , 3.4, 1.5, 0.2],\n", 118 | " [4.4, 2.9, 1.4, 0.2],\n", 119 | " [4.9, 3.1, 1.5, 0.1],\n", 120 | " [5.4, 3.7, 1.5, 0.2],\n", 121 | " [4.8, 3.4, 1.6, 0.2],\n", 122 | " [4.8, 3. , 1.4, 0.1],\n", 123 | " [4.3, 3. , 1.1, 0.1],\n", 124 | " [5.8, 4. , 1.2, 0.2],\n", 125 | " [5.7, 4.4, 1.5, 0.4],\n", 126 | " [5.4, 3.9, 1.3, 0.4],\n", 127 | " [5.1, 3.5, 1.4, 0.3],\n", 128 | " [5.7, 3.8, 1.7, 0.3],\n", 129 | " [5.1, 3.8, 1.5, 0.3],\n", 130 | " [5.4, 3.4, 1.7, 0.2],\n", 131 | " [5.1, 3.7, 1.5, 0.4],\n", 132 | " [4.6, 3.6, 1. , 0.2],\n", 133 | " [5.1, 3.3, 1.7, 0.5],\n", 134 | " [4.8, 3.4, 1.9, 0.2],\n", 135 | " [5. , 3. , 1.6, 0.2],\n", 136 | " [5. , 3.4, 1.6, 0.4],\n", 137 | " [5.2, 3.5, 1.5, 0.2],\n", 138 | " [5.2, 3.4, 1.4, 0.2],\n", 139 | " [4.7, 3.2, 1.6, 0.2],\n", 140 | " [4.8, 3.1, 1.6, 0.2],\n", 141 | " [5.4, 3.4, 1.5, 0.4],\n", 142 | " [5.2, 4.1, 1.5, 0.1],\n", 143 | " [5.5, 4.2, 1.4, 0.2],\n", 144 | " [4.9, 3.1, 1.5, 0.2],\n", 145 | " [5. , 3.2, 1.2, 0.2],\n", 146 | " [5.5, 3.5, 1.3, 0.2],\n", 147 | " [4.9, 3.6, 1.4, 0.1],\n", 148 | " [4.4, 3. , 1.3, 0.2],\n", 149 | " [5.1, 3.4, 1.5, 0.2],\n", 150 | " [5. 
, 3.5, 1.3, 0.3],\n", 151 | " [4.5, 2.3, 1.3, 0.3],\n", 152 | " [4.4, 3.2, 1.3, 0.2],\n", 153 | " [5. , 3.5, 1.6, 0.6],\n", 154 | " [5.1, 3.8, 1.9, 0.4],\n", 155 | " [4.8, 3. , 1.4, 0.3],\n", 156 | " [5.1, 3.8, 1.6, 0.2],\n", 157 | " [4.6, 3.2, 1.4, 0.2],\n", 158 | " [5.3, 3.7, 1.5, 0.2],\n", 159 | " [5. , 3.3, 1.4, 0.2],\n", 160 | " [7. , 3.2, 4.7, 1.4],\n", 161 | " [6.4, 3.2, 4.5, 1.5],\n", 162 | " [6.9, 3.1, 4.9, 1.5],\n", 163 | " [5.5, 2.3, 4. , 1.3],\n", 164 | " [6.5, 2.8, 4.6, 1.5],\n", 165 | " [5.7, 2.8, 4.5, 1.3],\n", 166 | " [6.3, 3.3, 4.7, 1.6],\n", 167 | " [4.9, 2.4, 3.3, 1. ],\n", 168 | " [6.6, 2.9, 4.6, 1.3],\n", 169 | " [5.2, 2.7, 3.9, 1.4],\n", 170 | " [5. , 2. , 3.5, 1. ],\n", 171 | " [5.9, 3. , 4.2, 1.5],\n", 172 | " [6. , 2.2, 4. , 1. ],\n", 173 | " [6.1, 2.9, 4.7, 1.4],\n", 174 | " [5.6, 2.9, 3.6, 1.3],\n", 175 | " [6.7, 3.1, 4.4, 1.4],\n", 176 | " [5.6, 3. , 4.5, 1.5],\n", 177 | " [5.8, 2.7, 4.1, 1. ],\n", 178 | " [6.2, 2.2, 4.5, 1.5],\n", 179 | " [5.6, 2.5, 3.9, 1.1],\n", 180 | " [5.9, 3.2, 4.8, 1.8],\n", 181 | " [6.1, 2.8, 4. , 1.3],\n", 182 | " [6.3, 2.5, 4.9, 1.5],\n", 183 | " [6.1, 2.8, 4.7, 1.2],\n", 184 | " [6.4, 2.9, 4.3, 1.3],\n", 185 | " [6.6, 3. , 4.4, 1.4],\n", 186 | " [6.8, 2.8, 4.8, 1.4],\n", 187 | " [6.7, 3. , 5. , 1.7],\n", 188 | " [6. , 2.9, 4.5, 1.5],\n", 189 | " [5.7, 2.6, 3.5, 1. ],\n", 190 | " [5.5, 2.4, 3.8, 1.1],\n", 191 | " [5.5, 2.4, 3.7, 1. ],\n", 192 | " [5.8, 2.7, 3.9, 1.2],\n", 193 | " [6. , 2.7, 5.1, 1.6],\n", 194 | " [5.4, 3. , 4.5, 1.5],\n", 195 | " [6. , 3.4, 4.5, 1.6],\n", 196 | " [6.7, 3.1, 4.7, 1.5],\n", 197 | " [6.3, 2.3, 4.4, 1.3],\n", 198 | " [5.6, 3. , 4.1, 1.3],\n", 199 | " [5.5, 2.5, 4. , 1.3],\n", 200 | " [5.5, 2.6, 4.4, 1.2],\n", 201 | " [6.1, 3. , 4.6, 1.4],\n", 202 | " [5.8, 2.6, 4. , 1.2],\n", 203 | " [5. , 2.3, 3.3, 1. ],\n", 204 | " [5.6, 2.7, 4.2, 1.3],\n", 205 | " [5.7, 3. , 4.2, 1.2],\n", 206 | " [5.7, 2.9, 4.2, 1.3],\n", 207 | " [6.2, 2.9, 4.3, 1.3],\n", 208 | " [5.1, 2.5, 3. 
, 1.1],\n", 209 | " [5.7, 2.8, 4.1, 1.3],\n", 210 | " [6.3, 3.3, 6. , 2.5],\n", 211 | " [5.8, 2.7, 5.1, 1.9],\n", 212 | " [7.1, 3. , 5.9, 2.1],\n", 213 | " [6.3, 2.9, 5.6, 1.8],\n", 214 | " [6.5, 3. , 5.8, 2.2],\n", 215 | " [7.6, 3. , 6.6, 2.1],\n", 216 | " [4.9, 2.5, 4.5, 1.7],\n", 217 | " [7.3, 2.9, 6.3, 1.8],\n", 218 | " [6.7, 2.5, 5.8, 1.8],\n", 219 | " [7.2, 3.6, 6.1, 2.5],\n", 220 | " [6.5, 3.2, 5.1, 2. ],\n", 221 | " [6.4, 2.7, 5.3, 1.9],\n", 222 | " [6.8, 3. , 5.5, 2.1],\n", 223 | " [5.7, 2.5, 5. , 2. ],\n", 224 | " [5.8, 2.8, 5.1, 2.4],\n", 225 | " [6.4, 3.2, 5.3, 2.3],\n", 226 | " [6.5, 3. , 5.5, 1.8],\n", 227 | " [7.7, 3.8, 6.7, 2.2],\n", 228 | " [7.7, 2.6, 6.9, 2.3],\n", 229 | " [6. , 2.2, 5. , 1.5],\n", 230 | " [6.9, 3.2, 5.7, 2.3],\n", 231 | " [5.6, 2.8, 4.9, 2. ],\n", 232 | " [7.7, 2.8, 6.7, 2. ],\n", 233 | " [6.3, 2.7, 4.9, 1.8],\n", 234 | " [6.7, 3.3, 5.7, 2.1],\n", 235 | " [7.2, 3.2, 6. , 1.8],\n", 236 | " [6.2, 2.8, 4.8, 1.8],\n", 237 | " [6.1, 3. , 4.9, 1.8],\n", 238 | " [6.4, 2.8, 5.6, 2.1],\n", 239 | " [7.2, 3. , 5.8, 1.6],\n", 240 | " [7.4, 2.8, 6.1, 1.9],\n", 241 | " [7.9, 3.8, 6.4, 2. ],\n", 242 | " [6.4, 2.8, 5.6, 2.2],\n", 243 | " [6.3, 2.8, 5.1, 1.5],\n", 244 | " [6.1, 2.6, 5.6, 1.4],\n", 245 | " [7.7, 3. , 6.1, 2.3],\n", 246 | " [6.3, 3.4, 5.6, 2.4],\n", 247 | " [6.4, 3.1, 5.5, 1.8],\n", 248 | " [6. , 3. , 4.8, 1.8],\n", 249 | " [6.9, 3.1, 5.4, 2.1],\n", 250 | " [6.7, 3.1, 5.6, 2.4],\n", 251 | " [6.9, 3.1, 5.1, 2.3],\n", 252 | " [5.8, 2.7, 5.1, 1.9],\n", 253 | " [6.8, 3.2, 5.9, 2.3],\n", 254 | " [6.7, 3.3, 5.7, 2.5],\n", 255 | " [6.7, 3. , 5.2, 2.3],\n", 256 | " [6.3, 2.5, 5. , 1.9],\n", 257 | " [6.5, 3. , 5.2, 2. ],\n", 258 | " [6.2, 3.4, 5.4, 2.3],\n", 259 | " [5.9, 3. 
, 5.1, 1.8]])" 260 | ] 261 | }, 262 | "execution_count": 20, 263 | "metadata": {}, 264 | "output_type": "execute_result" 265 | } 266 | ], 267 | "source": [ 268 | "X" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": 21, 274 | "id": "5d90a97e", 275 | "metadata": {}, 276 | "outputs": [ 277 | { 278 | "data": { 279 | "text/plain": [ 280 | "array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", 281 | " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", 282 | " 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 283 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 284 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 285 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 286 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])" 287 | ] 288 | }, 289 | "execution_count": 21, 290 | "metadata": {}, 291 | "output_type": "execute_result" 292 | } 293 | ], 294 | "source": [ 295 | "y_binary" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": 23, 301 | "id": "0cab3b00", 302 | "metadata": {}, 303 | "outputs": [ 304 | { 305 | "data": { 306 | "text/plain": [ 307 | "[1,\n", 308 | " 1,\n", 309 | " 1,\n", 310 | " 1,\n", 311 | " 1,\n", 312 | " 1,\n", 313 | " 1,\n", 314 | " 1,\n", 315 | " 1,\n", 316 | " 1,\n", 317 | " 1,\n", 318 | " 1,\n", 319 | " 1,\n", 320 | " 1,\n", 321 | " 1,\n", 322 | " 1,\n", 323 | " 1,\n", 324 | " 1,\n", 325 | " 1,\n", 326 | " 1,\n", 327 | " 1,\n", 328 | " 1,\n", 329 | " 1,\n", 330 | " 1,\n", 331 | " 1,\n", 332 | " 1,\n", 333 | " 1,\n", 334 | " 1,\n", 335 | " 1,\n", 336 | " 1,\n", 337 | " 1,\n", 338 | " 1,\n", 339 | " 1,\n", 340 | " 1,\n", 341 | " 1,\n", 342 | " 1,\n", 343 | " 1,\n", 344 | " 1,\n", 345 | " 1,\n", 346 | " 1,\n", 347 | " 1,\n", 348 | " 1,\n", 349 | " 1,\n", 350 | " 1,\n", 351 | " 1,\n", 352 | " 1,\n", 353 | " 1,\n", 354 | " 1,\n", 355 | " 1,\n", 356 | " 1,\n", 357 
| " 0,\n", 358 | " 0,\n", 359 | " 0,\n", 360 | " 0,\n", 361 | " 0,\n", 362 | " 0,\n", 363 | " 0,\n", 364 | " 0,\n", 365 | " 0,\n", 366 | " 0,\n", 367 | " 0,\n", 368 | " 0,\n", 369 | " 0,\n", 370 | " 0,\n", 371 | " 0,\n", 372 | " 0,\n", 373 | " 0,\n", 374 | " 0,\n", 375 | " 0,\n", 376 | " 0,\n", 377 | " 0,\n", 378 | " 0,\n", 379 | " 0,\n", 380 | " 0,\n", 381 | " 0,\n", 382 | " 0,\n", 383 | " 0,\n", 384 | " 0,\n", 385 | " 0,\n", 386 | " 0,\n", 387 | " 0,\n", 388 | " 0,\n", 389 | " 0,\n", 390 | " 0,\n", 391 | " 0,\n", 392 | " 0,\n", 393 | " 0,\n", 394 | " 0,\n", 395 | " 0,\n", 396 | " 0,\n", 397 | " 0,\n", 398 | " 0,\n", 399 | " 0,\n", 400 | " 0,\n", 401 | " 0,\n", 402 | " 0,\n", 403 | " 0,\n", 404 | " 0,\n", 405 | " 0,\n", 406 | " 0,\n", 407 | " 0,\n", 408 | " 0,\n", 409 | " 0,\n", 410 | " 0,\n", 411 | " 0,\n", 412 | " 0,\n", 413 | " 0,\n", 414 | " 0,\n", 415 | " 0,\n", 416 | " 0,\n", 417 | " 0,\n", 418 | " 0,\n", 419 | " 0,\n", 420 | " 0,\n", 421 | " 0,\n", 422 | " 0,\n", 423 | " 0,\n", 424 | " 0,\n", 425 | " 0,\n", 426 | " 0,\n", 427 | " 0,\n", 428 | " 0,\n", 429 | " 0,\n", 430 | " 0,\n", 431 | " 0,\n", 432 | " 0,\n", 433 | " 0,\n", 434 | " 0,\n", 435 | " 0,\n", 436 | " 0,\n", 437 | " 0,\n", 438 | " 0,\n", 439 | " 0,\n", 440 | " 0,\n", 441 | " 0,\n", 442 | " 0,\n", 443 | " 0,\n", 444 | " 0,\n", 445 | " 0,\n", 446 | " 0,\n", 447 | " 0,\n", 448 | " 0,\n", 449 | " 0,\n", 450 | " 0,\n", 451 | " 0,\n", 452 | " 0,\n", 453 | " 0,\n", 454 | " 0,\n", 455 | " 0,\n", 456 | " 0]" 457 | ] 458 | }, 459 | "execution_count": 23, 460 | "metadata": {}, 461 | "output_type": "execute_result" 462 | } 463 | ], 464 | "source": [ 465 | "y_binar" 466 | ] 467 | }, 468 | { 469 | "cell_type": "markdown", 470 | "id": "2c7c558b", 471 | "metadata": {}, 472 | "source": [ 473 | "Task 2: Naïve Bayes\n", 474 | "Task Description: Implement a simple text classification task to classify messages as 'spam' or 'ham' using the Naïve Bayes classifier." 
475 | ] 476 | }, 477 | { 478 | "cell_type": "code", 479 | "execution_count": 1, 480 | "id": "14ec0724", 481 | "metadata": {}, 482 | "outputs": [ 483 | { 484 | "name": "stdout", 485 | "output_type": "stream", 486 | "text": [ 487 | "Accuracy: 0.0\n" 488 | ] 489 | } 490 | ], 491 | "source": [ 492 | "from sklearn.feature_extraction.text import CountVectorizer\n", 493 | "from sklearn.naive_bayes import MultinomialNB\n", 494 | "from sklearn.model_selection import train_test_split\n", 495 | "from sklearn.metrics import accuracy_score\n", 496 | "\n", 497 | "# Sample data\n", 498 | "messages = [\"win cash now\", \"call me back\", \"important meeting at 10\", \"get free tickets\"]\n", 499 | "labels = [\"spam\", \"ham\", \"ham\", \"spam\"]\n", 500 | "\n", 501 | "# Convert text data to feature vectors\n", 502 | "vectorizer = CountVectorizer()\n", 503 | "X = vectorizer.fit_transform(messages)\n", 504 | "y = labels\n", 505 | "\n", 506 | "# Split the data\n", 507 | "\n", 508 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", 509 | "\n", 510 | "\n", 511 | "# Initialize the Naive Bayes classifier\n", 512 | "\n", 513 | "classifier = MultinomialNB()\n", 514 | "\n", 515 | "\n", 516 | "# TODO: Fit the classifier to the training data\n", 517 | "\n", 518 | "classifier.fit(X_train, y_train)\n", 519 | "\n", 520 | "\n", 521 | "# TODO: Predict on the test data\n", 522 | "\n", 523 | "y_pred = classifier.predict(X_test)\n", 524 | "\n", 525 | "\n", 526 | "# TODO: Calculate and print the accuracy\n", 527 | "\n", 528 | "accuracy = accuracy_score(y_test, y_pred)\n", 529 | "print(\"Accuracy:\", accuracy)\n" 530 | ] 531 | }, 532 | { 533 | "cell_type": "code", 534 | "execution_count": 16, 535 | "id": "764a2756", 536 | "metadata": {}, 537 | "outputs": [ 538 | { 539 | "data": { 540 | "text/plain": [ 541 | "array(['spam', 'spam'], dtype=''\n", 563 | "\twith 13 stored elements in Compressed Sparse Row format>" 564 | ] 565 | }, 566 | "execution_count": 26, 567 
| "metadata": {}, 568 | "output_type": "execute_result" 569 | } 570 | ], 571 | "source": [ 572 | "X" 573 | ] 574 | }, 575 | { 576 | "cell_type": "code", 577 | "execution_count": null, 578 | "id": "841e39b9", 579 | "metadata": {}, 580 | "outputs": [], 581 | "source": [] 582 | } 583 | ], 584 | "metadata": { 585 | "kernelspec": { 586 | "display_name": "Python 3 (ipykernel)", 587 | "language": "python", 588 | "name": "python3" 589 | }, 590 | "language_info": { 591 | "codemirror_mode": { 592 | "name": "ipython", 593 | "version": 3 594 | }, 595 | "file_extension": ".py", 596 | "mimetype": "text/x-python", 597 | "name": "python", 598 | "nbconvert_exporter": "python", 599 | "pygments_lexer": "ipython3", 600 | "version": "3.11.4" 601 | } 602 | }, 603 | "nbformat": 4, 604 | "nbformat_minor": 5 605 | } 606 | -------------------------------------------------------------------------------- /ML Practice/weather.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "id": "46676c95", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import pandas as pd" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 5, 16 | "id": "1d12897e", 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "data": { 21 | "text/html": [ 22 | "
\n", 23 | "\n", 36 | "\n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | "
outlooktemperaturehumiditywindyplay
0overcasthothighFalseyes
1overcastcoolnormalTrueyes
2overcastmildhighTrueyes
3overcasthotnormalFalseyes
4rainymildhighFalseyes
5rainycoolnormalFalseyes
6rainycoolnormalTrueno
7rainymildnormalFalseyes
8rainymildhighTrueno
9sunnyhothighFalseno
\n", 130 | "
" 131 | ], 132 | "text/plain": [ 133 | " outlook temperature humidity windy play\n", 134 | "0 overcast hot high False yes\n", 135 | "1 overcast cool normal True yes\n", 136 | "2 overcast mild high True yes\n", 137 | "3 overcast hot normal False yes\n", 138 | "4 rainy mild high False yes\n", 139 | "5 rainy cool normal False yes\n", 140 | "6 rainy cool normal True no\n", 141 | "7 rainy mild normal False yes\n", 142 | "8 rainy mild high True no\n", 143 | "9 sunny hot high False no" 144 | ] 145 | }, 146 | "execution_count": 5, 147 | "metadata": {}, 148 | "output_type": "execute_result" 149 | } 150 | ], 151 | "source": [ 152 | "df = pd.read_csv(\"weather.csv\")\n", 153 | "df.head(10)" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 6, 159 | "id": "f8393778", 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": [ 163 | "inputs = df.drop('play',axis='columns')" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 7, 169 | "id": "98eae7f8", 170 | "metadata": {}, 171 | "outputs": [], 172 | "source": [ 173 | "target = df['play']" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 9, 179 | "id": "32d8c717", 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "from sklearn.preprocessing import LabelEncoder\n", 184 | "le_outlook = LabelEncoder()\n", 185 | "le_temperature = LabelEncoder()\n", 186 | "le_humidity = LabelEncoder()\n", 187 | "le_windy = LabelEncoder()" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": 12, 193 | "id": "92d36171", 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [ 197 | "inputs['outlook_n'] = le_outlook.fit_transform(inputs['outlook'])\n", 198 | "inputs['temperature_n'] = le_temperature.fit_transform(inputs['temperature'])\n", 199 | "inputs['humidity_n'] = le_humidity.fit_transform(inputs['humidity'])\n", 200 | "inputs['windy_n'] = le_windy.fit_transform(inputs['windy'])" 201 | ] 202 | }, 203 | { 204 | "cell_type": 
"code", 205 | "execution_count": 18, 206 | "id": "7f9b1efb", 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [ 210 | "inputs_n = inputs.drop(['outlook','temperature','humidity','windy'],axis='columns')" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 19, 216 | "id": "917d2fc4", 217 | "metadata": {}, 218 | "outputs": [ 219 | { 220 | "data": { 221 | "text/html": [ 222 | "
\n", 223 | "\n", 236 | "\n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | "
outlook_ntemperature_nhumidity_nwindy_n
00100
10011
20201
30110
41200
51010
61011
71210
81201
92100
102101
112200
122010
132211
\n", 347 | "
" 348 | ], 349 | "text/plain": [ 350 | " outlook_n temperature_n humidity_n windy_n\n", 351 | "0 0 1 0 0\n", 352 | "1 0 0 1 1\n", 353 | "2 0 2 0 1\n", 354 | "3 0 1 1 0\n", 355 | "4 1 2 0 0\n", 356 | "5 1 0 1 0\n", 357 | "6 1 0 1 1\n", 358 | "7 1 2 1 0\n", 359 | "8 1 2 0 1\n", 360 | "9 2 1 0 0\n", 361 | "10 2 1 0 1\n", 362 | "11 2 2 0 0\n", 363 | "12 2 0 1 0\n", 364 | "13 2 2 1 1" 365 | ] 366 | }, 367 | "execution_count": 19, 368 | "metadata": {}, 369 | "output_type": "execute_result" 370 | } 371 | ], 372 | "source": [ 373 | "inputs_n\n" 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": 21, 379 | "id": "1625dc7f", 380 | "metadata": {}, 381 | "outputs": [], 382 | "source": [ 383 | "from sklearn import tree\n", 384 | "model = tree.DecisionTreeClassifier()" 385 | ] 386 | }, 387 | { 388 | "cell_type": "code", 389 | "execution_count": 22, 390 | "id": "989103e3", 391 | "metadata": {}, 392 | "outputs": [ 393 | { 394 | "data": { 395 | "text/html": [ 396 | "
DecisionTreeClassifier()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" 397 | ], 398 | "text/plain": [ 399 | "DecisionTreeClassifier()" 400 | ] 401 | }, 402 | "execution_count": 22, 403 | "metadata": {}, 404 | "output_type": "execute_result" 405 | } 406 | ], 407 | "source": [ 408 | "model.fit(inputs_n, target)" 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": 23, 414 | "id": "5ed09f14", 415 | "metadata": {}, 416 | "outputs": [ 417 | { 418 | "data": { 419 | "text/plain": [ 420 | "1.0" 421 | ] 422 | }, 423 | "execution_count": 23, 424 | "metadata": {}, 425 | "output_type": "execute_result" 426 | } 427 | ], 428 | "source": [ 429 | "model.score(inputs_n,target)" 430 | ] 431 | }, 432 | { 433 | "cell_type": "code", 434 | "execution_count": 25, 435 | "id": "099a4625", 436 | "metadata": {}, 437 | "outputs": [ 438 | { 439 | "name": "stderr", 440 | "output_type": "stream", 441 | "text": [ 442 | "C:\\ProgramData\\anaconda3\\Lib\\site-packages\\sklearn\\base.py:464: UserWarning: X does not have valid feature names, but DecisionTreeClassifier was fitted with feature names\n", 443 | " warnings.warn(\n" 444 | ] 445 | }, 446 | { 447 | "data": { 448 | "text/plain": [ 449 | "array(['no'], dtype=object)" 450 | ] 451 | }, 452 | "execution_count": 25, 453 | "metadata": {}, 454 | "output_type": "execute_result" 455 | } 456 | ], 457 | "source": [ 458 | "model.predict([[2,1,0,1]])" 459 | ] 460 | }, 461 | { 462 | "cell_type": "code", 463 | "execution_count": 26, 464 | "id": "f32ed705", 465 | "metadata": {}, 466 | "outputs": [ 467 | { 468 | "name": "stderr", 469 | "output_type": "stream", 470 | "text": [ 471 | "C:\\ProgramData\\anaconda3\\Lib\\site-packages\\sklearn\\base.py:464: UserWarning: X does not have valid feature names, but DecisionTreeClassifier was fitted with feature names\n", 472 | " warnings.warn(\n" 473 | ] 474 | }, 475 | { 476 | "data": { 477 | "text/plain": [ 478 | "array(['yes'], dtype=object)" 479 | ] 480 | }, 481 | "execution_count": 26, 482 | "metadata": {}, 483 | "output_type": "execute_result" 484 | } 485 | 
], 486 | "source": [ 487 | "model.predict([[2,1,1,0]])" 488 | ] 489 | }, 490 | { 491 | "cell_type": "code", 492 | "execution_count": null, 493 | "id": "4505ba19", 494 | "metadata": {}, 495 | "outputs": [], 496 | "source": [] 497 | } 498 | ], 499 | "metadata": { 500 | "kernelspec": { 501 | "display_name": "Python 3 (ipykernel)", 502 | "language": "python", 503 | "name": "python3" 504 | }, 505 | "language_info": { 506 | "codemirror_mode": { 507 | "name": "ipython", 508 | "version": 3 509 | }, 510 | "file_extension": ".py", 511 | "mimetype": "text/x-python", 512 | "name": "python", 513 | "nbconvert_exporter": "python", 514 | "pygments_lexer": "ipython3", 515 | "version": "3.11.4" 516 | } 517 | }, 518 | "nbformat": 4, 519 | "nbformat_minor": 5 520 | } 521 | -------------------------------------------------------------------------------- /ML Practice/ML Mid Task/SVM.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "id": "814666e4", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import numpy as np\n", 11 | "from sklearn import datasets" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 12, 17 | "id": "713f4176", 18 | "metadata": {}, 19 | "outputs": [ 20 | { 21 | "data": { 22 | "text/plain": [ 23 | "{'data': array([[5.1, 3.5, 1.4, 0.2],\n", 24 | " [4.9, 3. , 1.4, 0.2],\n", 25 | " [4.7, 3.2, 1.3, 0.2],\n", 26 | " [4.6, 3.1, 1.5, 0.2],\n", 27 | " [5. , 3.6, 1.4, 0.2],\n", 28 | " [5.4, 3.9, 1.7, 0.4],\n", 29 | " [4.6, 3.4, 1.4, 0.3],\n", 30 | " [5. , 3.4, 1.5, 0.2],\n", 31 | " [4.4, 2.9, 1.4, 0.2],\n", 32 | " [4.9, 3.1, 1.5, 0.1],\n", 33 | " [5.4, 3.7, 1.5, 0.2],\n", 34 | " [4.8, 3.4, 1.6, 0.2],\n", 35 | " [4.8, 3. , 1.4, 0.1],\n", 36 | " [4.3, 3. , 1.1, 0.1],\n", 37 | " [5.8, 4. 
, 1.2, 0.2],\n", 38 | " [5.7, 4.4, 1.5, 0.4],\n", 39 | " [5.4, 3.9, 1.3, 0.4],\n", 40 | " [5.1, 3.5, 1.4, 0.3],\n", 41 | " [5.7, 3.8, 1.7, 0.3],\n", 42 | " [5.1, 3.8, 1.5, 0.3],\n", 43 | " [5.4, 3.4, 1.7, 0.2],\n", 44 | " [5.1, 3.7, 1.5, 0.4],\n", 45 | " [4.6, 3.6, 1. , 0.2],\n", 46 | " [5.1, 3.3, 1.7, 0.5],\n", 47 | " [4.8, 3.4, 1.9, 0.2],\n", 48 | " [5. , 3. , 1.6, 0.2],\n", 49 | " [5. , 3.4, 1.6, 0.4],\n", 50 | " [5.2, 3.5, 1.5, 0.2],\n", 51 | " [5.2, 3.4, 1.4, 0.2],\n", 52 | " [4.7, 3.2, 1.6, 0.2],\n", 53 | " [4.8, 3.1, 1.6, 0.2],\n", 54 | " [5.4, 3.4, 1.5, 0.4],\n", 55 | " [5.2, 4.1, 1.5, 0.1],\n", 56 | " [5.5, 4.2, 1.4, 0.2],\n", 57 | " [4.9, 3.1, 1.5, 0.2],\n", 58 | " [5. , 3.2, 1.2, 0.2],\n", 59 | " [5.5, 3.5, 1.3, 0.2],\n", 60 | " [4.9, 3.6, 1.4, 0.1],\n", 61 | " [4.4, 3. , 1.3, 0.2],\n", 62 | " [5.1, 3.4, 1.5, 0.2],\n", 63 | " [5. , 3.5, 1.3, 0.3],\n", 64 | " [4.5, 2.3, 1.3, 0.3],\n", 65 | " [4.4, 3.2, 1.3, 0.2],\n", 66 | " [5. , 3.5, 1.6, 0.6],\n", 67 | " [5.1, 3.8, 1.9, 0.4],\n", 68 | " [4.8, 3. , 1.4, 0.3],\n", 69 | " [5.1, 3.8, 1.6, 0.2],\n", 70 | " [4.6, 3.2, 1.4, 0.2],\n", 71 | " [5.3, 3.7, 1.5, 0.2],\n", 72 | " [5. , 3.3, 1.4, 0.2],\n", 73 | " [7. , 3.2, 4.7, 1.4],\n", 74 | " [6.4, 3.2, 4.5, 1.5],\n", 75 | " [6.9, 3.1, 4.9, 1.5],\n", 76 | " [5.5, 2.3, 4. , 1.3],\n", 77 | " [6.5, 2.8, 4.6, 1.5],\n", 78 | " [5.7, 2.8, 4.5, 1.3],\n", 79 | " [6.3, 3.3, 4.7, 1.6],\n", 80 | " [4.9, 2.4, 3.3, 1. ],\n", 81 | " [6.6, 2.9, 4.6, 1.3],\n", 82 | " [5.2, 2.7, 3.9, 1.4],\n", 83 | " [5. , 2. , 3.5, 1. ],\n", 84 | " [5.9, 3. , 4.2, 1.5],\n", 85 | " [6. , 2.2, 4. , 1. ],\n", 86 | " [6.1, 2.9, 4.7, 1.4],\n", 87 | " [5.6, 2.9, 3.6, 1.3],\n", 88 | " [6.7, 3.1, 4.4, 1.4],\n", 89 | " [5.6, 3. , 4.5, 1.5],\n", 90 | " [5.8, 2.7, 4.1, 1. ],\n", 91 | " [6.2, 2.2, 4.5, 1.5],\n", 92 | " [5.6, 2.5, 3.9, 1.1],\n", 93 | " [5.9, 3.2, 4.8, 1.8],\n", 94 | " [6.1, 2.8, 4. 
, 1.3],\n", 95 | " [6.3, 2.5, 4.9, 1.5],\n", 96 | " [6.1, 2.8, 4.7, 1.2],\n", 97 | " [6.4, 2.9, 4.3, 1.3],\n", 98 | " [6.6, 3. , 4.4, 1.4],\n", 99 | " [6.8, 2.8, 4.8, 1.4],\n", 100 | " [6.7, 3. , 5. , 1.7],\n", 101 | " [6. , 2.9, 4.5, 1.5],\n", 102 | " [5.7, 2.6, 3.5, 1. ],\n", 103 | " [5.5, 2.4, 3.8, 1.1],\n", 104 | " [5.5, 2.4, 3.7, 1. ],\n", 105 | " [5.8, 2.7, 3.9, 1.2],\n", 106 | " [6. , 2.7, 5.1, 1.6],\n", 107 | " [5.4, 3. , 4.5, 1.5],\n", 108 | " [6. , 3.4, 4.5, 1.6],\n", 109 | " [6.7, 3.1, 4.7, 1.5],\n", 110 | " [6.3, 2.3, 4.4, 1.3],\n", 111 | " [5.6, 3. , 4.1, 1.3],\n", 112 | " [5.5, 2.5, 4. , 1.3],\n", 113 | " [5.5, 2.6, 4.4, 1.2],\n", 114 | " [6.1, 3. , 4.6, 1.4],\n", 115 | " [5.8, 2.6, 4. , 1.2],\n", 116 | " [5. , 2.3, 3.3, 1. ],\n", 117 | " [5.6, 2.7, 4.2, 1.3],\n", 118 | " [5.7, 3. , 4.2, 1.2],\n", 119 | " [5.7, 2.9, 4.2, 1.3],\n", 120 | " [6.2, 2.9, 4.3, 1.3],\n", 121 | " [5.1, 2.5, 3. , 1.1],\n", 122 | " [5.7, 2.8, 4.1, 1.3],\n", 123 | " [6.3, 3.3, 6. , 2.5],\n", 124 | " [5.8, 2.7, 5.1, 1.9],\n", 125 | " [7.1, 3. , 5.9, 2.1],\n", 126 | " [6.3, 2.9, 5.6, 1.8],\n", 127 | " [6.5, 3. , 5.8, 2.2],\n", 128 | " [7.6, 3. , 6.6, 2.1],\n", 129 | " [4.9, 2.5, 4.5, 1.7],\n", 130 | " [7.3, 2.9, 6.3, 1.8],\n", 131 | " [6.7, 2.5, 5.8, 1.8],\n", 132 | " [7.2, 3.6, 6.1, 2.5],\n", 133 | " [6.5, 3.2, 5.1, 2. ],\n", 134 | " [6.4, 2.7, 5.3, 1.9],\n", 135 | " [6.8, 3. , 5.5, 2.1],\n", 136 | " [5.7, 2.5, 5. , 2. ],\n", 137 | " [5.8, 2.8, 5.1, 2.4],\n", 138 | " [6.4, 3.2, 5.3, 2.3],\n", 139 | " [6.5, 3. , 5.5, 1.8],\n", 140 | " [7.7, 3.8, 6.7, 2.2],\n", 141 | " [7.7, 2.6, 6.9, 2.3],\n", 142 | " [6. , 2.2, 5. , 1.5],\n", 143 | " [6.9, 3.2, 5.7, 2.3],\n", 144 | " [5.6, 2.8, 4.9, 2. ],\n", 145 | " [7.7, 2.8, 6.7, 2. ],\n", 146 | " [6.3, 2.7, 4.9, 1.8],\n", 147 | " [6.7, 3.3, 5.7, 2.1],\n", 148 | " [7.2, 3.2, 6. , 1.8],\n", 149 | " [6.2, 2.8, 4.8, 1.8],\n", 150 | " [6.1, 3. , 4.9, 1.8],\n", 151 | " [6.4, 2.8, 5.6, 2.1],\n", 152 | " [7.2, 3. 
, 5.8, 1.6],\n", 153 | " [7.4, 2.8, 6.1, 1.9],\n", 154 | " [7.9, 3.8, 6.4, 2. ],\n", 155 | " [6.4, 2.8, 5.6, 2.2],\n", 156 | " [6.3, 2.8, 5.1, 1.5],\n", 157 | " [6.1, 2.6, 5.6, 1.4],\n", 158 | " [7.7, 3. , 6.1, 2.3],\n", 159 | " [6.3, 3.4, 5.6, 2.4],\n", 160 | " [6.4, 3.1, 5.5, 1.8],\n", 161 | " [6. , 3. , 4.8, 1.8],\n", 162 | " [6.9, 3.1, 5.4, 2.1],\n", 163 | " [6.7, 3.1, 5.6, 2.4],\n", 164 | " [6.9, 3.1, 5.1, 2.3],\n", 165 | " [5.8, 2.7, 5.1, 1.9],\n", 166 | " [6.8, 3.2, 5.9, 2.3],\n", 167 | " [6.7, 3.3, 5.7, 2.5],\n", 168 | " [6.7, 3. , 5.2, 2.3],\n", 169 | " [6.3, 2.5, 5. , 1.9],\n", 170 | " [6.5, 3. , 5.2, 2. ],\n", 171 | " [6.2, 3.4, 5.4, 2.3],\n", 172 | " [5.9, 3. , 5.1, 1.8]]),\n", 173 | " 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 174 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 175 | " 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", 176 | " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", 177 | " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", 178 | " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", 179 | " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]),\n", 180 | " 'frame': None,\n", 181 | " 'target_names': array(['setosa', 'versicolor', 'virginica'], dtype='#sk-container-id-1 {color: black;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content 
{max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 
div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}
SVC(kernel='linear')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" 262 | ], 263 | "text/plain": [ 264 | "SVC(kernel='linear')" 265 | ] 266 | }, 267 | "execution_count": 19, 268 | "metadata": {}, 269 | "output_type": "execute_result" 270 | } 271 | ], 272 | "source": [ 273 | "clf.fit(X_train, y_train)" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": 20, 279 | "id": "6a8fbcb7", 280 | "metadata": {}, 281 | "outputs": [], 282 | "source": [ 283 | "y_pred = clf.predict(X_test)" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": 25, 289 | "id": "cf8a534a", 290 | "metadata": {}, 291 | "outputs": [ 292 | { 293 | "name": "stdout", 294 | "output_type": "stream", 295 | "text": [ 296 | "Test Instance 2: Sepal Length = 5.7, Sepal Width = 3.8\n", 297 | "Test Instance 6: Sepal Length = 5.4, Sepal Width = 3.4\n", 298 | "Test Instance 12: Sepal Length = 4.8, Sepal Width = 3.0\n", 299 | "Test Instance 13: Sepal Length = 5.5, Sepal Width = 3.5\n", 300 | "Test Instance 14: Sepal Length = 4.9, Sepal Width = 3.1\n", 301 | "Test Instance 15: Sepal Length = 5.1, Sepal Width = 3.8\n", 302 | "Test Instance 21: Sepal Length = 4.7, Sepal Width = 3.2\n", 303 | "Test Instance 23: Sepal Length = 5.0, Sepal Width = 3.4\n", 304 | "Test Instance 29: Sepal Length = 4.8, Sepal Width = 3.0\n", 305 | "Test Instance 30: Sepal Length = 4.8, Sepal Width = 3.1\n" 306 | ] 307 | } 308 | ], 309 | "source": [ 310 | "for i in range(len(X_test)):\n", 311 | " if y_pred[i] == 0:\n", 312 | " sepal_length, sepal_width = X_test[i][:2]\n", 313 | " print(f\"Test Instance {i + 1}: Sepal Length = {sepal_length}, Sepal Width = {sepal_width}\")" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": 27, 319 | "id": "ef2d637e", 320 | "metadata": {}, 321 | "outputs": [], 322 | "source": [ 323 | "from sklearn.metrics import accuracy_score" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": 28, 329 | "id": "e639a9ad", 330 | "metadata": {}, 331 | "outputs": [ 332 | { 333 | "name": "stdout", 
334 | "output_type": "stream", 335 | "text": [ 336 | "Accuracy: 1.0\n" 337 | ] 338 | } 339 | ], 340 | "source": [ 341 | "accuracy = accuracy_score(y_test, y_pred)\n", 342 | "print(F'Accuracy: {accuracy}')" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": null, 348 | "id": "02ea6879", 349 | "metadata": {}, 350 | "outputs": [], 351 | "source": [] 352 | } 353 | ], 354 | "metadata": { 355 | "kernelspec": { 356 | "display_name": "Python 3 (ipykernel)", 357 | "language": "python", 358 | "name": "python3" 359 | }, 360 | "language_info": { 361 | "codemirror_mode": { 362 | "name": "ipython", 363 | "version": 3 364 | }, 365 | "file_extension": ".py", 366 | "mimetype": "text/x-python", 367 | "name": "python", 368 | "nbconvert_exporter": "python", 369 | "pygments_lexer": "ipython3", 370 | "version": "3.11.4" 371 | } 372 | }, 373 | "nbformat": 4, 374 | "nbformat_minor": 5 375 | } 376 | -------------------------------------------------------------------------------- /Support Vector Machine/SVM (1).ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "id": "814666e4", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import numpy as np\n", 11 | "from sklearn import datasets" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 12, 17 | "id": "713f4176", 18 | "metadata": {}, 19 | "outputs": [ 20 | { 21 | "data": { 22 | "text/plain": [ 23 | "{'data': array([[5.1, 3.5, 1.4, 0.2],\n", 24 | " [4.9, 3. , 1.4, 0.2],\n", 25 | " [4.7, 3.2, 1.3, 0.2],\n", 26 | " [4.6, 3.1, 1.5, 0.2],\n", 27 | " [5. , 3.6, 1.4, 0.2],\n", 28 | " [5.4, 3.9, 1.7, 0.4],\n", 29 | " [4.6, 3.4, 1.4, 0.3],\n", 30 | " [5. , 3.4, 1.5, 0.2],\n", 31 | " [4.4, 2.9, 1.4, 0.2],\n", 32 | " [4.9, 3.1, 1.5, 0.1],\n", 33 | " [5.4, 3.7, 1.5, 0.2],\n", 34 | " [4.8, 3.4, 1.6, 0.2],\n", 35 | " [4.8, 3. , 1.4, 0.1],\n", 36 | " [4.3, 3. 
, 1.1, 0.1],\n", 37 | " [5.8, 4. , 1.2, 0.2],\n", 38 | " [5.7, 4.4, 1.5, 0.4],\n", 39 | " [5.4, 3.9, 1.3, 0.4],\n", 40 | " [5.1, 3.5, 1.4, 0.3],\n", 41 | " [5.7, 3.8, 1.7, 0.3],\n", 42 | " [5.1, 3.8, 1.5, 0.3],\n", 43 | " [5.4, 3.4, 1.7, 0.2],\n", 44 | " [5.1, 3.7, 1.5, 0.4],\n", 45 | " [4.6, 3.6, 1. , 0.2],\n", 46 | " [5.1, 3.3, 1.7, 0.5],\n", 47 | " [4.8, 3.4, 1.9, 0.2],\n", 48 | " [5. , 3. , 1.6, 0.2],\n", 49 | " [5. , 3.4, 1.6, 0.4],\n", 50 | " [5.2, 3.5, 1.5, 0.2],\n", 51 | " [5.2, 3.4, 1.4, 0.2],\n", 52 | " [4.7, 3.2, 1.6, 0.2],\n", 53 | " [4.8, 3.1, 1.6, 0.2],\n", 54 | " [5.4, 3.4, 1.5, 0.4],\n", 55 | " [5.2, 4.1, 1.5, 0.1],\n", 56 | " [5.5, 4.2, 1.4, 0.2],\n", 57 | " [4.9, 3.1, 1.5, 0.2],\n", 58 | " [5. , 3.2, 1.2, 0.2],\n", 59 | " [5.5, 3.5, 1.3, 0.2],\n", 60 | " [4.9, 3.6, 1.4, 0.1],\n", 61 | " [4.4, 3. , 1.3, 0.2],\n", 62 | " [5.1, 3.4, 1.5, 0.2],\n", 63 | " [5. , 3.5, 1.3, 0.3],\n", 64 | " [4.5, 2.3, 1.3, 0.3],\n", 65 | " [4.4, 3.2, 1.3, 0.2],\n", 66 | " [5. , 3.5, 1.6, 0.6],\n", 67 | " [5.1, 3.8, 1.9, 0.4],\n", 68 | " [4.8, 3. , 1.4, 0.3],\n", 69 | " [5.1, 3.8, 1.6, 0.2],\n", 70 | " [4.6, 3.2, 1.4, 0.2],\n", 71 | " [5.3, 3.7, 1.5, 0.2],\n", 72 | " [5. , 3.3, 1.4, 0.2],\n", 73 | " [7. , 3.2, 4.7, 1.4],\n", 74 | " [6.4, 3.2, 4.5, 1.5],\n", 75 | " [6.9, 3.1, 4.9, 1.5],\n", 76 | " [5.5, 2.3, 4. , 1.3],\n", 77 | " [6.5, 2.8, 4.6, 1.5],\n", 78 | " [5.7, 2.8, 4.5, 1.3],\n", 79 | " [6.3, 3.3, 4.7, 1.6],\n", 80 | " [4.9, 2.4, 3.3, 1. ],\n", 81 | " [6.6, 2.9, 4.6, 1.3],\n", 82 | " [5.2, 2.7, 3.9, 1.4],\n", 83 | " [5. , 2. , 3.5, 1. ],\n", 84 | " [5.9, 3. , 4.2, 1.5],\n", 85 | " [6. , 2.2, 4. , 1. ],\n", 86 | " [6.1, 2.9, 4.7, 1.4],\n", 87 | " [5.6, 2.9, 3.6, 1.3],\n", 88 | " [6.7, 3.1, 4.4, 1.4],\n", 89 | " [5.6, 3. , 4.5, 1.5],\n", 90 | " [5.8, 2.7, 4.1, 1. ],\n", 91 | " [6.2, 2.2, 4.5, 1.5],\n", 92 | " [5.6, 2.5, 3.9, 1.1],\n", 93 | " [5.9, 3.2, 4.8, 1.8],\n", 94 | " [6.1, 2.8, 4. 
, 1.3],\n", 95 | " [6.3, 2.5, 4.9, 1.5],\n", 96 | " [6.1, 2.8, 4.7, 1.2],\n", 97 | " [6.4, 2.9, 4.3, 1.3],\n", 98 | " [6.6, 3. , 4.4, 1.4],\n", 99 | " [6.8, 2.8, 4.8, 1.4],\n", 100 | " [6.7, 3. , 5. , 1.7],\n", 101 | " [6. , 2.9, 4.5, 1.5],\n", 102 | " [5.7, 2.6, 3.5, 1. ],\n", 103 | " [5.5, 2.4, 3.8, 1.1],\n", 104 | " [5.5, 2.4, 3.7, 1. ],\n", 105 | " [5.8, 2.7, 3.9, 1.2],\n", 106 | " [6. , 2.7, 5.1, 1.6],\n", 107 | " [5.4, 3. , 4.5, 1.5],\n", 108 | " [6. , 3.4, 4.5, 1.6],\n", 109 | " [6.7, 3.1, 4.7, 1.5],\n", 110 | " [6.3, 2.3, 4.4, 1.3],\n", 111 | " [5.6, 3. , 4.1, 1.3],\n", 112 | " [5.5, 2.5, 4. , 1.3],\n", 113 | " [5.5, 2.6, 4.4, 1.2],\n", 114 | " [6.1, 3. , 4.6, 1.4],\n", 115 | " [5.8, 2.6, 4. , 1.2],\n", 116 | " [5. , 2.3, 3.3, 1. ],\n", 117 | " [5.6, 2.7, 4.2, 1.3],\n", 118 | " [5.7, 3. , 4.2, 1.2],\n", 119 | " [5.7, 2.9, 4.2, 1.3],\n", 120 | " [6.2, 2.9, 4.3, 1.3],\n", 121 | " [5.1, 2.5, 3. , 1.1],\n", 122 | " [5.7, 2.8, 4.1, 1.3],\n", 123 | " [6.3, 3.3, 6. , 2.5],\n", 124 | " [5.8, 2.7, 5.1, 1.9],\n", 125 | " [7.1, 3. , 5.9, 2.1],\n", 126 | " [6.3, 2.9, 5.6, 1.8],\n", 127 | " [6.5, 3. , 5.8, 2.2],\n", 128 | " [7.6, 3. , 6.6, 2.1],\n", 129 | " [4.9, 2.5, 4.5, 1.7],\n", 130 | " [7.3, 2.9, 6.3, 1.8],\n", 131 | " [6.7, 2.5, 5.8, 1.8],\n", 132 | " [7.2, 3.6, 6.1, 2.5],\n", 133 | " [6.5, 3.2, 5.1, 2. ],\n", 134 | " [6.4, 2.7, 5.3, 1.9],\n", 135 | " [6.8, 3. , 5.5, 2.1],\n", 136 | " [5.7, 2.5, 5. , 2. ],\n", 137 | " [5.8, 2.8, 5.1, 2.4],\n", 138 | " [6.4, 3.2, 5.3, 2.3],\n", 139 | " [6.5, 3. , 5.5, 1.8],\n", 140 | " [7.7, 3.8, 6.7, 2.2],\n", 141 | " [7.7, 2.6, 6.9, 2.3],\n", 142 | " [6. , 2.2, 5. , 1.5],\n", 143 | " [6.9, 3.2, 5.7, 2.3],\n", 144 | " [5.6, 2.8, 4.9, 2. ],\n", 145 | " [7.7, 2.8, 6.7, 2. ],\n", 146 | " [6.3, 2.7, 4.9, 1.8],\n", 147 | " [6.7, 3.3, 5.7, 2.1],\n", 148 | " [7.2, 3.2, 6. , 1.8],\n", 149 | " [6.2, 2.8, 4.8, 1.8],\n", 150 | " [6.1, 3. , 4.9, 1.8],\n", 151 | " [6.4, 2.8, 5.6, 2.1],\n", 152 | " [7.2, 3. 
, 5.8, 1.6],\n", 153 | " [7.4, 2.8, 6.1, 1.9],\n", 154 | " [7.9, 3.8, 6.4, 2. ],\n", 155 | " [6.4, 2.8, 5.6, 2.2],\n", 156 | " [6.3, 2.8, 5.1, 1.5],\n", 157 | " [6.1, 2.6, 5.6, 1.4],\n", 158 | " [7.7, 3. , 6.1, 2.3],\n", 159 | " [6.3, 3.4, 5.6, 2.4],\n", 160 | " [6.4, 3.1, 5.5, 1.8],\n", 161 | " [6. , 3. , 4.8, 1.8],\n", 162 | " [6.9, 3.1, 5.4, 2.1],\n", 163 | " [6.7, 3.1, 5.6, 2.4],\n", 164 | " [6.9, 3.1, 5.1, 2.3],\n", 165 | " [5.8, 2.7, 5.1, 1.9],\n", 166 | " [6.8, 3.2, 5.9, 2.3],\n", 167 | " [6.7, 3.3, 5.7, 2.5],\n", 168 | " [6.7, 3. , 5.2, 2.3],\n", 169 | " [6.3, 2.5, 5. , 1.9],\n", 170 | " [6.5, 3. , 5.2, 2. ],\n", 171 | " [6.2, 3.4, 5.4, 2.3],\n", 172 | " [5.9, 3. , 5.1, 1.8]]),\n", 173 | " 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 174 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 175 | " 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", 176 | " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", 177 | " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", 178 | " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", 179 | " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]),\n", 180 | " 'frame': None,\n", 181 | " 'target_names': array(['setosa', 'versicolor', 'virginica'], dtype='#sk-container-id-1 {color: black;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content 
{max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 
div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}
SVC(kernel='linear')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" 262 | ], 263 | "text/plain": [ 264 | "SVC(kernel='linear')" 265 | ] 266 | }, 267 | "execution_count": 19, 268 | "metadata": {}, 269 | "output_type": "execute_result" 270 | } 271 | ], 272 | "source": [ 273 | "clf.fit(X_train, y_train)" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": 20, 279 | "id": "6a8fbcb7", 280 | "metadata": {}, 281 | "outputs": [], 282 | "source": [ 283 | "y_pred = clf.predict(X_test)" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": 25, 289 | "id": "cf8a534a", 290 | "metadata": {}, 291 | "outputs": [ 292 | { 293 | "name": "stdout", 294 | "output_type": "stream", 295 | "text": [ 296 | "Test Instance 2: Sepal Length = 5.7, Sepal Width = 3.8\n", 297 | "Test Instance 6: Sepal Length = 5.4, Sepal Width = 3.4\n", 298 | "Test Instance 12: Sepal Length = 4.8, Sepal Width = 3.0\n", 299 | "Test Instance 13: Sepal Length = 5.5, Sepal Width = 3.5\n", 300 | "Test Instance 14: Sepal Length = 4.9, Sepal Width = 3.1\n", 301 | "Test Instance 15: Sepal Length = 5.1, Sepal Width = 3.8\n", 302 | "Test Instance 21: Sepal Length = 4.7, Sepal Width = 3.2\n", 303 | "Test Instance 23: Sepal Length = 5.0, Sepal Width = 3.4\n", 304 | "Test Instance 29: Sepal Length = 4.8, Sepal Width = 3.0\n", 305 | "Test Instance 30: Sepal Length = 4.8, Sepal Width = 3.1\n" 306 | ] 307 | } 308 | ], 309 | "source": [ 310 | "for i in range(len(X_test)):\n", 311 | " if y_pred[i] == 0:\n", 312 | " sepal_length, sepal_width = X_test[i][:2]\n", 313 | " print(f\"Test Instance {i + 1}: Sepal Length = {sepal_length}, Sepal Width = {sepal_width}\")" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": 27, 319 | "id": "ef2d637e", 320 | "metadata": {}, 321 | "outputs": [], 322 | "source": [ 323 | "from sklearn.metrics import accuracy_score" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": 28, 329 | "id": "e639a9ad", 330 | "metadata": {}, 331 | "outputs": [ 332 | { 333 | "name": "stdout", 
334 | "output_type": "stream", 335 | "text": [ 336 | "Accuracy: 1.0\n" 337 | ] 338 | } 339 | ], 340 | "source": [ 341 | "accuracy = accuracy_score(y_test, y_pred)\n", 342 | "print(F'Accuracy: {accuracy}')" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": null, 348 | "id": "02ea6879", 349 | "metadata": {}, 350 | "outputs": [], 351 | "source": [] 352 | } 353 | ], 354 | "metadata": { 355 | "kernelspec": { 356 | "display_name": "Python 3 (ipykernel)", 357 | "language": "python", 358 | "name": "python3" 359 | }, 360 | "language_info": { 361 | "codemirror_mode": { 362 | "name": "ipython", 363 | "version": 3 364 | }, 365 | "file_extension": ".py", 366 | "mimetype": "text/x-python", 367 | "name": "python", 368 | "nbconvert_exporter": "python", 369 | "pygments_lexer": "ipython3", 370 | "version": "3.11.4" 371 | } 372 | }, 373 | "nbformat": 4, 374 | "nbformat_minor": 5 375 | } 376 | -------------------------------------------------------------------------------- /ML Practice/Ridge_Lasso_diff .ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 14, 6 | "id": "f428f6bf", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import pandas as pd" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 15, 16 | "id": "411f5545", 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "data": { 21 | "text/html": [ 22 | "
\n", 23 | "\n", 36 | "\n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | "
Unnamed: 0PriceAreaLocationbhkGYM
006200000400Airoli10
12149000001245Airoli20
23140000001183Airoli21
34148000001245Airoli20
456400000495Airoli10
\n", 96 | "
" 97 | ], 98 | "text/plain": [ 99 | " Unnamed: 0 Price Area Location bhk GYM\n", 100 | "0 0 6200000 400 Airoli 1 0\n", 101 | "1 2 14900000 1245 Airoli 2 0\n", 102 | "2 3 14000000 1183 Airoli 2 1\n", 103 | "3 4 14800000 1245 Airoli 2 0\n", 104 | "4 5 6400000 495 Airoli 1 0" 105 | ] 106 | }, 107 | "execution_count": 15, 108 | "metadata": {}, 109 | "output_type": "execute_result" 110 | } 111 | ], 112 | "source": [ 113 | "data = pd.read_csv(\"cleaned_data.csv\")\n", 114 | "data.head()" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 17, 120 | "id": "96f362c3", 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "X = data.drop(columns=['Price', 'Location'])\n", 125 | "Y = data['Price']" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 18, 131 | "id": "30650a92", 132 | "metadata": {}, 133 | "outputs": [], 134 | "source": [ 135 | "from sklearn.model_selection import train_test_split" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 19, 141 | "id": "a95e2351", 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.2,random_state=0)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 20, 151 | "id": "03c0e775", 152 | "metadata": {}, 153 | "outputs": [ 154 | { 155 | "name": "stdout", 156 | "output_type": "stream", 157 | "text": [ 158 | "(2946, 4)\n", 159 | "(737, 4)\n" 160 | ] 161 | } 162 | ], 163 | "source": [ 164 | "print(X_train.shape)\n", 165 | "print(X_test.shape)" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 21, 171 | "id": "c65668d2", 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "from sklearn.linear_model import LinearRegression,Lasso,Ridge" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 22, 181 | "id": "45b091ac", 182 | "metadata": {}, 183 | "outputs": [], 184 | "source": [ 185 | "lr 
= LinearRegression()" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 24, 191 | "id": "c4803916", 192 | "metadata": {}, 193 | "outputs": [ 194 | { 195 | "data": { 196 | "text/html": [ 197 | "
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" 198 | ], 199 | "text/plain": [ 200 | "LinearRegression()" 201 | ] 202 | }, 203 | "execution_count": 24, 204 | "metadata": {}, 205 | "output_type": "execute_result" 206 | } 207 | ], 208 | "source": [ 209 | "lr.fit(X_train,Y_train)" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 26, 215 | "id": "ef9ca0f7", 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "Y_pred= lr.predict(X_test)" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": 28, 225 | "id": "0c2ef03e", 226 | "metadata": {}, 227 | "outputs": [ 228 | { 229 | "data": { 230 | "text/plain": [ 231 | "0.6445022268229081" 232 | ] 233 | }, 234 | "execution_count": 28, 235 | "metadata": {}, 236 | "output_type": "execute_result" 237 | } 238 | ], 239 | "source": [ 240 | "from sklearn.metrics import r2_score\n", 241 | "r2_score(Y_test, Y_pred)" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 29, 247 | "id": "2c79ea36", 248 | "metadata": {}, 249 | "outputs": [], 250 | "source": [ 251 | "lasso= Lasso()" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 30, 257 | "id": "718b6d31", 258 | "metadata": {}, 259 | "outputs": [ 260 | { 261 | "data": { 262 | "text/html": [ 263 | "
Lasso()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" 264 | ], 265 | "text/plain": [ 266 | "Lasso()" 267 | ] 268 | }, 269 | "execution_count": 30, 270 | "metadata": {}, 271 | "output_type": "execute_result" 272 | } 273 | ], 274 | "source": [ 275 | "lasso.fit(X_train,Y_train)" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": 31, 281 | "id": "8ac27c45", 282 | "metadata": {}, 283 | "outputs": [ 284 | { 285 | "data": { 286 | "text/plain": [ 287 | "0.6445022486626868" 288 | ] 289 | }, 290 | "execution_count": 31, 291 | "metadata": {}, 292 | "output_type": "execute_result" 293 | } 294 | ], 295 | "source": [ 296 | "Y_pred_lasso= lasso.predict(X_test)\n", 297 | "r2_score(Y_test,Y_pred_lasso)" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": 32, 303 | "id": "de126539", 304 | "metadata": {}, 305 | "outputs": [], 306 | "source": [ 307 | "ridge= Ridge()" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": 33, 313 | "id": "e5372e8f", 314 | "metadata": {}, 315 | "outputs": [ 316 | { 317 | "data": { 318 | "text/html": [ 319 | "
Ridge()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" 320 | ], 321 | "text/plain": [ 322 | "Ridge()" 323 | ] 324 | }, 325 | "execution_count": 33, 326 | "metadata": {}, 327 | "output_type": "execute_result" 328 | } 329 | ], 330 | "source": [ 331 | "ridge.fit(X_train,Y_train)" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": 34, 337 | "id": "f4cc9e54", 338 | "metadata": {}, 339 | "outputs": [ 340 | { 341 | "data": { 342 | "text/plain": [ 343 | "0.6445226820678371" 344 | ] 345 | }, 346 | "execution_count": 34, 347 | "metadata": {}, 348 | "output_type": "execute_result" 349 | } 350 | ], 351 | "source": [ 352 | "Y_pred_ridge= ridge.predict(X_test)\n", 353 | "r2_score(Y_test,Y_pred_ridge)" 354 | ] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": 35, 359 | "id": "2d97bcd7", 360 | "metadata": {}, 361 | "outputs": [ 362 | { 363 | "name": "stdout", 364 | "output_type": "stream", 365 | "text": [ 366 | "No Regularization: 0.6445022268229081\n", 367 | "Lasso: 0.6445022486626868\n", 368 | "Ridge: 0.6445226820678371\n" 369 | ] 370 | } 371 | ], 372 | "source": [ 373 | "print(\"No Regularization: \",r2_score(Y_test,Y_pred))\n", 374 | "print(\"Lasso: \", r2_score(Y_test,Y_pred_lasso))\n", 375 | "print(\"Ridge: \", r2_score(Y_test,Y_pred_ridge))" 376 | ] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "execution_count": null, 381 | "id": "a4f30cc6", 382 | "metadata": {}, 383 | "outputs": [], 384 | "source": [] 385 | } 386 | ], 387 | "metadata": { 388 | "kernelspec": { 389 | "display_name": "Python 3 (ipykernel)", 390 | "language": "python", 391 | "name": "python3" 392 | }, 393 | "language_info": { 394 | "codemirror_mode": { 395 | "name": "ipython", 396 | "version": 3 397 | }, 398 | "file_extension": ".py", 399 | "mimetype": "text/x-python", 400 | "name": "python", 401 | "nbconvert_exporter": "python", 402 | "pygments_lexer": "ipython3", 403 | "version": "3.11.5" 404 | } 405 | }, 406 | "nbformat": 4, 407 | "nbformat_minor": 5 408 | } 409 | 
-------------------------------------------------------------------------------- /ML Practice/spam.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 5, 6 | "id": "7b990faf", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import pandas as pd" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 6, 16 | "id": "4791fbc1", 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "data": { 21 | "text/html": [ 22 | "
\n", 23 | "\n", 36 | "\n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | "
CategoryMessage
0hamGo until jurong point, crazy.. Available only ...
1hamOk lar... Joking wif u oni...
2spamFree entry in 2 a wkly comp to win FA Cup fina...
3hamU dun say so early hor... U c already then say...
4hamNah I don't think he goes to usf, he lives aro...
\n", 72 | "
" 73 | ], 74 | "text/plain": [ 75 | " Category Message\n", 76 | "0 ham Go until jurong point, crazy.. Available only ...\n", 77 | "1 ham Ok lar... Joking wif u oni...\n", 78 | "2 spam Free entry in 2 a wkly comp to win FA Cup fina...\n", 79 | "3 ham U dun say so early hor... U c already then say...\n", 80 | "4 ham Nah I don't think he goes to usf, he lives aro..." 81 | ] 82 | }, 83 | "execution_count": 6, 84 | "metadata": {}, 85 | "output_type": "execute_result" 86 | } 87 | ], 88 | "source": [ 89 | "df = pd.read_csv(\"spam.csv\")\n", 90 | "df.head()" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 7, 96 | "id": "6722e4b5", 97 | "metadata": {}, 98 | "outputs": [ 99 | { 100 | "data": { 101 | "text/html": [ 102 | "
\n", 103 | "\n", 120 | "\n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | "
Message
countuniquetopfreq
Category
ham48254516Sorry, I'll call later30
spam747641Please call our customer service representativ...4
\n", 158 | "
" 159 | ], 160 | "text/plain": [ 161 | " Message \\\n", 162 | " count unique top \n", 163 | "Category \n", 164 | "ham 4825 4516 Sorry, I'll call later \n", 165 | "spam 747 641 Please call our customer service representativ... \n", 166 | "\n", 167 | " \n", 168 | " freq \n", 169 | "Category \n", 170 | "ham 30 \n", 171 | "spam 4 " 172 | ] 173 | }, 174 | "execution_count": 7, 175 | "metadata": {}, 176 | "output_type": "execute_result" 177 | } 178 | ], 179 | "source": [ 180 | "df.groupby('Category').describe()" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 9, 186 | "id": "778a219e", 187 | "metadata": {}, 188 | "outputs": [ 189 | { 190 | "data": { 191 | "text/html": [ 192 | "
\n", 193 | "\n", 206 | "\n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | "
CategoryMessagespam
0hamGo until jurong point, crazy.. Available only ...0
1hamOk lar... Joking wif u oni...0
2spamFree entry in 2 a wkly comp to win FA Cup fina...1
3hamU dun say so early hor... U c already then say...0
4hamNah I don't think he goes to usf, he lives aro...0
\n", 248 | "
" 249 | ], 250 | "text/plain": [ 251 | " Category Message spam\n", 252 | "0 ham Go until jurong point, crazy.. Available only ... 0\n", 253 | "1 ham Ok lar... Joking wif u oni... 0\n", 254 | "2 spam Free entry in 2 a wkly comp to win FA Cup fina... 1\n", 255 | "3 ham U dun say so early hor... U c already then say... 0\n", 256 | "4 ham Nah I don't think he goes to usf, he lives aro... 0" 257 | ] 258 | }, 259 | "execution_count": 9, 260 | "metadata": {}, 261 | "output_type": "execute_result" 262 | } 263 | ], 264 | "source": [ 265 | "df[\"spam\"] = df['Category'].apply(lambda x: 1 if x=='spam' else 0)\n", 266 | "df.head()" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": 12, 272 | "id": "43845d3a", 273 | "metadata": {}, 274 | "outputs": [], 275 | "source": [ 276 | "from sklearn.model_selection import train_test_split" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": 13, 282 | "id": "9c4ee2ba", 283 | "metadata": {}, 284 | "outputs": [], 285 | "source": [ 286 | "x_train, x_test, y_train, y_test = train_test_split(df.Message,df.spam, test_size=0.2)" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": 17, 292 | "id": "005bf831", 293 | "metadata": {}, 294 | "outputs": [ 295 | { 296 | "data": { 297 | "text/plain": [ 298 | "array([[0, 0, 0, ..., 0, 0, 0],\n", 299 | " [0, 0, 0, ..., 0, 0, 0],\n", 300 | " [0, 0, 0, ..., 0, 0, 0]], dtype=int64)" 301 | ] 302 | }, 303 | "execution_count": 17, 304 | "metadata": {}, 305 | "output_type": "execute_result" 306 | } 307 | ], 308 | "source": [ 309 | "from sklearn.feature_extraction.text import CountVectorizer\n", 310 | "v = CountVectorizer()\n", 311 | "x_train_count = v.fit_transform(x_train.values)\n", 312 | "x_train_count.toarray()[:3]" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": 18, 318 | "id": "80aaf80e", 319 | "metadata": {}, 320 | "outputs": [ 321 | { 322 | "data": { 323 | "text/html": [ 324 | "
MultinomialNB()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" 325 | ], 326 | "text/plain": [ 327 | "MultinomialNB()" 328 | ] 329 | }, 330 | "execution_count": 18, 331 | "metadata": {}, 332 | "output_type": "execute_result" 333 | } 334 | ], 335 | "source": [ 336 | "from sklearn.naive_bayes import MultinomialNB\n", 337 | "model = MultinomialNB()\n", 338 | "model.fit(x_train_count,y_train)" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": 19, 344 | "id": "f59aa048", 345 | "metadata": {}, 346 | "outputs": [ 347 | { 348 | "data": { 349 | "text/plain": [ 350 | "array([0, 1], dtype=int64)" 351 | ] 352 | }, 353 | "execution_count": 19, 354 | "metadata": {}, 355 | "output_type": "execute_result" 356 | } 357 | ], 358 | "source": [ 359 | "emails = [\n", 360 | " 'Hey mohan, can we get together to watch footbal game tomorrow?',\n", 361 | " 'Upto 20% discount on parking, exclusive offer just for you. Dont miss this reward!'\n", 362 | "]\n", 363 | "emails_count = v.transform(emails)\n", 364 | "model.predict(emails_count)" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": 20, 370 | "id": "603f0d94", 371 | "metadata": {}, 372 | "outputs": [ 373 | { 374 | "data": { 375 | "text/plain": [ 376 | "0.9856424982053122" 377 | ] 378 | }, 379 | "execution_count": 20, 380 | "metadata": {}, 381 | "output_type": "execute_result" 382 | } 383 | ], 384 | "source": [ 385 | "x_test_count = v.transform(x_test)\n", 386 | "model.score(x_test_count, y_test)" 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": 21, 392 | "id": "2090cfc6", 393 | "metadata": {}, 394 | "outputs": [], 395 | "source": [ 396 | "from sklearn.pipeline import Pipeline\n", 397 | "clf = Pipeline([\n", 398 | " ('vectorizer', CountVectorizer()),\n", 399 | " ('nb', MultinomialNB())\n", 400 | "])" 401 | ] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "execution_count": 22, 406 | "id": "b60fb9c4", 407 | "metadata": {}, 408 | "outputs": [ 409 | { 410 | "data": { 411 | "text/html": [ 412 | "
Pipeline(steps=[('vectorizer', CountVectorizer()), ('nb', MultinomialNB())])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" 413 | ], 414 | "text/plain": [ 415 | "Pipeline(steps=[('vectorizer', CountVectorizer()), ('nb', MultinomialNB())])" 416 | ] 417 | }, 418 | "execution_count": 22, 419 | "metadata": {}, 420 | "output_type": "execute_result" 421 | } 422 | ], 423 | "source": [ 424 | "clf.fit(x_train, y_train)" 425 | ] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": 23, 430 | "id": "e4cb9af3", 431 | "metadata": {}, 432 | "outputs": [ 433 | { 434 | "data": { 435 | "text/plain": [ 436 | "0.9856424982053122" 437 | ] 438 | }, 439 | "execution_count": 23, 440 | "metadata": {}, 441 | "output_type": "execute_result" 442 | } 443 | ], 444 | "source": [ 445 | "clf.score(x_test,y_test)" 446 | ] 447 | }, 448 | { 449 | "cell_type": "code", 450 | "execution_count": 24, 451 | "id": "1fde08c4", 452 | "metadata": {}, 453 | "outputs": [ 454 | { 455 | "data": { 456 | "text/plain": [ 457 | "array([0, 1], dtype=int64)" 458 | ] 459 | }, 460 | "execution_count": 24, 461 | "metadata": {}, 462 | "output_type": "execute_result" 463 | } 464 | ], 465 | "source": [ 466 | "clf.predict(emails)" 467 | ] 468 | }, 469 | { 470 | "cell_type": "code", 471 | "execution_count": null, 472 | "id": "3b63aef1", 473 | "metadata": {}, 474 | "outputs": [], 475 | "source": [] 476 | } 477 | ], 478 | "metadata": { 479 | "kernelspec": { 480 | "display_name": "Python 3 (ipykernel)", 481 | "language": "python", 482 | "name": "python3" 483 | }, 484 | "language_info": { 485 | "codemirror_mode": { 486 | "name": "ipython", 487 | "version": 3 488 | }, 489 | "file_extension": ".py", 490 | "mimetype": "text/x-python", 491 | "name": "python", 492 | "nbconvert_exporter": "python", 493 | "pygments_lexer": "ipython3", 494 | "version": "3.11.4" 495 | } 496 | }, 497 | "nbformat": 4, 498 | "nbformat_minor": 5 499 | } 500 | --------------------------------------------------------------------------------