├── ML LAB SYLLABUS.pdf ├── tennis.csv ├── enjoysport.csv ├── LICENSE ├── find s.ipynb ├── program1.ipynb ├── README.md ├── Candidate_Elimination.ipynb ├── program2.ipynb ├── program4.ipynb ├── program8.ipynb ├── program5.ipynb ├── program3.ipynb ├── program6.ipynb ├── program10.ipynb └── program7.ipynb /ML LAB SYLLABUS.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FarhaKousar1601/Machine-Learning-Laboratory-21AIL66-/main/ML LAB SYLLABUS.pdf -------------------------------------------------------------------------------- /tennis.csv: -------------------------------------------------------------------------------- 1 | Sky AirTemp Humidity Wind Water Forecast EnjoySport 2 | Sunny Warm Normal Strong Warm Same Yes 3 | Sunny Warm High Strong Warm Same Yes 4 | Rainy Cold High Strong Warm Change No 5 | Sunny Warm High Strong Cool Change Yes -------------------------------------------------------------------------------- /enjoysport.csv: -------------------------------------------------------------------------------- 1 | sky,airtemp,humidity,wind,water,forcast,enjoysport 2 | sunny,warm,normal,strong,warm,same,yes 3 | sunny,warm,high,strong,warm,same,yes 4 | rainy,cold,high,strong,warm,change,no 5 | sunny,warm,high,strong,cool,change,yes 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Farha Kousar 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /find s.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "88a645af", 7 | "metadata": { 8 | "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", 9 | "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5", 10 | "execution": { 11 | "iopub.execute_input": "2024-08-01T08:08:53.509685Z", 12 | "iopub.status.busy": "2024-08-01T08:08:53.509240Z", 13 | "iopub.status.idle": "2024-08-01T08:08:53.536356Z", 14 | "shell.execute_reply": "2024-08-01T08:08:53.534886Z" 15 | }, 16 | "papermill": { 17 | "duration": 0.033489, 18 | "end_time": "2024-08-01T08:08:53.538819", 19 | "exception": false, 20 | "start_time": "2024-08-01T08:08:53.505330", 21 | "status": "completed" 22 | }, 23 | "tags": [] 24 | }, 25 | "outputs": [ 26 | { 27 | "name": "stdout", 28 | "output_type": "stream", 29 | "text": [ 30 | "['sunny', 'warm', 'normal', 'strong', 'warm', 'same', 'yes']\n", 31 | "['sunny', 'warm', 'high', 'strong', 'warm', 'same', 'yes']\n", 32 | "['rainy', 'cold', 'high', 'strong', 'warm', 'change', 'no']\n", 33 | "['sunny', 'warm', 'high', 'strong', 'cool', 'change', 'yes']\n", 34 | "\n", 35 | "\n", 36 | "The Maximally specific hypothesis for training instances is ['sunny', 'warm', '?', 'strong', '?', '?']\n" 37 | ] 38 | } 39 | ], 40 | "source": [ 41 | "import csv \n", 42 | "with open('/kaggle/input/enjoysport-csv/enjoysport.csv','r') as file:\n", 43 | " data = [row for row in csv.reader(file)]\n", 44 | " #print(\"The total number of training instances are:\",\n", 45 | "\n", 46 | " for row in data[1:]:\n", 47 | " print(row)\n", 48 | " print()\n", 49 | "num_attribute = len(data[0])-1\n", 50 | "hypothesis = ['0']*num_attribute\n", 51 | "\n", 52 | "for i in range(0,len(data)):\n", 53 | " if data[i][num_attribute ] == 'yes':\n", 54 | " for j in range(0,num_attribute):\n", 55 | " if hypothesis[j] == '0' or hypothesis[j] == data[i][j]:\n", 56 | " hypothesis[j] = data[i][j]\n", 57 | " else:\n", 58 | " hypothesis[j] = '?'\n", 59 | "print(\"\\nThe Maximally specific hypothesis for training instances is\",hypothesis)" 60 | ] 61 | } 62 | ], 63 | "metadata": { 64 | "kaggle": { 65 | "accelerator": "none", 66 | "dataSources": [ 67 | { 68 | "datasetId": 5477690, 69 | "sourceId": 9079671, 70 | "sourceType": "datasetVersion" 71 | } 72 | ], 73 | "dockerImageVersionId": 30746, 74 | "isGpuEnabled": false, 75 | "isInternetEnabled": true, 76 | "language": "python", 77 | "sourceType": "notebook" 78 | }, 79 | "kernelspec": { 80 | "display_name": "Python 3", 81 | "language": "python", 82 | "name": "python3" 83 | }, 84 | "language_info": { 85 | "codemirror_mode": { 86 | "name": "ipython", 87 | "version": 3 88 | }, 89 | "file_extension": ".py", 90 | "mimetype": "text/x-python", 91 | "name": "python", 92 | "nbconvert_exporter": "python", 93 | "pygments_lexer": "ipython3", 94 | "version": "3.10.13" 95 | }, 96 | "papermill": { 97 | "default_parameters": {}, 98 | "duration": 3.602061, 99 | "end_time": "2024-08-01T08:08:53.961764", 100 | "environment_variables": {}, 101 | "exception": null, 102 | "input_path": "__notebook__.ipynb", 103 | "output_path": "__notebook__.ipynb", 104 | "parameters": {}, 105 | "start_time": "2024-08-01T08:08:50.359703", 106 | "version": "2.5.0" 107 | } 108 | }, 109 | "nbformat": 4, 110 | "nbformat_minor": 5 111 | } 112 | -------------------------------------------------------------------------------- /program1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "4f3fd18a", 6 | "metadata": { 7 | "papermill": { 8 | "duration": 0.002133, 9 | "end_time": "2024-07-21T16:36:48.221078", 10 | "exception": false, 11 | "start_time": "2024-07-21T16:36:48.218945", 12 | "status": "completed" 13 | }, 14 | "tags": [] 15 | }, 16 | "source": [ 17 | "**Aim: Illustrate and Demonstrate the working model and principle of Find-S algorithm.\n", 18 | "Program: For a given set of training data examples stored in a .CSV file, implement and demonstrate the Find-S algorithm to output a description of the set of all hypotheses consistent with the training examples.**" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "id": "99191800", 25 | "metadata": { 26 | "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", 27 | "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5", 28 | "execution": { 29 | "iopub.execute_input": "2024-07-21T16:36:48.226132Z", 30 | "iopub.status.busy": "2024-07-21T16:36:48.225743Z", 31 | "iopub.status.idle": "2024-07-21T16:36:48.247342Z", 32 | "shell.execute_reply": "2024-07-21T16:36:48.246124Z" 33 | }, 34 | "papermill": { 35 | "duration": 0.027498, 36 | "end_time": "2024-07-21T16:36:48.250332", 37 | "exception": false, 38 | "start_time": "2024-07-21T16:36:48.222834", 39 | "status": "completed" 40 | }, 41 | "tags": [] 42 | }, 43 | "outputs": [ 44 | { 45 | "name": "stdout", 46 | "output_type": "stream", 47 | "text": [ 48 | "['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same', 'Yes']\n", 49 | "['Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same', 'Yes']\n", 50 | "['Rainy', 'Cold', 'High', 'Strong', 'Warm', 'Change', 'No']\n", 51 | "['Sunny', 'Warm', 'High', 'Strong', 'Cool', 'Change', 'Yes']\n", 52 | "Most specific hypothesis is\n", 53 | "[['0', '0', '0', '0', '0', '0']]\n" 54 | ] 55 | } 56 | ], 57 | "source": [ 58 | "import csv\n", 59 | "\n", 60 | "# Open the CSV file\n", 61 | "with open('/kaggle/input/find-s-algorithm-dataset/ws.csv', 'r') as f:\n", 62 | " reader = csv.reader(f)\n", 63 | " your_list = list(reader)\n", 64 | "\n", 65 | "# Initialize the hypothesis\n", 66 | "h = [['0', '0', '0', '0', '0', '0']]\n", 67 | "\n", 68 | "# Process each row in the dataset\n", 69 | "for i in your_list:\n", 70 | " print(i)\n", 71 | " if i[-1] == \"True\":\n", 72 | " j = 0\n", 73 | " for x in i:\n", 74 | " if x != \"True\":\n", 75 | " if x != h[0][j] and h[0][j] == '0':\n", 76 | " h[0][j] = x\n", 77 | " elif x != h[0][j] and h[0][j] != '0':\n", 78 | " h[0][j] = '?'\n", 79 | " else:\n", 80 | " pass\n", 81 | " j += 1\n", 82 | "\n", 83 | "# Print the most specific hypothesis\n", 84 | "print(\"Most specific hypothesis is\")\n", 85 | "print(h)" 86 | ] 87 | } 88 | ], 89 | "metadata": { 90 | "kaggle": { 91 | "accelerator": "none", 92 | "dataSources": [ 93 | { 94 | "datasetId": 1284212, 95 | "sourceId": 2140252, 96 | "sourceType": "datasetVersion" 97 | } 98 | ], 99 | "dockerImageVersionId": 30746, 100 | "isGpuEnabled": false, 101 | "isInternetEnabled": true, 102 | "language": "python", 103 | "sourceType": "notebook" 104 | }, 105 | "kernelspec": { 106 | "display_name": "Python 3", 107 | "language": "python", 108 | "name": "python3" 109 | }, 110 | "language_info": { 111 | "codemirror_mode": { 112 | "name": "ipython", 113 | "version": 3 114 | }, 115 | "file_extension": ".py", 116 | "mimetype": "text/x-python", 117 | "name": "python", 118 | "nbconvert_exporter": "python", 119 | "pygments_lexer": "ipython3", 120 | "version": "3.10.13" 121 | }, 122 | "papermill": { 123 | "default_parameters": {}, 124 | "duration": 3.141406, 125 | "end_time": "2024-07-21T16:36:48.572240", 126 | "environment_variables": {}, 127 | "exception": null, 128 | "input_path": "__notebook__.ipynb", 129 | "output_path": "__notebook__.ipynb", 130 | "parameters": {}, 131 | "start_time": "2024-07-21T16:36:45.430834", 132 | "version": "2.5.0" 133 | } 134 | }, 135 | "nbformat": 4, 136 | "nbformat_minor": 5 137 | } 138 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Machine Learning Laboratory (21AIL66) 2 | 3 | ![Course Code](https://img.shields.io/badge/Course-21AIL66-blue) 4 | ![Machine Learning](https://img.shields.io/badge/Subject-Machine%20Learning-orange) 5 | ![Laboratory](https://img.shields.io/badge/Type-Laboratory-green) 6 | ![Python](https://img.shields.io/badge/Python-3.x-blue.svg) 7 | ![Pandas](https://img.shields.io/badge/Library-Pandas-green) 8 | ![NumPy](https://img.shields.io/badge/Library-NumPy-yellow) 9 | ![Scikit-Learn](https://img.shields.io/badge/Library-Scikit--Learn-orange) 10 | 11 | Welcome to the Machine Learning Laboratory (21AIL66) repository. This repository contains a list of problems and solutions developed as part of the Machine Learning lab coursework. 12 | 13 | ## List of Programs 14 | 15 | Here is a list of programs covered in this lab course: 16 | 17 | 1. **Find-S Algorithm** 18 | - **Aim**: Illustrate and demonstrate the working model and principle of the Find-S algorithm. 19 | - **Program**: Implement the Find-S algorithm for a given set of training data examples stored in a .CSV file. 20 | 21 | 2. **Candidate Elimination Algorithm** 22 | - **Aim**: Demonstrate the working model and principle of the Candidate Elimination algorithm. 23 | - **Program**: Implement the Candidate Elimination algorithm for a given set of training data examples stored in a .CSV file. 24 | 25 | 3. **Decision Tree (ID3 Algorithm)** 26 | - **Aim**: Construct the decision tree using training data sets under supervised learning. 27 | - **Program**: Write a program to demonstrate the ID3 algorithm. Use an appropriate data set for building the decision tree and classify a new sample. 28 | 29 | 4. **Artificial Neural Network (Backpropagation)** 30 | - **Aim**: Understand the working principle of Artificial Neural Networks with feed-forward and feed-backward principles. 31 | - **Program**: Build an Artificial Neural Network using the Backpropagation algorithm and test it with appropriate datasets. 32 | 33 | 5. **Naive Bayes Classifier** 34 | - **Aim**: Demonstrate the text classifier using the Naïve Bayes classifier algorithm. 35 | - **Program**: Implement the Naive Bayes classifier for a sample training data set stored in a .CSV file and compute its accuracy. 36 | 37 | 6. **Bayesian Belief Network** 38 | - **Aim**: Demonstrate and analyze the results sets obtained from Bayesian belief network principles. 39 | - **Program**: Construct a Bayesian network using medical data and diagnose heart patients with a standard Heart Disease Data Set. 40 | 41 | 7. **K-Means Clustering (Expectation Maximization)** 42 | - **Aim**: Implement and demonstrate the working model of K-means clustering algorithm with Expectation Maximization concept. 43 | - **Program**: Apply the EM algorithm and K-Means clustering to a dataset stored in a .CSV file, compare the results, and analyze the quality of clustering. 44 | 45 | 8. **K-Nearest Neighbour (KNN)** 46 | - **Aim**: Demonstrate and analyze the results of classification based on the KNN Algorithm. 47 | - **Program**: Implement the KNN algorithm to classify the iris dataset, printing both correct and wrong predictions. 48 | 49 | 9. **Locally Weighted Regression** 50 | - **Aim**: Understand and analyze the concept of Regression algorithm techniques. 51 | - **Program**: Implement the Locally Weighted Regression algorithm to fit data points and visualize the results with appropriate graphs. 52 | 53 | 10. **Support Vector Machine (SVM)** 54 | - **Aim**: Implement and demonstrate classification algorithm using Support Vector Machine Algorithm. 55 | - **Program**: Implement and demonstrate the working of SVM algorithm for classification purposes. 56 | 57 | ## Kaggle Notebook 58 | 59 | You can view and run all the programs in a Kaggle notebook. Click the link below to access the notebook: 60 | 61 | - [Machine Learning Laboratory (21AIL66) Notebook](https://www.kaggle.com/code/farhakouser/machine-learning-laboratory-21ail66-ipynb/edit) 62 | 63 | Please upvote the notebook and follow me on Kaggle if you find it useful. 64 | 65 | ## Viva Questions 66 | 67 | For detailed viva questions and answers, you can refer to the [Viva Questions Wiki](https://github.com/FarhaKousar1601/Machine-Learning-Laboratory-21AIL66-/wiki/viva-vtu). 68 | 69 | ## How to Use 70 | 71 | 1. Clone the repository: 72 | ```sh 73 | git clone https://github.com/FarhaKousar1601/Machine-Learning-Laboratory-21AIL66-.git 74 | ``` 75 | 2. Navigate to the project directory: 76 | ```sh 77 | cd Machine-Learning-Laboratory-21AIL66- 78 | ``` 79 | 3. Open the relevant program file and run it using your preferred Python environment. 80 | 81 | ## Prerequisites 82 | 83 | - Python 3.x 84 | - Required libraries: numpy, pandas, scikit-learn, matplotlib (Install using pip if not already installed) 85 | 86 | ```sh 87 | pip install numpy pandas scikit-learn matplotlib 88 | ``` 89 | 90 | ## Contributing 91 | 92 | Contributions are welcome! Please fork the repository, star it, learn from the code, discuss any improvements, and create a pull request with your changes. 93 | 94 | ## License 95 | 96 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 97 | 98 | --- 99 | ## Connect with me 100 | [LinkedIn](https://www.linkedin.com/in/farhakousar16) 101 | [GitHub](https://github.com/FarhaKousar1601) 102 | 103 | © 2024 Department of AIML, KNS Institute of Technology 104 | -------------------------------------------------------------------------------- /Candidate_Elimination.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "5b3cf8cb", 7 | "metadata": { 8 | "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", 9 | "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5", 10 | "execution": { 11 | "iopub.execute_input": "2024-08-01T09:59:13.700244Z", 12 | "iopub.status.busy": "2024-08-01T09:59:13.699259Z", 13 | "iopub.status.idle": "2024-08-01T09:59:13.720744Z", 14 | "shell.execute_reply": "2024-08-01T09:59:13.719746Z" 15 | }, 16 | "papermill": { 17 | "duration": 0.027829, 18 | "end_time": "2024-08-01T09:59:13.723541", 19 | "exception": false, 20 | "start_time": "2024-08-01T09:59:13.695712", 21 | "status": "completed" 22 | }, 23 | "tags": [] 24 | }, 25 | "outputs": [ 26 | { 27 | "name": "stdout", 28 | "output_type": "stream", 29 | "text": [ 30 | "\n", 31 | "Steps of Candidate Elimination Algorithm, training instance 2\n", 32 | "S: ['sunny', 'warm', 'normal', 'strong', 'warm', 'same']\n", 33 | "G: [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]\n", 34 | "\n", 35 | "Steps of Candidate Elimination Algorithm, training instance 3\n", 36 | "S: ['sunny', 'warm', 'normal', 'strong', 'warm', 'same']\n", 37 | "G: [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]\n", 38 | "\n", 39 | "Steps of Candidate Elimination Algorithm, training instance 4\n", 40 | "S: ['sunny', 'warm', 'normal', 'strong', 'warm', 'same']\n", 41 | "G: [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]\n", 42 | "\n", 43 | "Steps of Candidate Elimination Algorithm, training instance 5\n", 44 | "S: ['sunny', 'warm', 'normal', 'strong', 'warm', 'same']\n", 45 | "G: [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]\n", 46 | "\n", 47 | "Final specific hypothesis:\n", 48 | " ['sunny', 'warm', 'normal', 'strong', 'warm', 'same']\n", 49 | "\n", 50 | "Final general hypothesis:\n", 51 | " []\n" 52 | ] 53 | } 54 | ], 55 | "source": [ 56 | "import csv\n", 57 | "\n", 58 | "# Load data from CSV file\n", 59 | "with open('/kaggle/input/enjoysport-csv/enjoysport.csv') as f:\n", 60 | " csv_file = csv.reader(f)\n", 61 | " data = list(csv_file)\n", 62 | "\n", 63 | "# Initialize S and G\n", 64 | "s = data[1][:-1]\n", 65 | "g = [['?' for _ in range(len(s))] for _ in range(len(s))]\n", 66 | "\n", 67 | "# Process each training instance\n", 68 | "for i in data[1:]:\n", 69 | " if i[-1] == \"Yes\":\n", 70 | " for j in range(len(s)):\n", 71 | " if i[j] != s[j]:\n", 72 | " s[j] = '?'\n", 73 | " g[j][j] = '?'\n", 74 | " elif i[-1] == \"No\":\n", 75 | " for j in range(len(s)):\n", 76 | " if i[j] != s[j]:\n", 77 | " g[j][j] = s[j]\n", 78 | " else:\n", 79 | " g[j][j] = '?'\n", 80 | " \n", 81 | " # Print the current hypotheses\n", 82 | " print(f\"\\nSteps of Candidate Elimination Algorithm, training instance {data.index(i) + 1}\")\n", 83 | " print(\"S:\", s)\n", 84 | " print(\"G:\", g)\n", 85 | "\n", 86 | "# Collect and print final general hypotheses\n", 87 | "gh = [i for i in g if any(j != '?' for j in i)]\n", 88 | "\n", 89 | "print(\"\\nFinal specific hypothesis:\\n\", s)\n", 90 | "print(\"\\nFinal general hypothesis:\\n\", gh)\n" 91 | ] 92 | } 93 | ], 94 | "metadata": { 95 | "kaggle": { 96 | "accelerator": "none", 97 | "dataSources": [ 98 | { 99 | "datasetId": 5477690, 100 | "sourceId": 9079671, 101 | "sourceType": "datasetVersion" 102 | } 103 | ], 104 | "dockerImageVersionId": 30746, 105 | "isGpuEnabled": false, 106 | "isInternetEnabled": true, 107 | "language": "python", 108 | "sourceType": "notebook" 109 | }, 110 | "kernelspec": { 111 | "display_name": "Python 3", 112 | "language": "python", 113 | "name": "python3" 114 | }, 115 | "language_info": { 116 | "codemirror_mode": { 117 | "name": "ipython", 118 | "version": 3 119 | }, 120 | "file_extension": ".py", 121 | "mimetype": "text/x-python", 122 | "name": "python", 123 | "nbconvert_exporter": "python", 124 | "pygments_lexer": "ipython3", 125 | "version": "3.10.13" 126 | }, 127 | "papermill": { 128 | "default_parameters": {}, 129 | "duration": 2.95831, 130 | "end_time": "2024-08-01T09:59:14.044536", 131 | "environment_variables": {}, 132 | "exception": null, 133 | "input_path": "__notebook__.ipynb", 134 | "output_path": "__notebook__.ipynb", 135 | "parameters": {}, 136 | "start_time": "2024-08-01T09:59:11.086226", 137 | "version": "2.5.0" 138 | } 139 | }, 140 | "nbformat": 4, 141 | "nbformat_minor": 5 142 | } 143 | -------------------------------------------------------------------------------- /program2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "e1fded58", 6 | "metadata": { 7 | "papermill": { 8 | "duration": 0.002442, 9 | "end_time": "2024-07-21T16:45:51.237614", 10 | "exception": false, 11 | "start_time": "2024-07-21T16:45:51.235172", 12 | "status": "completed" 13 | }, 14 | "tags": [] 15 | }, 16 | "source": [ 17 | "**2.Aim: Demonstrate the working model and principle of candidate elimination algorithm. Program: For a given set of training data examples stored in a .CSV file, implement and demonstrate the Candidate-Elimination algorithm to output a description of the set of all hypotheses consistent with the training examples.**" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 1, 23 | "id": "7da4fedd", 24 | "metadata": { 25 | "execution": { 26 | "iopub.execute_input": "2024-07-21T16:45:51.243907Z", 27 | "iopub.status.busy": "2024-07-21T16:45:51.243348Z", 28 | "iopub.status.idle": "2024-07-21T16:45:51.276830Z", 29 | "shell.execute_reply": "2024-07-21T16:45:51.275030Z" 30 | }, 31 | "papermill": { 32 | "duration": 0.041262, 33 | "end_time": "2024-07-21T16:45:51.280938", 34 | "exception": false, 35 | "start_time": "2024-07-21T16:45:51.239676", 36 | "status": "completed" 37 | }, 38 | "tags": [] 39 | }, 40 | "outputs": [ 41 | { 42 | "name": "stdout", 43 | "output_type": "stream", 44 | "text": [ 45 | "Steps of Candidate Elimination Algorithm 1\n", 46 | "[]\n", 47 | "[]\n", 48 | "Steps of Candidate Elimination Algorithm 2\n", 49 | "[]\n", 50 | "[]\n", 51 | "Steps of Candidate Elimination Algorithm 3\n", 52 | "[]\n", 53 | "[]\n", 54 | "Steps of Candidate Elimination Algorithm 4\n", 55 | "[]\n", 56 | "[]\n", 57 | "Steps of Candidate Elimination Algorithm 5\n", 58 | "[]\n", 59 | "[]\n", 60 | "\n", 61 | "Final specific hypothesis:\n", 62 | " []\n", 63 | "\n", 64 | "Final general hypothesis:\n", 65 | " []\n" 66 | ] 67 | } 68 | ], 69 | "source": [ 70 | "import csv\n", 71 | "\n", 72 | "# Opens the CSV file \"tennis.csv\" and reads its contents\n", 73 | "with open(\"/kaggle/input/tennis-csv/tennis.csv\") as f:\n", 74 | " csv_file = csv.reader(f)\n", 75 | " data = list(csv_file)\n", 76 | "\n", 77 | "# Initializes the specific hypothesis 's' and the general hypothesis 'gh'\n", 78 | "s = ['?' for _ in range(len(data[0])-1)]\n", 79 | "gh = [['?' for _ in range(len(s))] for _ in range(len(s))]\n", 80 | "\n", 81 | "# Iterates through each data instance in 'data'\n", 82 | "for instance in data:\n", 83 | " # Checks if the last attribute of the instance is \"Yes\"\n", 84 | " if instance[-1] == \"Yes\":\n", 85 | " # Updates 's' and 'gh' based on positive instance\n", 86 | " for j in range(len(s)):\n", 87 | " if instance[j] != s[j] and s[j] == '?':\n", 88 | " s[j] = instance[j]\n", 89 | " elif instance[j] != s[j] and s[j] != '?':\n", 90 | " s[j] = '?'\n", 91 | " elif instance[-1] == \"No\":\n", 92 | " # Updates 'gh' based on negative instance\n", 93 | " for j in range(len(s)):\n", 94 | " if instance[j] != s[j]:\n", 95 | " gh[j][j] = s[j]\n", 96 | " else:\n", 97 | " gh[j][j] = '?'\n", 98 | "\n", 99 | " # Prints the steps of the Candidate Elimination Algorithm\n", 100 | " print(\"Steps of Candidate Elimination Algorithm\", data.index(instance) + 1)\n", 101 | " print(s)\n", 102 | " print(gh)\n", 103 | "\n", 104 | " # Removes inconsistent hypotheses from 'gh'\n", 105 | " g = []\n", 106 | " for i in gh:\n", 107 | " if i not in g:\n", 108 | " g.append(i)\n", 109 | "\n", 110 | " # Ends the loop if 's' becomes the final hypothesis\n", 111 | " if len(g) == 1:\n", 112 | " break\n", 113 | "\n", 114 | "# Prints the final specific and general hypotheses\n", 115 | "print(\"\\nFinal specific hypothesis:\\n\", s)\n", 116 | "print(\"\\nFinal general hypothesis:\\n\", g)\n" 117 | ] 118 | } 119 | ], 120 | "metadata": { 121 | "kaggle": { 122 | "accelerator": "none", 123 | "dataSources": [ 124 | { 125 | "datasetId": 5405088, 126 | "sourceId": 8976764, 127 | "sourceType": "datasetVersion" 128 | } 129 | ], 130 | "dockerImageVersionId": 30746, 131 | "isGpuEnabled": false, 132 | "isInternetEnabled": true, 133 | "language": "python", 134 | "sourceType": "notebook" 135 | }, 136 | "kernelspec": { 137 | "display_name": "Python 3", 138 | "language": "python", 139 | "name": "python3" 140 | }, 141 | "language_info": { 142 | "codemirror_mode": { 143 | "name": "ipython", 144 | "version": 3 145 | }, 146 | "file_extension": ".py", 147 | "mimetype": "text/x-python", 148 | "name": "python", 149 | "nbconvert_exporter": "python", 150 | "pygments_lexer": "ipython3", 151 | "version": "3.10.13" 152 | }, 153 | "papermill": { 154 | "default_parameters": {}, 155 | "duration": 3.452149, 156 | "end_time": "2024-07-21T16:45:51.705493", 157 | "environment_variables": {}, 158 | "exception": null, 159 | "input_path": "__notebook__.ipynb", 160 | "output_path": "__notebook__.ipynb", 161 | "parameters": {}, 162 | "start_time": "2024-07-21T16:45:48.253344", 163 | "version": "2.5.0" 164 | } 165 | }, 166 | "nbformat": 4, 167 | "nbformat_minor": 5 168 | } 169 | -------------------------------------------------------------------------------- /program4.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "c0effbc9", 6 | "metadata": { 7 | "papermill": { 8 | "duration": 0.002405, 9 | "end_time": "2024-07-21T16:53:04.583449", 10 | "exception": false, 11 | "start_time": "2024-07-21T16:53:04.581044", 12 | "status": "completed" 13 | }, 14 | "tags": [] 15 | }, 16 | "source": [ 17 | "**4)Aim: To understand the working principle of Artificial Neural network with feed forward and feed backward principle. Program: Build an Artificial Neural Network by implementing the Backpropagation algorithm and test the same using appropriate data sets**" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 1, 23 | "id": "f5fac3af", 24 | "metadata": { 25 | "execution": { 26 | "iopub.execute_input": "2024-07-21T16:53:04.589615Z", 27 | "iopub.status.busy": "2024-07-21T16:53:04.589176Z", 28 | "iopub.status.idle": "2024-07-21T16:53:09.662257Z", 29 | "shell.execute_reply": "2024-07-21T16:53:09.660775Z" 30 | }, 31 | "papermill": { 32 | "duration": 5.079468, 33 | "end_time": "2024-07-21T16:53:09.665049", 34 | "exception": false, 35 | "start_time": "2024-07-21T16:53:04.585581", 36 | "status": "completed" 37 | }, 38 | "tags": [] 39 | }, 40 | "outputs": [ 41 | { 42 | "name": "stdout", 43 | "output_type": "stream", 44 | "text": [ 45 | "Epoch: 0, Loss: 0.2949\n", 46 | "Epoch: 1000, Loss: 0.5025\n", 47 | "Epoch: 2000, Loss: 0.5025\n", 48 | "Epoch: 3000, Loss: 0.5025\n", 49 | "Epoch: 4000, Loss: 0.5025\n", 50 | "Epoch: 5000, Loss: 0.5025\n", 51 | "Epoch: 6000, Loss: 0.5025\n", 52 | "Epoch: 7000, Loss: 0.5025\n", 53 | "Epoch: 8000, Loss: 0.5025\n", 54 | "Epoch: 9000, Loss: 0.5025\n", 55 | "Accuracy: 51.00%\n" 56 | ] 57 | } 58 | ], 59 | "source": [ 60 | "import numpy as np\n", 61 | "from sklearn.model_selection import train_test_split\n", 62 | "from sklearn.datasets import make_moons\n", 63 | "from sklearn.preprocessing import OneHotEncoder\n", 64 | "\n", 65 | "# Activation function and its derivative\n", 66 | "sigmoid = lambda x: 1 / (1 + np.exp(-x))\n", 67 | "sigmoid_derivative = lambda x: x * (1 - x)\n", 68 | "\n", 69 | "# ANN class\n", 70 | "class NeuralNetwork:\n", 71 | " def __init__(self, input_size, hidden_size, output_size):\n", 72 | " self.W1 = np.random.randn(input_size, hidden_size)\n", 73 | " self.b1 = np.zeros((1, hidden_size))\n", 74 | " self.W2 = np.random.randn(hidden_size, output_size)\n", 75 | " self.b2 = np.zeros((1, output_size))\n", 76 | "\n", 77 | " def forward(self, X):\n", 78 | " self.a1 = sigmoid(np.dot(X, self.W1) + self.b1)\n", 79 | " self.a2 = sigmoid(np.dot(self.a1, self.W2) + self.b2)\n", 80 | " return self.a2\n", 81 | "\n", 82 | " def backward(self, X, y, output):\n", 83 | " d_output = (y - output) * sigmoid_derivative(output)\n", 84 | " d_hidden = d_output.dot(self.W2.T) * sigmoid_derivative(self.a1)\n", 85 | "\n", 86 | " self.W2 += self.a1.T.dot(d_output)\n", 87 | " self.b2 += np.sum(d_output, axis=0, keepdims=True)\n", 88 | " self.W1 += X.T.dot(d_hidden)\n", 89 | " self.b1 += np.sum(d_hidden, axis=0, keepdims=True)\n", 90 | "\n", 91 | " def train(self, X, y, epochs=10000):\n", 92 | " for epoch in range(epochs):\n", 93 | " output = self.forward(X)\n", 94 | " self.backward(X, y, output)\n", 95 | " if epoch % 1000 == 0:\n", 96 | " loss = np.mean(np.square(y - output))\n", 97 | " print(f'Epoch: {epoch}, Loss: {loss:.4f}')\n", 98 | "\n", 99 | "# Create and preprocess dataset\n", 100 | "X, y = make_moons(n_samples=1000, noise=0.2)\n", 101 | "y = OneHotEncoder().fit_transform(y.reshape(-1, 1)).toarray()\n", 102 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n", 103 | "\n", 104 | "# Initialize and train network\n", 105 | "nn = NeuralNetwork(X_train.shape[1], 10, y_train.shape[1])\n", 106 | "nn.train(X_train, y_train)\n", 107 | "\n", 108 | "# Test network\n", 109 | "output = nn.forward(X_test)\n", 110 | "predictions = np.argmax(output, axis=1)\n", 111 | "accuracy = np.mean(predictions == np.argmax(y_test, axis=1))\n", 112 | "print(f'Accuracy: {accuracy * 100:.2f}%')\n" 113 | ] 114 | } 115 | ], 116 | "metadata": { 117 | "kaggle": { 118 | "accelerator": "none", 119 | "dataSources": [], 120 | "dockerImageVersionId": 30746, 121 | "isGpuEnabled": false, 122 | "isInternetEnabled": true, 123 | "language": "python", 124 | "sourceType": "notebook" 125 | }, 126 | "kernelspec": { 127 | "display_name": "Python 3", 128 | "language": "python", 129 | "name": "python3" 130 | }, 131 | "language_info": { 132 | "codemirror_mode": { 133 | "name": "ipython", 134 | "version": 3 135 | }, 136 | "file_extension": ".py", 137 | "mimetype": "text/x-python", 138 | "name": "python", 139 | "nbconvert_exporter": "python", 140 | "pygments_lexer": "ipython3", 141 | "version": "3.10.13" 142 | }, 143 | "papermill": { 144 | "default_parameters": {}, 145 | "duration": 9.037337, 146 | "end_time": "2024-07-21T16:53:10.191528", 147 | "environment_variables": {}, 148 | "exception": null, 149 | "input_path": "__notebook__.ipynb", 150 | "output_path": "__notebook__.ipynb", 151 | "parameters": {}, 152 | "start_time": "2024-07-21T16:53:01.154191", 153 | "version": "2.5.0" 154 | } 155 | }, 156 | "nbformat": 4, 157 | "nbformat_minor": 5 158 | } 159 | -------------------------------------------------------------------------------- /program8.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "739e1301", 6 | "metadata": { 7 | "papermill": { 8 | "duration": 0.002575, 9 | "end_time": "2024-07-21T18:45:09.280269", 10 | "exception": false, 11 | "start_time": "2024-07-21T18:45:09.277694", 12 | "status": "completed" 13 | }, 14 | "tags": [] 15 | }, 16 | "source": [ 17 | "**8)Aim: Demonstrate and analyse the results of classification based on KNN Algorithm. Program: Write a program to implement k-Nearest Neighbour algorithm to classify the iris data set. Print both correct and wrong predictions. Java/Python ML library classes can be used for this problem.**" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 1, 23 | "id": "2475f658", 24 | "metadata": { 25 | "execution": { 26 | "iopub.execute_input": "2024-07-21T18:45:09.286394Z", 27 | "iopub.status.busy": "2024-07-21T18:45:09.285910Z", 28 | "iopub.status.idle": "2024-07-21T18:45:11.346976Z", 29 | "shell.execute_reply": "2024-07-21T18:45:11.345675Z" 30 | }, 31 | "papermill": { 32 | "duration": 2.067371, 33 | "end_time": "2024-07-21T18:45:11.349877", 34 | "exception": false, 35 | "start_time": "2024-07-21T18:45:09.282506", 36 | "status": "completed" 37 | }, 38 | "tags": [] 39 | }, 40 | "outputs": [ 41 | { 42 | "name": "stdout", 43 | "output_type": "stream", 44 | "text": [ 45 | "Accuracy: 1.0\n", 46 | "\n", 47 | "Correct predictions:\n", 48 | "Predicted: 1 Actual: 1\n", 49 | "Predicted: 0 Actual: 0\n", 50 | "Predicted: 2 Actual: 2\n", 51 | "Predicted: 1 Actual: 1\n", 52 | "Predicted: 1 Actual: 1\n", 53 | "Predicted: 0 Actual: 0\n", 54 | "Predicted: 1 Actual: 1\n", 55 | "Predicted: 2 Actual: 2\n", 56 | "Predicted: 1 Actual: 1\n", 57 | "Predicted: 1 Actual: 1\n", 58 | "Predicted: 2 Actual: 2\n", 59 | "Predicted: 0 Actual: 0\n", 60 | "Predicted: 0 Actual: 0\n", 61 | "Predicted: 0 Actual: 0\n", 62 | "Predicted: 0 Actual: 0\n", 63 | "Predicted: 1 Actual: 1\n", 64 | "Predicted: 2 Actual: 2\n", 65 | "Predicted: 1 Actual: 1\n", 66 | "Predicted: 1 Actual: 1\n", 67 | "Predicted: 2 Actual: 2\n", 68 | "Predicted: 0 Actual: 0\n", 69 | "Predicted: 2 Actual: 2\n", 70 | "Predicted: 0 Actual: 0\n", 71 | "Predicted: 2 Actual: 2\n", 72 | "Predicted: 2 Actual: 2\n", 73 | "Predicted: 2 Actual: 2\n", 74 | "Predicted: 2 Actual: 2\n", 75 | "Predicted: 2 Actual: 2\n", 76 | "Predicted: 0 Actual: 0\n", 77 | "Predicted: 0 Actual: 0\n", 78 | "Predicted: 0 Actual: 0\n", 79 | "Predicted: 0 Actual: 0\n", 80 | "Predicted: 1 Actual: 1\n", 81 | "Predicted: 0 Actual: 0\n", 82 | "Predicted: 0 Actual: 0\n", 83 | "Predicted: 2 Actual: 2\n", 84 | "Predicted: 1 Actual: 1\n", 85 | "Predicted: 0 Actual: 0\n", 86 | "Predicted: 0 Actual: 0\n", 87 | "Predicted: 0 Actual: 0\n", 88 | "Predicted: 2 Actual: 2\n", 89 | "Predicted: 1 Actual: 1\n", 90 | "Predicted: 1 Actual: 1\n", 91 | "Predicted: 0 Actual: 0\n", 92 | "Predicted: 0 Actual: 0\n", 93 | "\n", 94 | "Wrong predictions:\n" 95 | ] 96 | } 97 | ], 98 | "source": [ 99 | "from sklearn.datasets import load_iris\n", 100 | "from sklearn.model_selection import train_test_split\n", 101 | "from sklearn.neighbors import KNeighborsClassifier\n", 102 | "from sklearn.metrics import accuracy_score\n", 103 | "\n", 104 | "# Load Iris dataset\n", 105 | "iris = load_iris()\n", 106 | "X = iris.data\n", 107 | "y = iris.target\n", 108 | "\n", 109 | "# Split dataset into training and testing sets\n", 110 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)\n", 111 | "\n", 112 | "# Initialize k-NN classifier\n", 113 | "knn = KNeighborsClassifier(n_neighbors=3)\n", 114 | "\n", 115 | "# Train the classifier\n", 116 | "knn.fit(X_train, y_train)\n", 117 | "\n", 118 | "# Predict the labels for test set\n", 119 | "y_pred = knn.predict(X_test)\n", 120 | "\n", 121 | "# Calculate accuracy\n", 122 | "accuracy = accuracy_score(y_test, y_pred)\n", 123 | "print(\"Accuracy:\", accuracy)\n", 124 | "\n", 125 | "# Print correct and wrong predictions\n", 126 | "print(\"\\nCorrect predictions:\")\n", 127 | "for i in range(len(y_test)):\n", 128 | " if y_pred[i] == y_test[i]:\n", 129 | " print(\"Predicted:\", y_pred[i], \"Actual:\", y_test[i])\n", 130 | "\n", 131 | "print(\"\\nWrong predictions:\")\n", 132 | "for i in range(len(y_test)):\n", 133 | " if y_pred[i] != y_test[i]:\n", 134 | " print(\"Predicted:\", y_pred[i], \"Actual:\", y_test[i])\n" 135 | ] 136 | } 137 | ], 138 | "metadata": { 139 | "kaggle": { 140 | "accelerator": "none", 141 | "dataSources": [], 142 | "dockerImageVersionId": 30746, 143 | "isGpuEnabled": false, 144 | "isInternetEnabled": true, 145 | "language": "python", 146 | "sourceType": "notebook" 147 | }, 148 | "kernelspec": { 149 | "display_name": "Python 3", 150 | "language": "python", 151 | "name": "python3" 152 | }, 153 | "language_info": { 154 | "codemirror_mode": { 155 | "name": "ipython", 156 | "version": 3 157 | }, 158 | "file_extension": ".py", 159 | "mimetype": "text/x-python", 160 | "name": "python", 161 | "nbconvert_exporter": "python", 162 | "pygments_lexer": "ipython3", 163 | "version": "3.10.13" 164 | }, 165 | "papermill": { 166 | "default_parameters": {}, 167 | "duration": 5.877774, 168 | "end_time": "2024-07-21T18:45:11.873906", 169 | "environment_variables": {}, 170 | "exception": null, 171 | "input_path": "__notebook__.ipynb", 172 | "output_path": "__notebook__.ipynb", 173 | "parameters": {}, 174 | "start_time": "2024-07-21T18:45:05.996132", 175 | "version": "2.5.0" 176 | } 177 | }, 178 | "nbformat": 4, 179 | "nbformat_minor": 5 180 | } 181 | -------------------------------------------------------------------------------- /program5.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "4c54070c", 6 | "metadata": { 7 | "papermill": { 8 | "duration": 0.002108, 9 | "end_time": "2024-07-21T18:21:22.560873", 10 | "exception": false, 11 | "start_time": "2024-07-21T18:21:22.558765", 12 | "status": "completed" 13 | }, 14 | "tags": [] 15 | }, 16 | "source": [ 17 | "5)Aim: Demonstrate the text classifier using Naïve bayes classifier algorithm. Program: Write a program to implement the naive Bayesian classifier for a sample training data set stored as a .CSV file. Compute the accuracy of the classifier, considering few test data sets." 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 1, 23 | "id": "9fd869f7", 24 | "metadata": { 25 | "execution": { 26 | "iopub.execute_input": "2024-07-21T18:21:22.565886Z", 27 | "iopub.status.busy": "2024-07-21T18:21:22.565488Z", 28 | "iopub.status.idle": "2024-07-21T18:21:24.724688Z", 29 | "shell.execute_reply": "2024-07-21T18:21:24.723559Z" 30 | }, 31 | "papermill": { 32 | "duration": 2.164594, 33 | "end_time": "2024-07-21T18:21:24.727190", 34 | "exception": false, 35 | "start_time": "2024-07-21T18:21:22.562596", 36 | "status": "completed" 37 | }, 38 | "tags": [] 39 | }, 40 | "outputs": [ 41 | { 42 | "name": "stdout", 43 | "output_type": "stream", 44 | "text": [ 45 | "\n", 46 | "RangeIndex: 150 entries, 0 to 149\n", 47 | "Data columns (total 6 columns):\n", 48 | " # Column Non-Null Count Dtype \n", 49 | "--- ------ -------------- ----- \n", 50 | " 0 Unnamed: 0 150 non-null int64 \n", 51 | " 1 SepalLengthCm 150 non-null object \n", 52 | " 2 SepalWidthCm 149 non-null float64\n", 53 | " 3 PetalLengthCm 149 non-null object \n", 54 | " 4 PetalWidthCm 150 non-null float64\n", 55 | " 5 Species 149 non-null object \n", 56 | "dtypes: float64(2), int64(1), object(3)\n", 57 | "memory usage: 7.2+ KB\n", 58 | "None\n", 59 | " Unnamed: 0 SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm \\\n", 60 | "0 1 5.1 3.5 1.4 0.2 \n", 61 | "1 2 4.9 NaN 1.4 0.2 \n", 62 | "2 3 4.7 3.2 1.3 0.2 \n", 63 | "3 4 ?? 3.1 1.5 0.2 \n", 64 | "4 5 5 3.6 ### 0.2 \n", 65 | "\n", 66 | " Species \n", 67 | "0 Iris-setosa \n", 68 | "1 NaN \n", 69 | "2 Iris-setosa \n", 70 | "3 Iris-setosa \n", 71 | "4 Iris-setosa \n", 72 | "['Iris-setosa' nan 'Iris-versicolor' 'Iris-virginica']\n", 73 | "Accuracy: 93.33%\n" 74 | ] 75 | } 76 | ], 77 | "source": [ 78 | "import pandas as pd\n", 79 | "import numpy as np\n", 80 | "from sklearn.model_selection import train_test_split\n", 81 | "from collections import defaultdict\n", 82 | "\n", 83 | "class NaiveBayesClassifier:\n", 84 | " def __init__(self):\n", 85 | " self.priors = {}\n", 86 | " self.likelihoods = defaultdict(dict)\n", 87 | "\n", 88 | " def fit(self, X, y):\n", 89 | " self.classes = np.unique(y)\n", 90 | " total_samples = len(y)\n", 91 | "\n", 92 | " for cls in self.classes:\n", 93 | " X_cls = X[y == cls]\n", 94 | " self.priors[cls] = len(X_cls) / total_samples\n", 95 | "\n", 96 | " for column in X.columns:\n", 97 | " self.likelihoods[column][cls] = X_cls[column].value_counts(normalize=True).to_dict()\n", 98 | "\n", 99 | " def predict(self, X):\n", 100 | " results = []\n", 101 | "\n", 102 | " for i in range(len(X)):\n", 103 | " posteriors = {}\n", 104 | "\n", 105 | " for cls in self.classes:\n", 106 | " prior = np.log(self.priors[cls])\n", 107 | " likelihood = sum(\n", 108 | " np.log(self.likelihoods[col].get(cls, {}).get(X.iloc[i][col], 1e-6))\n", 109 | " for col in X.columns\n", 110 | " )\n", 111 | " posteriors[cls] = prior + likelihood\n", 112 | "\n", 113 | " results.append(max(posteriors, key=posteriors.get))\n", 114 | "\n", 115 | " return results\n", 116 | "\n", 117 | " def accuracy(self, y_true, y_pred):\n", 118 | " return np.mean(np.array(y_true) == np.array(y_pred))\n", 119 | "\n", 120 | "# Load data from CSV file\n", 121 | "data = pd.read_csv('/kaggle/input/iris-dataset/Iris_data_sample.csv')\n", 122 | "\n", 123 | "# Inspect data for inconsistencies\n", 124 | "print(data.info())\n", 125 | "print(data.head())\n", 126 | "\n", 127 | "# Check for NaN values and data types in target column\n", 128 | "print(data.iloc[:, -1].unique())\n", 129 | "\n", 130 | "# Separate features and target variable\n", 131 | "X = data.iloc[:, :-1]\n", 132 | "y = data.iloc[:, -1]\n", 133 | "\n", 134 | "# Clean target variable to ensure consistent data type\n", 135 | "y = y.astype(str)\n", 136 | "\n", 137 | "# Split data into training and testing sets\n", 138 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", 139 | "\n", 140 | "# Initialize and train the Naive Bayes Classifier\n", 141 | "nb_classifier = NaiveBayesClassifier()\n", 142 | "nb_classifier.fit(X_train, y_train)\n", 143 | "\n", 144 | "# Make predictions on the test set\n", 145 | "y_pred = nb_classifier.predict(X_test)\n", 146 | "\n", 147 | "# Compute the accuracy\n", 148 | "accuracy = nb_classifier.accuracy(y_test, y_pred)\n", 149 | "print(f'Accuracy: {accuracy * 100:.2f}%')\n" 150 | ] 151 | } 152 | ], 153 | "metadata": { 154 | "kaggle": { 155 | "accelerator": "none", 156 | "dataSources": [ 157 | { 158 | "datasetId": 5367700, 159 | "sourceId": 8923926, 160 | "sourceType": "datasetVersion" 161 | } 162 | ], 163 | "dockerImageVersionId": 30746, 164 | "isGpuEnabled": false, 165 | "isInternetEnabled": true, 166 | "language": "python", 167 | "sourceType": "notebook" 168 | }, 169 | "kernelspec": { 170 | "display_name": "Python 3", 171 | "language": "python", 172 | "name": "python3" 173 | }, 174 | "language_info": { 175 | "codemirror_mode": { 176 | "name": "ipython", 177 | "version": 3 178 | }, 179 | "file_extension": ".py", 180 | "mimetype": "text/x-python", 181 | "name": "python", 182 | "nbconvert_exporter": "python", 183 | "pygments_lexer": "ipython3", 184 | "version": "3.10.13" 185 | }, 186 | "papermill": { 187 | "default_parameters": {}, 188 | "duration": 5.475488, 189 | "end_time": "2024-07-21T18:21:25.249105", 190 | "environment_variables": {}, 191 | "exception": null, 192 | "input_path": "__notebook__.ipynb", 193 | "output_path": "__notebook__.ipynb", 194 | "parameters": {}, 195 | "start_time": "2024-07-21T18:21:19.773617", 196 | "version": "2.5.0" 197 | } 198 | }, 199 | "nbformat": 4, 200 | "nbformat_minor": 5 201 | } 202 | -------------------------------------------------------------------------------- /program3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "9a44e3aa", 6 | "metadata": { 7 | "papermill": { 8 | "duration": 0.002553, 9 | "end_time": "2024-07-21T16:49:30.098649", 10 | "exception": false, 11 | "start_time": "2024-07-21T16:49:30.096096", 12 | "status": "completed" 13 | }, 14 | "tags": [] 15 | }, 16 | "source": [ 17 | "**3)Aim: To construct the Decision tree using the training data sets under supervised learning concept. Program: Write a program to demonstrate the working of the decision tree based ID3 algorithm. Use an appropriate data set for building the decision tree and apply this knowledge to classify a new sample**" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 1, 23 | "id": "815ba59f", 24 | "metadata": { 25 | "execution": { 26 | "iopub.execute_input": "2024-07-21T16:49:30.106000Z", 27 | "iopub.status.busy": "2024-07-21T16:49:30.105482Z", 28 | "iopub.status.idle": "2024-07-21T16:49:31.166308Z", 29 | "shell.execute_reply": "2024-07-21T16:49:31.165149Z" 30 | }, 31 | "papermill": { 32 | "duration": 1.067837, 33 | "end_time": "2024-07-21T16:49:31.169127", 34 | "exception": false, 35 | "start_time": "2024-07-21T16:49:30.101290", 36 | "status": "completed" 37 | }, 38 | "tags": [] 39 | }, 40 | "outputs": [ 41 | { 42 | "name": "stdout", 43 | "output_type": "stream", 44 | "text": [ 45 | "Constructed Decision Tree:\n", 46 | "{'Outlook': {'Overcast': 'Yes', 'Rain': {'Wind': {'Strong': 'No', 'Weak': 'Yes'}}, 'Sunny': {'Humidity': {'High': 'No', 'Normal': 'Yes'}}}}\n", 47 | "\n", 48 | "Classification Result for the Sample:\n", 49 | "No\n" 50 | ] 51 | } 52 | ], 53 | "source": [ 54 | "import numpy as np\n", 55 | "import pandas as pd\n", 56 | "from collections import Counter\n", 57 | "\n", 58 | "# Defining the dataset\n", 59 | "data = [\n", 60 | " [\"Outlook\", \"Temperature\", \"Humidity\", \"Wind\", \"PlayTennis\"],\n", 61 | " [\"Sunny\", \"Hot\", \"High\", \"Weak\", \"No\"],\n", 62 | " [\"Sunny\", \"Hot\", \"High\", \"Strong\", \"No\"],\n", 63 | " [\"Overcast\", \"Hot\", \"High\", \"Weak\", \"Yes\"],\n", 64 | " [\"Rain\", \"Mild\", \"High\", \"Weak\", \"Yes\"],\n", 65 | " [\"Rain\", \"Cool\", \"Normal\", \"Weak\", \"Yes\"],\n", 66 | " [\"Rain\", \"Cool\", \"Normal\", \"Strong\", \"No\"],\n", 67 | " [\"Overcast\", \"Cool\", \"Normal\", \"Strong\", \"Yes\"],\n", 68 | " [\"Sunny\", \"Mild\", \"High\", \"Weak\", \"No\"],\n", 69 | " [\"Sunny\", \"Cool\", \"Normal\", \"Weak\", \"Yes\"],\n", 70 | " [\"Rain\", \"Mild\", \"Normal\", \"Weak\", \"Yes\"],\n", 71 | " [\"Sunny\", \"Mild\", \"Normal\", \"Strong\", \"Yes\"],\n", 72 | " [\"Overcast\", \"Mild\", \"High\", \"Strong\", \"Yes\"],\n", 73 | " [\"Overcast\", \"Hot\", \"Normal\", \"Weak\", \"Yes\"],\n", 74 | " [\"Rain\", \"Mild\", \"High\", \"Strong\", \"No\"]\n", 75 | "]\n", 76 | "\n", 77 | "# Creating a DataFrame from the dataset\n", 78 | "df = pd.DataFrame(data[1:], columns=data[0])\n", 79 | "\n", 80 | "def entropy(target_col):\n", 81 | " elements, counts = np.unique(target_col, return_counts=True)\n", 82 | " entropy = np.sum([(-counts[i]/np.sum(counts)) * np.log2(counts[i]/np.sum(counts)) for i in range(len(elements))])\n", 83 | " return entropy\n", 84 | "\n", 85 | "def information_gain(data, split_attribute, target_attribute):\n", 86 | " total_entropy = entropy(data[target_attribute])\n", 87 | " values, counts = np.unique(data[split_attribute], return_counts=True)\n", 88 | " weighted_entropy = np.sum([(counts[i]/np.sum(counts)) * entropy(data[data[split_attribute] == values[i]][target_attribute]) for i in range(len(values))])\n", 89 | " information_gain = total_entropy - weighted_entropy\n", 90 | " return information_gain\n", 91 | "\n", 92 | "def ID3(data, original_data, features, target_attribute, parent_node_class=None):\n", 93 | " if len(np.unique(data[target_attribute])) <= 1:\n", 94 | " return np.unique(data[target_attribute])[0]\n", 95 | " elif len(data) == 0:\n", 96 | " return np.unique(original_data[target_attribute])[np.argmax(np.unique(original_data[target_attribute], return_counts=True)[1])]\n", 97 | " elif len(features) == 0:\n", 98 | " return parent_node_class\n", 99 | " else:\n", 100 | " parent_node_class = np.unique(data[target_attribute])[np.argmax(np.unique(data[target_attribute], return_counts=True)[1])]\n", 101 | " item_values = [information_gain(data, feature, target_attribute) for feature in features]\n", 102 | " best_feature_index = np.argmax(item_values)\n", 103 | " best_feature = features[best_feature_index]\n", 104 | " tree = {best_feature: {}}\n", 105 | " features = [i for i in features if i != best_feature]\n", 106 | " for value in np.unique(data[best_feature]):\n", 107 | " value_sub_data = data[data[best_feature] == value]\n", 108 | " subtree = ID3(value_sub_data, original_data, features, target_attribute, parent_node_class)\n", 109 | " tree[best_feature][value] = subtree\n", 110 | " return tree\n", 111 | "\n", 112 | "def classify(sample, tree):\n", 113 | " for attribute in list(sample.keys()):\n", 114 | " if attribute in tree.keys():\n", 115 | " try:\n", 116 | " result = tree[attribute][sample[attribute]]\n", 117 | " except KeyError:\n", 118 | " return None\n", 119 | " if isinstance(result, dict):\n", 120 | " return classify(sample, result)\n", 121 | " else:\n", 122 | " return result\n", 123 | "\n", 124 | "# List of features\n", 125 | "features = list(df.columns[:-1])\n", 126 | "\n", 127 | "# Target attribute\n", 128 | "target_attribute = df.columns[-1]\n", 129 | "\n", 130 | "# Constructing the decision tree\n", 131 | "decision_tree = ID3(df, df, features, target_attribute)\n", 132 | "\n", 133 | "# Sample to classify\n", 134 | "sample = {\n", 135 | " \"Outlook\": \"Sunny\",\n", 136 | " \"Temperature\": \"Cool\",\n", 137 | " \"Humidity\": \"High\",\n", 138 | " \"Wind\": \"Strong\"\n", 139 | "}\n", 140 | "\n", 141 | "# Classifying the sample\n", 142 | "classification_result = classify(sample, decision_tree)\n", 143 | "\n", 144 | "print(\"Constructed Decision Tree:\")\n", 145 | "print(decision_tree)\n", 146 | "\n", 147 | "print(\"\\nClassification Result for the Sample:\")\n", 148 | "print(classification_result)\n" 149 | ] 150 | } 151 | ], 152 | "metadata": { 153 | "kaggle": { 154 | "accelerator": "none", 155 | "dataSources": [], 156 | "dockerImageVersionId": 30746, 157 | "isGpuEnabled": false, 158 | "isInternetEnabled": true, 159 | "language": "python", 160 | "sourceType": "notebook" 161 | }, 162 | "kernelspec": { 163 | "display_name": "Python 3", 164 | "language": "python", 165 | "name": "python3" 166 | }, 167 | "language_info": { 168 | "codemirror_mode": { 169 | "name": "ipython", 170 | "version": 3 171 | }, 172 | "file_extension": ".py", 173 | "mimetype": "text/x-python", 174 | "name": "python", 175 | "nbconvert_exporter": "python", 176 | "pygments_lexer": "ipython3", 177 | "version": "3.10.13" 178 | }, 179 | "papermill": { 180 | "default_parameters": {}, 181 | "duration": 4.65122, 182 | "end_time": "2024-07-21T16:49:31.692954", 183 | "environment_variables": {}, 184 | "exception": null, 185 | "input_path": "__notebook__.ipynb", 186 | "output_path": "__notebook__.ipynb", 187 | "parameters": {}, 188 | "start_time": "2024-07-21T16:49:27.041734", 189 | "version": "2.5.0" 190 | } 191 | }, 192 | "nbformat": 4, 193 | "nbformat_minor": 5 194 | } 195 | -------------------------------------------------------------------------------- /program6.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "938a2c62", 6 | "metadata": { 7 | "papermill": { 8 | "duration": 0.002428, 9 | "end_time": "2024-07-21T18:28:56.380650", 10 | "exception": false, 11 | "start_time": "2024-07-21T18:28:56.378222", 12 | "status": "completed" 13 | }, 14 | "tags": [] 15 | }, 16 | "source": [ 17 | "**6)Aim: Demonstrate and Analyse the results sets obtained from Bayesian belief network Principle. Program:- Write a program to construct a Bayesian network considering medical data. Use this model to demonstrate the diagnosis of heart patients using standard Heart Disease Data Set. You can use Python ML library classes/API.**" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 1, 23 | "id": "547613b1", 24 | "metadata": { 25 | "execution": { 26 | "iopub.execute_input": "2024-07-21T18:28:56.386295Z", 27 | "iopub.status.busy": "2024-07-21T18:28:56.385901Z", 28 | "iopub.status.idle": "2024-07-21T18:29:10.802399Z", 29 | "shell.execute_reply": "2024-07-21T18:29:10.800946Z" 30 | }, 31 | "papermill": { 32 | "duration": 14.422076, 33 | "end_time": "2024-07-21T18:29:10.804868", 34 | "exception": false, 35 | "start_time": "2024-07-21T18:28:56.382792", 36 | "status": "completed" 37 | }, 38 | "tags": [] 39 | }, 40 | "outputs": [ 41 | { 42 | "name": "stdout", 43 | "output_type": "stream", 44 | "text": [ 45 | "Requirement already satisfied: numpy in /opt/conda/lib/python3.10/site-packages (1.26.4)\r\n", 46 | "Requirement already satisfied: pandas in /opt/conda/lib/python3.10/site-packages (2.2.2)\r\n", 47 | "Collecting pgmpy\r\n", 48 | " Downloading pgmpy-0.1.25-py3-none-any.whl.metadata (6.4 kB)\r\n", 49 | "Requirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.10/site-packages (from pandas) (2.9.0.post0)\r\n", 50 | "Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.10/site-packages (from pandas) (2023.3.post1)\r\n", 51 | "Requirement already satisfied: tzdata>=2022.7 in /opt/conda/lib/python3.10/site-packages (from pandas) (2023.4)\r\n", 52 | "Requirement already satisfied: networkx in /opt/conda/lib/python3.10/site-packages (from pgmpy) (3.2.1)\r\n", 53 | "Requirement already satisfied: scipy in /opt/conda/lib/python3.10/site-packages (from pgmpy) (1.11.4)\r\n", 54 | "Requirement already satisfied: scikit-learn in /opt/conda/lib/python3.10/site-packages (from pgmpy) (1.2.2)\r\n", 55 | "Requirement already satisfied: pyparsing in /opt/conda/lib/python3.10/site-packages (from pgmpy) (3.1.1)\r\n", 56 | "Requirement already satisfied: torch in /opt/conda/lib/python3.10/site-packages (from pgmpy) (2.1.2+cpu)\r\n", 57 | "Requirement already satisfied: statsmodels in /opt/conda/lib/python3.10/site-packages (from pgmpy) (0.14.1)\r\n", 58 | "Requirement already satisfied: tqdm in /opt/conda/lib/python3.10/site-packages (from pgmpy) (4.66.4)\r\n", 59 | "Requirement already satisfied: joblib in /opt/conda/lib/python3.10/site-packages (from pgmpy) (1.4.2)\r\n", 60 | "Requirement already satisfied: opt-einsum in /opt/conda/lib/python3.10/site-packages (from pgmpy) (3.3.0)\r\n", 61 | "Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\r\n", 62 | "Requirement already satisfied: threadpoolctl>=2.0.0 in /opt/conda/lib/python3.10/site-packages (from scikit-learn->pgmpy) (3.2.0)\r\n", 63 | "Requirement already satisfied: patsy>=0.5.4 in /opt/conda/lib/python3.10/site-packages (from statsmodels->pgmpy) (0.5.6)\r\n", 64 | "Requirement already satisfied: packaging>=21.3 in /opt/conda/lib/python3.10/site-packages (from statsmodels->pgmpy) (21.3)\r\n", 65 | "Requirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from torch->pgmpy) (3.13.1)\r\n", 66 | "Requirement already satisfied: typing-extensions in /opt/conda/lib/python3.10/site-packages (from torch->pgmpy) (4.9.0)\r\n", 67 | "Requirement already satisfied: sympy in /opt/conda/lib/python3.10/site-packages (from torch->pgmpy) (1.13.0)\r\n", 68 | "Requirement already satisfied: jinja2 in /opt/conda/lib/python3.10/site-packages (from torch->pgmpy) (3.1.2)\r\n", 69 | "Requirement already satisfied: fsspec in /opt/conda/lib/python3.10/site-packages (from torch->pgmpy) (2024.5.0)\r\n", 70 | "Requirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.10/site-packages (from jinja2->torch->pgmpy) (2.1.3)\r\n", 71 | "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/conda/lib/python3.10/site-packages (from sympy->torch->pgmpy) (1.3.0)\r\n", 72 | "Downloading pgmpy-0.1.25-py3-none-any.whl (2.0 MB)\r\n", 73 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m20.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", 74 | "\u001b[?25hInstalling collected packages: pgmpy\r\n", 75 | "Successfully installed pgmpy-0.1.25\r\n", 76 | "Note: you may need to restart the kernel to use updated packages.\n" 77 | ] 78 | } 79 | ], 80 | "source": [ 81 | "pip install numpy pandas pgmpy" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 2, 87 | "id": "ebc6b632", 88 | "metadata": { 89 | "execution": { 90 | "iopub.execute_input": "2024-07-21T18:29:10.813026Z", 91 | "iopub.status.busy": "2024-07-21T18:29:10.812065Z", 92 | "iopub.status.idle": "2024-07-21T18:29:19.482367Z", 93 | "shell.execute_reply": "2024-07-21T18:29:19.481285Z" 94 | }, 95 | "papermill": { 96 | "duration": 8.67744, 97 | "end_time": "2024-07-21T18:29:19.485274", 98 | "exception": false, 99 | "start_time": "2024-07-21T18:29:10.807834", 100 | "status": "completed" 101 | }, 102 | "tags": [] 103 | }, 104 | "outputs": [ 105 | { 106 | "name": "stdout", 107 | "output_type": "stream", 108 | "text": [ 109 | "Sample instances from the dataset are given below:\n", 110 | " age gender cp trestbps chol fbs restecg thalach exang oldpeak \\\n", 111 | "0 63 1 1 145 233 1 2 150 0 2.3 \n", 112 | "1 67 1 4 160 286 0 2 108 1 1.5 \n", 113 | "2 67 1 4 120 229 0 2 129 1 2.6 \n", 114 | "3 37 1 3 130 250 0 0 187 0 3.5 \n", 115 | "4 41 0 2 130 204 0 2 172 0 1.4 \n", 116 | "\n", 117 | " slope ca thal heartdisease \n", 118 | "0 3 0 6 0 \n", 119 | "1 2 3 3 2 \n", 120 | "2 2 2 7 1 \n", 121 | "3 3 0 3 0 \n", 122 | "4 1 0 3 0 \n", 123 | "\n", 124 | "Attributes and datatypes:\n", 125 | "age int64\n", 126 | "gender int64\n", 127 | "cp int64\n", 128 | "trestbps int64\n", 129 | "chol int64\n", 130 | "fbs int64\n", 131 | "restecg int64\n", 132 | "thalach int64\n", 133 | "exang int64\n", 134 | "oldpeak float64\n", 135 | "slope int64\n", 136 | "ca object\n", 137 | "thal object\n", 138 | "heartdisease int64\n", 139 | "dtype: object\n", 140 | "\n", 141 | "Column names in the dataset:\n", 142 | "Index(['age', 'gender', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',\n", 143 | " 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'heartdisease'],\n", 144 | " dtype='object')\n", 145 | "\n", 146 | "Learning CPD using Maximum likelihood estimators\n", 147 | "\n", 148 | "Inferencing with Bayesian Network:\n", 149 | "\n", 150 | "1. Probability of HeartDisease given evidence= restecg\n", 151 | "+-----------------+---------------------+\n", 152 | "| heartdisease | phi(heartdisease) |\n", 153 | "+=================+=====================+\n", 154 | "| heartdisease(0) | 0.1386 |\n", 155 | "+-----------------+---------------------+\n", 156 | "| heartdisease(1) | 0.0000 |\n", 157 | "+-----------------+---------------------+\n", 158 | "| heartdisease(2) | 0.2403 |\n", 159 | "+-----------------+---------------------+\n", 160 | "| heartdisease(3) | 0.2174 |\n", 161 | "+-----------------+---------------------+\n", 162 | "| heartdisease(4) | 0.4036 |\n", 163 | "+-----------------+---------------------+\n", 164 | "\n", 165 | "2. Probability of HeartDisease given evidence= cp\n", 166 | "+-----------------+---------------------+\n", 167 | "| heartdisease | phi(heartdisease) |\n", 168 | "+=================+=====================+\n", 169 | "| heartdisease(0) | 0.3791 |\n", 170 | "+-----------------+---------------------+\n", 171 | "| heartdisease(1) | 0.1944 |\n", 172 | "+-----------------+---------------------+\n", 173 | "| heartdisease(2) | 0.1533 |\n", 174 | "+-----------------+---------------------+\n", 175 | "| heartdisease(3) | 0.1355 |\n", 176 | "+-----------------+---------------------+\n", 177 | "| heartdisease(4) | 0.1377 |\n", 178 | "+-----------------+---------------------+\n" 179 | ] 180 | } 181 | ], 182 | "source": [ 183 | "import numpy as np\n", 184 | "import pandas as pd\n", 185 | "from pgmpy.estimators import MaximumLikelihoodEstimator\n", 186 | "from pgmpy.models import BayesianModel\n", 187 | "from pgmpy.inference import VariableElimination\n", 188 | "\n", 189 | "# Load the dataset\n", 190 | "heartDisease = pd.read_csv('/kaggle/input/heart-disease-csv/heartdisease.csv')\n", 191 | "heartDisease = heartDisease.replace('?', np.nan)\n", 192 | "\n", 193 | "# Print sample instances and data types\n", 194 | "print('Sample instances from the dataset are given below:')\n", 195 | "print(heartDisease.head())\n", 196 | "\n", 197 | "print('\\nAttributes and datatypes:')\n", 198 | "print(heartDisease.dtypes)\n", 199 | "\n", 200 | "# Ensure the column names are correct\n", 201 | "print('\\nColumn names in the dataset:')\n", 202 | "print(heartDisease.columns)\n", 203 | "\n", 204 | "# Define the model structure with the correct column names\n", 205 | "model = BayesianModel([\n", 206 | " ('age', 'heartdisease'), # 'heartdisease' corresponds to the target variable\n", 207 | " ('exang', 'heartdisease'),\n", 208 | " ('cp', 'heartdisease'),\n", 209 | " ('heartdisease', 'restecg'),\n", 210 | " ('heartdisease', 'chol')\n", 211 | "])\n", 212 | "\n", 213 | "# Learning CPD using Maximum Likelihood Estimators\n", 214 | "print('\\nLearning CPD using Maximum likelihood estimators')\n", 215 | "model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)\n", 216 | "\n", 217 | "# Inferencing with Bayesian Network\n", 218 | "print('\\nInferencing with Bayesian Network:')\n", 219 | "HeartDisease_infer = VariableElimination(model)\n", 220 | "\n", 221 | "# Queries\n", 222 | "print('\\n1. Probability of HeartDisease given evidence= restecg')\n", 223 | "q1 = HeartDisease_infer.query(variables=['heartdisease'], evidence={'restecg': 1})\n", 224 | "print(q1)\n", 225 | "\n", 226 | "print('\\n2. Probability of HeartDisease given evidence= cp')\n", 227 | "q2 = HeartDisease_infer.query(variables=['heartdisease'], evidence={'cp': 1})\n", 228 | "print(q2)\n" 229 | ] 230 | } 231 | ], 232 | "metadata": { 233 | "kaggle": { 234 | "accelerator": "none", 235 | "dataSources": [ 236 | { 237 | "datasetId": 5424296, 238 | "sourceId": 9004019, 239 | "sourceType": "datasetVersion" 240 | } 241 | ], 242 | "dockerImageVersionId": 30746, 243 | "isGpuEnabled": false, 244 | "isInternetEnabled": true, 245 | "language": "python", 246 | "sourceType": "notebook" 247 | }, 248 | "kernelspec": { 249 | "display_name": "Python 3", 250 | "language": "python", 251 | "name": "python3" 252 | }, 253 | "language_info": { 254 | "codemirror_mode": { 255 | "name": "ipython", 256 | "version": 3 257 | }, 258 | "file_extension": ".py", 259 | "mimetype": "text/x-python", 260 | "name": "python", 261 | "nbconvert_exporter": "python", 262 | "pygments_lexer": "ipython3", 263 | "version": "3.10.13" 264 | }, 265 | "papermill": { 266 | "default_parameters": {}, 267 | "duration": 27.002744, 268 | "end_time": "2024-07-21T18:29:20.611726", 269 | "environment_variables": {}, 270 | "exception": null, 271 | "input_path": "__notebook__.ipynb", 272 | "output_path": "__notebook__.ipynb", 273 | "parameters": {}, 274 | "start_time": "2024-07-21T18:28:53.608982", 275 | "version": "2.5.0" 276 | } 277 | }, 278 | "nbformat": 4, 279 | "nbformat_minor": 5 280 | } 281 | -------------------------------------------------------------------------------- /program10.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "972bb47e", 6 | "metadata": { 7 | "papermill": { 8 | "duration": 0.002727, 9 | "end_time": "2024-07-21T18:55:01.677684", 10 | "exception": false, 11 | "start_time": "2024-07-21T18:55:01.674957", 12 | "status": "completed" 13 | }, 14 | "tags": [] 15 | }, 16 | "source": [ 17 | "**10. Implement and demonstrate the working of SVM algorithm for classification.**" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 1, 23 | "id": "65e76d7c", 24 | "metadata": { 25 | "execution": { 26 | "iopub.execute_input": "2024-07-21T18:55:01.684070Z", 27 | "iopub.status.busy": "2024-07-21T18:55:01.683648Z", 28 | "iopub.status.idle": "2024-07-21T18:55:03.770743Z", 29 | "shell.execute_reply": "2024-07-21T18:55:03.769465Z" 30 | }, 31 | "papermill": { 32 | "duration": 2.093758, 33 | "end_time": "2024-07-21T18:55:03.773783", 34 | "exception": false, 35 | "start_time": "2024-07-21T18:55:01.680025", 36 | "status": "completed" 37 | }, 38 | "tags": [] 39 | }, 40 | "outputs": [ 41 | { 42 | "name": "stdout", 43 | "output_type": "stream", 44 | "text": [ 45 | "Accuracy: 0.7333333333333333\n" 46 | ] 47 | }, 48 | { 49 | "data": { 50 | "image/png": "", 51 | "text/plain": [ 52 | "
" 53 | ] 54 | }, 55 | "metadata": {}, 56 | "output_type": "display_data" 57 | } 58 | ], 59 | "source": [ 60 | "# Importing necessary libraries\n", 61 | "import numpy as np\n", 62 | "import matplotlib.pyplot as plt\n", 63 | "from sklearn import datasets\n", 64 | "from sklearn.model_selection import train_test_split\n", 65 | "from sklearn.preprocessing import StandardScaler\n", 66 | "from sklearn.svm import SVC\n", 67 | "from sklearn.metrics import accuracy_score\n", 68 | "\n", 69 | "# Load the iris dataset\n", 70 | "iris = datasets.load_iris()\n", 71 | "X = iris.data[:, :2] # Taking only the first two features for simplicity\n", 72 | "y = iris.target\n", 73 | "\n", 74 | "# Splitting the dataset into training and testing sets\n", 75 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)\n", 76 | "\n", 77 | "# Feature scaling\n", 78 | "sc = StandardScaler()\n", 79 | "X_train = sc.fit_transform(X_train)\n", 80 | "X_test = sc.transform(X_test)\n", 81 | "\n", 82 | "# Training the SVM model\n", 83 | "svm_classifier = SVC(kernel='linear', random_state=42)\n", 84 | "svm_classifier.fit(X_train, y_train)\n", 85 | "\n", 86 | "# Predicting the test set results\n", 87 | "y_pred = svm_classifier.predict(X_test)\n", 88 | "\n", 89 | "# Calculating the accuracy of the model\n", 90 | "accuracy = accuracy_score(y_test, y_pred)\n", 91 | "print(\"Accuracy:\", accuracy)\n", 92 | "\n", 93 | "# Visualizing the decision boundary\n", 94 | "def plot_decision_boundary(classifier, X, y):\n", 95 | " x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1\n", 96 | " y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1\n", 97 | " xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),\n", 98 | " np.arange(y_min, y_max, 0.1))\n", 99 | " Z = classifier.predict(np.c_[xx.ravel(), yy.ravel()])\n", 100 | " Z = Z.reshape(xx.shape)\n", 101 | " plt.contourf(xx, yy, Z, alpha=0.4, cmap=plt.cm.coolwarm)\n", 102 | " plt.scatter(X[:, 0], X[:, 1], c=y, s=20, edgecolors='k', cmap=plt.cm.coolwarm)\n", 103 | " plt.xlabel('Feature 1')\n", 104 | " plt.ylabel('Feature 2')\n", 105 | " plt.title('SVM Decision Boundary')\n", 106 | " plt.show()\n", 107 | "\n", 108 | "# Plotting decision boundary\n", 109 | "plot_decision_boundary(svm_classifier, X_train, y_train)\n" 110 | ] 111 | } 112 | ], 113 | "metadata": { 114 | "kaggle": { 115 | "accelerator": "none", 116 | "dataSources": [], 117 | "dockerImageVersionId": 30746, 118 | "isGpuEnabled": false, 119 | "isInternetEnabled": false, 120 | "language": "python", 121 | "sourceType": "notebook" 122 | }, 123 | "kernelspec": { 124 | "display_name": "Python 3", 125 | "language": "python", 126 | "name": "python3" 127 | }, 128 | "language_info": { 129 | "codemirror_mode": { 130 | "name": "ipython", 131 | "version": 3 132 | }, 133 | "file_extension": ".py", 134 | "mimetype": "text/x-python", 135 | "name": "python", 136 | "nbconvert_exporter": "python", 137 | "pygments_lexer": "ipython3", 138 | "version": "3.10.13" 139 | }, 140 | "papermill": { 141 | "default_parameters": {}, 142 | "duration": 5.97488, 143 | "end_time": "2024-07-21T18:55:04.301514", 144 | "environment_variables": {}, 145 | "exception": null, 146 | "input_path": "__notebook__.ipynb", 147 | "output_path": "__notebook__.ipynb", 148 | "parameters": {}, 149 | "start_time": "2024-07-21T18:54:58.326634", 150 | "version": "2.5.0" 151 | } 152 | }, 153 | "nbformat": 4, 154 | "nbformat_minor": 5 155 | } 156 | -------------------------------------------------------------------------------- /program7.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "4a19aa53", 6 | "metadata": { 7 | "papermill": { 8 | "duration": 0.003095, 9 | "end_time": "2024-07-21T18:41:38.854166", 10 | "exception": false, 11 | "start_time": "2024-07-21T18:41:38.851071", 12 | "status": "completed" 13 | }, 14 | "tags": [] 15 | }, 16 | "source": [ 17 | "**7)Aim: Implement and demonstrate the working model of K-means clustering algorithm with Expectation Maximization Concept. Program: Apply EM algorithm to cluster a set of data stored in a .CSV file. Use the same data set for clustering using k-Means algorithm. Compare the results of these two algorithms and comment on the quality of clustering. You can add Python ML library classes/API in the program.**" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 1, 23 | "id": "4c01320a", 24 | "metadata": { 25 | "execution": { 26 | "iopub.execute_input": "2024-07-21T18:41:38.861223Z", 27 | "iopub.status.busy": "2024-07-21T18:41:38.860794Z", 28 | "iopub.status.idle": "2024-07-21T18:41:43.054309Z", 29 | "shell.execute_reply": "2024-07-21T18:41:43.052802Z" 30 | }, 31 | "papermill": { 32 | "duration": 4.201147, 33 | "end_time": "2024-07-21T18:41:43.058389", 34 | "exception": false, 35 | "start_time": "2024-07-21T18:41:38.857242", 36 | "status": "completed" 37 | }, 38 | "tags": [] 39 | }, 40 | "outputs": [ 41 | { 42 | "name": "stdout", 43 | "output_type": "stream", 44 | "text": [ 45 | " 5.1 3.5 1.4 0.2 Iris-setosa\n", 46 | "0 4.9 3.0 1.4 0.2 Iris-setosa\n", 47 | "1 4.7 3.2 1.3 0.2 Iris-setosa\n", 48 | "2 4.6 3.1 1.5 0.2 Iris-setosa\n", 49 | "3 5.0 3.6 1.4 0.2 Iris-setosa\n", 50 | "4 5.4 3.9 1.7 0.4 Iris-setosa\n", 51 | "Index(['5.1', '3.5', '1.4', '0.2', 'Iris-setosa'], dtype='object')\n", 52 | "Number of columns: 5\n", 53 | "The Adjusted Rand Index of K-Means: 0.6150051194844962\n", 54 | "The Silhouette Score of K-Means: 0.45655541185952575\n" 55 | ] 56 | }, 57 | { 58 | "name": "stderr", 59 | "output_type": "stream", 60 | "text": [ 61 | "/opt/conda/lib/python3.10/site-packages/sklearn/cluster/_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning\n", 62 | " warnings.warn(\n" 63 | ] 64 | }, 65 | { 66 | "name": "stdout", 67 | "output_type": "stream", 68 | "text": [ 69 | "The Adjusted Rand Index of GMM: 0.9025775147114491\n", 70 | "The Silhouette Score of GMM: 0.37020510233493803\n" 71 | ] 72 | }, 73 | { 74 | "data": { 75 | "image/png": "", 76 | "text/plain": [ 77 | "
" 78 | ] 79 | }, 80 | "metadata": {}, 81 | "output_type": "display_data" 82 | } 83 | ], 84 | "source": [ 85 | "import pandas as pd\n", 86 | "import numpy as np\n", 87 | "import matplotlib.pyplot as plt\n", 88 | "from sklearn.cluster import KMeans\n", 89 | "from sklearn.mixture import GaussianMixture\n", 90 | "from sklearn.preprocessing import StandardScaler\n", 91 | "from sklearn.metrics import silhouette_score, adjusted_rand_score\n", 92 | "\n", 93 | "# Load the dataset\n", 94 | "csv_path = \"/kaggle/input/iris2-csv/iris 2.csv\"\n", 95 | "dataset = pd.read_csv(csv_path)\n", 96 | "\n", 97 | "# Inspect columns\n", 98 | "print(dataset.head())\n", 99 | "print(dataset.columns)\n", 100 | "print(f\"Number of columns: {len(dataset.columns)}\")\n", 101 | "\n", 102 | "# Update column names based on inspection\n", 103 | "# Ensure the number of names matches the number of columns in the dataset\n", 104 | "names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class']\n", 105 | "dataset.columns = names\n", 106 | "\n", 107 | "# Prepare features and labels\n", 108 | "X = dataset.iloc[:, :-1] # Features\n", 109 | "y = dataset['class'].map({'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}) # Labels\n", 110 | "\n", 111 | "# Standardize features\n", 112 | "scaler = StandardScaler()\n", 113 | "X_scaled = scaler.fit_transform(X)\n", 114 | "\n", 115 | "plt.figure(figsize=(18, 6))\n", 116 | "\n", 117 | "# REAL PLOT\n", 118 | "plt.subplot(1, 3, 1)\n", 119 | "plt.title('Real')\n", 120 | "colormap = np.array(['red', 'lime', 'black'])\n", 121 | "plt.scatter(X_scaled[:, 2], X_scaled[:, 3], c=colormap[y])\n", 122 | "\n", 123 | "# K-Means Clustering\n", 124 | "kmeans_model = KMeans(n_clusters=3, random_state=0).fit(X_scaled)\n", 125 | "y_kmeans = kmeans_model.labels_\n", 126 | "plt.subplot(1, 3, 2)\n", 127 | "plt.title('KMeans')\n", 128 | "plt.scatter(X_scaled[:, 2], X_scaled[:, 3], c=colormap[y_kmeans])\n", 129 | "print('The Adjusted Rand Index of K-Means: ', adjusted_rand_score(y, y_kmeans))\n", 130 | "print('The Silhouette Score of K-Means: ', silhouette_score(X_scaled, y_kmeans))\n", 131 | "\n", 132 | "# Gaussian Mixture Model (GMM)\n", 133 | "gmm_model = GaussianMixture(n_components=3, random_state=0).fit(X_scaled)\n", 134 | "y_gmm = gmm_model.predict(X_scaled)\n", 135 | "plt.subplot(1, 3, 3)\n", 136 | "plt.title('GMM Classification')\n", 137 | "plt.scatter(X_scaled[:, 2], X_scaled[:, 3], c=colormap[y_gmm])\n", 138 | "print('The Adjusted Rand Index of GMM: ', adjusted_rand_score(y, y_gmm))\n", 139 | "print('The Silhouette Score of GMM: ', silhouette_score(X_scaled, y_gmm))\n", 140 | "\n", 141 | "plt.show()\n" 142 | ] 143 | } 144 | ], 145 | "metadata": { 146 | "kaggle": { 147 | "accelerator": "none", 148 | "dataSources": [ 149 | { 150 | "datasetId": 5424909, 151 | "sourceId": 9004888, 152 | "sourceType": "datasetVersion" 153 | } 154 | ], 155 | "dockerImageVersionId": 30746, 156 | "isGpuEnabled": false, 157 | "isInternetEnabled": true, 158 | "language": "python", 159 | "sourceType": "notebook" 160 | }, 161 | "kernelspec": { 162 | "display_name": "Python 3", 163 | "language": "python", 164 | "name": "python3" 165 | }, 166 | "language_info": { 167 | "codemirror_mode": { 168 | "name": "ipython", 169 | "version": 3 170 | }, 171 | "file_extension": ".py", 172 | "mimetype": "text/x-python", 173 | "name": "python", 174 | "nbconvert_exporter": "python", 175 | "pygments_lexer": "ipython3", 176 | "version": "3.10.13" 177 | }, 178 | "papermill": { 179 | "default_parameters": {}, 180 | "duration": 8.083042, 181 | "end_time": "2024-07-21T18:41:43.685526", 182 | "environment_variables": {}, 183 | "exception": null, 184 | "input_path": "__notebook__.ipynb", 185 | "output_path": "__notebook__.ipynb", 186 | "parameters": {}, 187 | "start_time": "2024-07-21T18:41:35.602484", 188 | "version": "2.5.0" 189 | } 190 | }, 191 | "nbformat": 4, 192 | "nbformat_minor": 5 193 | } 194 | --------------------------------------------------------------------------------