# %% Imports
import warnings

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler

# %% Configuration
# One seed shared by the data generator, the train/test split and the forest.
RANDOM_STATE = 42

# %% Generate synthetic data
# NOTE: every column below -- including the `disease` label -- is drawn
# independently at random, so the features carry no information about the
# label. The forest therefore memorises the training set (accuracy 1.00 in the
# recorded run) while scoring at chance level on the test set (0.24). That gap
# is expected for this data; it is not a preprocessing bug.
np.random.seed(RANDOM_STATE)
n_samples = 1000

# The order of the draws matters for reproducibility: each call consumes the
# global NumPy random stream, so reordering the keys changes the dataset.
data = {
    'age': np.random.randint(18, 80, size=n_samples),
    'gender': np.random.choice(['Male', 'Female'], size=n_samples),
    'blood_pressure': np.random.choice(['Normal', 'High', 'Very High'], size=n_samples),
    'cholesterol': np.random.randint(120, 300, size=n_samples),
    'bmi': np.round(np.random.uniform(18.5, 40.0, size=n_samples), 1),
    'blood_sugar': np.random.randint(70, 200, size=n_samples),
    'disease': np.random.choice(['Diabetes', 'Heart Disease', 'Hypertension', 'Healthy'],
                                size=n_samples, p=[0.2, 0.25, 0.25, 0.3])
}

df = pd.DataFrame(data)

# %% Split data into features / target and train / test
X = df.drop('disease', axis=1)
y = df['disease']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# %% Fit the preprocessing objects on the training split only
numerical_features = ['age', 'cholesterol', 'bmi', 'blood_sugar']
categorical_features = ['gender', 'blood_pressure']

# LabelEncoder is documented by scikit-learn as a target encoder; it is kept
# for `gender` so the fitted object and its integer codes stay exactly as
# before. Unseen values make its `transform` raise ValueError.
gender_encoder = LabelEncoder().fit(X_train['gender'])
# handle_unknown='ignore' encodes an unseen category as an all-zero row.
bp_encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore').fit(
    X_train[['blood_pressure']]
)
scaler = StandardScaler().fit(X_train[numerical_features])

bp_columns = bp_encoder.get_feature_names_out(['blood_pressure'])


# %% Shared preprocessing
def preprocess_features(features):
    """Turn raw feature rows into the numeric matrix the model is trained on.

    This is the single preprocessing path used for the training split, the
    test split and ad-hoc predictions, so the three can never drift apart.
    It relies on the module-level, already fitted ``gender_encoder``,
    ``bp_encoder`` and ``scaler``.

    Args:
        features: DataFrame containing the columns ``age``, ``gender``,
            ``blood_pressure``, ``cholesterol``, ``bmi`` and ``blood_sugar``.

    Returns:
        A new DataFrame (the input is not modified) with ``gender`` replaced
        by its integer code, the numerical columns standardised, and
        ``blood_pressure`` replaced by one indicator column per category
        seen during fitting. The row index of ``features`` is preserved.

    Raises:
        ValueError: if ``gender`` holds a value the encoder was not fitted on.
    """
    # drop() returns a new frame, so the assignments below never touch the input.
    processed = features.drop(columns='blood_pressure')
    processed['gender'] = gender_encoder.transform(features['gender'])
    processed[numerical_features] = scaler.transform(features[numerical_features])

    bp_onehot = pd.DataFrame(
        bp_encoder.transform(features[['blood_pressure']]),
        columns=bp_columns,
        index=features.index,  # keep rows aligned for the concat below
    )
    return pd.concat([processed, bp_onehot], axis=1)


# %% Preprocess both splits
X_train_processed = preprocess_features(X_train)
X_test_processed = preprocess_features(X_test)

# Both splits must expose identical columns in identical order.
assert list(X_train_processed.columns) == list(X_test_processed.columns)

# %% Train model
model = RandomForestClassifier(n_estimators=100, random_state=RANDOM_STATE)
model.fit(X_train_processed, y_train)

# %% Evaluate
train_preds = model.predict(X_train_processed)
test_preds = model.predict(X_test_processed)

print(f"Training Accuracy: {accuracy_score(y_train, train_preds):.2f}")
print(f"Test Accuracy: {accuracy_score(y_test, test_preds):.2f}")


# %% Prediction function
def predict_disease(age, gender, blood_pressure, cholesterol, bmi, blood_sugar):
    """Predict the disease label for a single patient record.

    Args:
        age: Age value; scaled with the fitted ``scaler``.
        gender: One of the gender labels seen during training.
        blood_pressure: Blood-pressure category. A value not seen during
            training is still accepted (it is encoded as all zeros), but a
            warning is emitted because the prediction is then made without
            any blood-pressure information.
        cholesterol: Cholesterol value; scaled with the fitted ``scaler``.
        bmi: Body-mass index; scaled with the fitted ``scaler``.
        blood_sugar: Blood-sugar value; scaled with the fitted ``scaler``.

    Returns:
        dict with two keys: ``'disease'`` (the predicted label) and
        ``'probabilities'`` (a dict mapping every class in
        ``model.classes_`` to its predicted probability).

    Raises:
        ValueError: if ``gender`` is not one of the labels seen in training.
    """
    known_genders = list(gender_encoder.classes_)
    if gender not in known_genders:
        # LabelEncoder would raise ValueError here as well; fail early with a
        # message that tells the caller which values are accepted.
        raise ValueError(
            f"Unknown gender {gender!r}; expected one of {known_genders}"
        )

    known_bp = list(bp_encoder.categories_[0])
    if blood_pressure not in known_bp:
        warnings.warn(
            f"Unknown blood_pressure {blood_pressure!r}; expected one of "
            f"{known_bp}. It will be encoded as all zeros.",
            UserWarning,
            stacklevel=2,
        )

    # Single-row frame with the same raw columns as the training data.
    input_data = pd.DataFrame([{
        'age': age,
        'gender': gender,
        'blood_pressure': blood_pressure,
        'cholesterol': cholesterol,
        'bmi': bmi,
        'blood_sugar': blood_sugar
    }])

    processed_data = preprocess_features(input_data)

    prediction = model.predict(processed_data)
    probabilities = model.predict_proba(processed_data)

    return {
        'disease': prediction[0],
        'probabilities': dict(zip(model.classes_, probabilities[0]))
    }


# %% Example usage
sample_input = {
    'age': 45,
    'gender': 'Female',
    'blood_pressure': 'High',
    'cholesterol': 220,
    'bmi': 28.5,
    'blood_sugar': 150
}

prediction = predict_disease(**sample_input)
print("\nExample Prediction:")
print(f"Predicted Disease: {prediction['disease']}")
print("Probabilities:")
for disease, prob in prediction['probabilities'].items():
    print(f"{disease}: {prob:.2f}")

# %% Important Disclaimer
print("\nDISCLAIMER: This is a synthetic example for demonstration purposes only.")
print("NOT TO BE USED FOR ACTUAL MEDICAL DIAGNOSIS OR DECISION MAKING.")