├── ANN ├── ANN_Architecture.png ├── Codes │ ├── artificial_neural_network.ipynb │ └── artificial_neural_network.py └── Dataset │ ├── Churn_Modelling.csv │ └── Folds5x2_pp.xlsx ├── Algorithm └── Gradient_Descent.ipynb ├── Associative Rule Learning ├── Apriori_Algorithm_using_Market_basket_optimisation_Dataset.ipynb └── Apriori_Algorithm_using_the_Groceries_Dataset.ipynb ├── CONTRIBUTING.md ├── Classification ├── Classification_Models.ipynb ├── Decision_Tree_Classification.ipynb ├── K_Nearest_Neighbors(K_NN).ipynb ├── Logistic_Regression.ipynb ├── Naive_Bayes.ipynb ├── Random_Forest_Classification.ipynb └── Support_Vector_Machine.ipynb ├── Clustering └── k_means_clustering.ipynb ├── Convolutional Neural Network └── Dog_Cat_Prediction_Using_CNN.ipynb ├── Extraa ├── BERT_tf_hub.ipynb ├── BNLP.ipynb ├── DTALE_example.ipynb ├── Data_Preprocessing_Tools.ipynb ├── Movie_Recommender_Systems.ipynb ├── NER.ipynb ├── Plot.ipynb ├── PracticeML.ipynb ├── PythonCheatSheet.ipynb ├── Seaborn_Charts_Cheat_Sheet.ipynb ├── Simple_Text_Preprocessing.ipynb ├── Time-Series-Prediction │ └── Weather_data_Madrid.ipynb ├── data_preprocessing_tools.ipynb └── pd_np.ipynb ├── GenAI ├── Gemini │ └── SocialMediaPostDescriptionApp │ │ ├── .env.example │ │ ├── .gitignore │ │ ├── README.md │ │ ├── app.py │ │ └── requirements.txt └── OpenAI │ └── README.md ├── LICENSE ├── NLP ├── Embedding_Sequence_LSTM_Preprocessing.ipynb ├── IMDB_Review_Sentiment_Analysis.ipynb └── natural_language_processing.ipynb ├── NeuralProphet └── WeatherForecast.ipynb ├── PDF ├── MACHINE LEARNING CHEATSHEET (w chen).pdf ├── Machine Learning Interview Cheat sheets (Anwar V0.1.0.3).pdf ├── PythonCheatSheet.pdf ├── Rules of Machine Learning- Best Practices for ML Engineering.pdf ├── azure-machine-learning-algorithm-cheat-sheet-nov2019.pdf └── plotly_cheat_sheet.pdf ├── README.md ├── Recurrent Neural Network └── Embedding_Sequence_LSTM_Keras_preprocessing.ipynb └── Regression └── Linear_Regression.ipynb 
/ANN/ANN_Architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ekramasif/Basic-Machine-Learning/71f028a0e66dda2f7ef5eff8909f81d9db61ba8f/ANN/ANN_Architecture.png -------------------------------------------------------------------------------- /ANN/Codes/artificial_neural_network.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Artificial Neural Network 3 | 4 | Automatically generated by Colaboratory. 5 | 6 | Original file is located at 7 | https://colab.research.google.com/drive/1eje9zILprgVmohMN7cKykI3fn4FBRPnF 8 | 9 | # Artificial Neural Network 10 | 11 | ### Importing the libraries 12 | """ 13 | 14 | import numpy as np 15 | import pandas as pd 16 | import tensorflow as tf 17 | 18 | tf.__version__ 19 | 20 | """## Part 1 - Data Preprocessing 21 | 22 | ### Importing the dataset 23 | """ 24 | 25 | dataset = pd.read_excel('Folds5x2_pp.xlsx') 26 | X = dataset.iloc[:, :-1].values 27 | y = dataset.iloc[:, -1].values 28 | 29 | """### Splitting the dataset into the Training set and Test set""" 30 | 31 | from sklearn.model_selection import train_test_split 32 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0) 33 | 34 | """## Part 2 - Building the ANN 35 | 36 | ### Initializing the ANN 37 | """ 38 | 39 | ann = tf.keras.models.Sequential() 40 | 41 | """### Adding the input layer and the first hidden layer""" 42 | 43 | ann.add(tf.keras.layers.Dense(units=6, activation='relu')) 44 | 45 | """### Adding the second hidden layer""" 46 | 47 | ann.add(tf.keras.layers.Dense(units=6, activation='relu')) 48 | 49 | """### Adding the output layer""" 50 | 51 | ann.add(tf.keras.layers.Dense(units=1)) 52 | 53 | """## Part 3 - Training the ANN 54 | 55 | ### Compiling the ANN 56 | """ 57 | 58 | ann.compile(optimizer = 'adam', loss = 'mean_squared_error') 59 | 60 | """### Training the ANN model on the 
Training set""" 61 | 62 | ann.fit(X_train, y_train, batch_size = 32, epochs = 100) 63 | 64 | """### Predicting the results of the Test set""" 65 | 66 | y_pred = ann.predict(X_test) 67 | np.set_printoptions(precision=2) 68 | print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1)) -------------------------------------------------------------------------------- /ANN/Dataset/Folds5x2_pp.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ekramasif/Basic-Machine-Learning/71f028a0e66dda2f7ef5eff8909f81d9db61ba8f/ANN/Dataset/Folds5x2_pp.xlsx -------------------------------------------------------------------------------- /Algorithm/Gradient_Descent.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Gradient_Descent.ipynb", 7 | "provenance": [], 8 | "authorship_tag": "ABX9TyM5FLnT6y0aAJ54cUP0fJqq", 9 | "include_colab_link": true 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "language_info": { 16 | "name": "python" 17 | } 18 | }, 19 | "cells": [ 20 | { 21 | "cell_type": "markdown", 22 | "metadata": { 23 | "id": "view-in-github", 24 | "colab_type": "text" 25 | }, 26 | "source": [ 27 | "\"Open" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 26, 33 | "metadata": { 34 | "id": "wUEbcJZ9et3T" 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "import numpy as np\n", 39 | "import matplotlib.pyplot as plt" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "source": [ 45 | "x = np.array([1,2,3,4,5])\n", 46 | "y = np.array([5,7,9,11,13])" 47 | ], 48 | "metadata": { 49 | "id": "EfD3JPyMfWW4" 50 | }, 51 | "execution_count": 27, 52 | "outputs": [] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "source": [ 57 | "x, y" 58 | ], 59 | "metadata": { 60 | "colab": { 61 | "base_uri": 
"https://localhost:8080/" 62 | }, 63 | "id": "Eq9FXMxbfcqA", 64 | "outputId": "2c1cbdef-18cb-4443-aa20-19317ca38f5d" 65 | }, 66 | "execution_count": 28, 67 | "outputs": [ 68 | { 69 | "output_type": "execute_result", 70 | "data": { 71 | "text/plain": [ 72 | "(array([1, 2, 3, 4, 5]), array([ 5, 7, 9, 11, 13]))" 73 | ] 74 | }, 75 | "metadata": {}, 76 | "execution_count": 28 77 | } 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "source": [ 83 | "%matplotlib inline\n", 84 | "def gradient_descent(x,y):\n", 85 | " m_curr = b_curr = 0\n", 86 | " rate = 0.01\n", 87 | " n = len(x)\n", 88 | " for i in range(10000):\n", 89 | " y_predicted = m_curr * x + b_curr\n", 90 | " plt.plot(x,y_predicted,color='blue')\n", 91 | " md = -(2/n)*sum(x*(y-y_predicted))\n", 92 | " yd = -(2/n)*sum(y-y_predicted)\n", 93 | " m_curr = m_curr - rate * md\n", 94 | " b_curr = b_curr - rate * yd\n" 95 | ], 96 | "metadata": { 97 | "id": "NdZLK4Z2gC-j" 98 | }, 99 | "execution_count": 29, 100 | "outputs": [] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "source": [ 105 | "plt.scatter(x,y,color='r',marker='h',linewidth='5')\n", 106 | "gradient_descent(x,y)" 107 | ], 108 | "metadata": { 109 | "colab": { 110 | "base_uri": "https://localhost:8080/", 111 | "height": 265 112 | }, 113 | "id": "HRW9eh6SgILu", 114 | "outputId": "6fce693f-6a1a-402b-9c0e-46c9150b8507" 115 | }, 116 | "execution_count": 30, 117 | "outputs": [ 118 | { 119 | "output_type": "display_data", 120 | "data": { 121 | "image/png": "\n", 122 | "text/plain": [ 123 | "
" 124 | ] 125 | }, 126 | "metadata": { 127 | "needs_background": "light" 128 | } 129 | } 130 | ] 131 | } 132 | ] 133 | } -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Feel free to contribute! 2 | ## Always share your knowledge! 3 | 4 | > Just fork and pull request! 5 | -------------------------------------------------------------------------------- /Classification/Classification_Models.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Classification_Models.ipynb", 7 | "provenance": [], 8 | "include_colab_link": true 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | } 17 | }, 18 | "cells": [ 19 | { 20 | "cell_type": "markdown", 21 | "metadata": { 22 | "id": "view-in-github", 23 | "colab_type": "text" 24 | }, 25 | "source": [ 26 | "\"Open" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": { 33 | "id": "931yqq2DShJx" 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "#@title Preparing the data\n", 38 | "# Importing libraries\n", 39 | "import pandas as pd\n", 40 | "import numpy as np\n", 41 | "pd.set_option(\"display.precision\", 4)\n", 42 | "# Metrics\n", 43 | "from sklearn.metrics import accuracy_score\n", 44 | "from sklearn.metrics import f1_score\n", 45 | "\n", 46 | "# Loading the data\n", 47 | "df = pd.read_csv('Data.csv')\n", 48 | "X = df.iloc[:, :-1].values\n", 49 | "y = df.iloc[:, -1].values\n", 50 | "\n", 51 | "# Encoding the Dependent Variable\n", 52 | "from sklearn.preprocessing import LabelEncoder\n", 53 | "le = LabelEncoder()\n", 54 | "y = le.fit_transform(y)\n", 55 | "\n", 56 | "# Splitting the data into train and test set\n", 57 | "from 
sklearn.model_selection import train_test_split\n", 58 | "X_train, X_test, y_train, y_test = train_test_split(X, y,\n", 59 | " test_size = 0.2,\n", 60 | " random_state = 42)\n", 61 | "\n", 62 | "# Feature scaling\n", 63 | "from sklearn.preprocessing import StandardScaler\n", 64 | "sc = StandardScaler()\n", 65 | "X_train = sc.fit_transform(X_train)\n", 66 | "X_test = sc.fit_transform(X_test)\n", 67 | "\n" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "source": [ 73 | "#@title Logistic Regression\n", 74 | "# Training the Logistic Regression model \n", 75 | "from sklearn.linear_model import LogisticRegression\n", 76 | "classifier_lr = LogisticRegression(random_state = 42)\n", 77 | "classifier_lr.fit(X_train, y_train)\n", 78 | "\n", 79 | "# Predicting the test values\n", 80 | "y_pred_lr = classifier_lr.predict(X_test)\n", 81 | "\n", 82 | "# Scoring\n", 83 | "acc_lr = accuracy_score(y_test, y_pred_lr)\n", 84 | "f1_lr = f1_score(y_test, y_pred_lr)" 85 | ], 86 | "metadata": { 87 | "id": "tEVNPMO2UuBC" 88 | }, 89 | "execution_count": null, 90 | "outputs": [] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "source": [ 95 | "#@title K-Nearest Neighbors (K-NN)\n", 96 | "# Training the K-NN model\n", 97 | "\"\"\"\n", 98 | "Power parameter for the Minkowski metric. \n", 99 | "When p = 1, this is equivalent to using manhattan_distance (l1), \n", 100 | "and euclidean_distance (l2) for p = 2. 
\n", 101 | "\"\"\"\n", 102 | "from sklearn.neighbors import KNeighborsClassifier\n", 103 | "classifier_knn = KNeighborsClassifier(n_neighbors = 5, p = 2, metric = 'minkowski')\n", 104 | "classifier_knn.fit(X_train, y_train)\n", 105 | "\n", 106 | "# Predicting the test values\n", 107 | "y_pred_knn = classifier_knn.predict(X_test)\n", 108 | "\n", 109 | "# Scoring\n", 110 | "acc_knn = accuracy_score(y_test, y_pred_knn)\n", 111 | "f1_knn = f1_score(y_test, y_pred_knn)" 112 | ], 113 | "metadata": { 114 | "id": "9M55P5d_WF2M" 115 | }, 116 | "execution_count": null, 117 | "outputs": [] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "source": [ 122 | "#@title Support Vector Machine (SVM)\n", 123 | "# Training the SVM Classifier model\n", 124 | "from sklearn.svm import SVC\n", 125 | "classifier_svm = SVC(kernel = 'linear', random_state = 42)\n", 126 | "classifier_svm.fit(X_train, y_train)\n", 127 | "\n", 128 | "# Predicting the test values\n", 129 | "y_pred_svm = classifier_svm.predict(X_test)\n", 130 | "\n", 131 | "# Scoring\n", 132 | "acc_svm = accuracy_score(y_test, y_pred_svm)\n", 133 | "f1_svm = f1_score(y_test, y_pred_svm)" 134 | ], 135 | "metadata": { 136 | "cellView": "form", 137 | "id": "sedJt-y_W4gh" 138 | }, 139 | "execution_count": null, 140 | "outputs": [] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "source": [ 145 | "#@title Kernel SVM\n", 146 | "# Training the Kernel SVM model\n", 147 | "from sklearn.svm import SVC\n", 148 | "classifier_ksvm = SVC(kernel = 'rbf', random_state = 42)\n", 149 | "classifier_ksvm.fit(X_train, y_train)\n", 150 | "\n", 151 | "# Predicting the test values\n", 152 | "y_pred_ksvm = classifier_ksvm.predict(X_test)\n", 153 | "\n", 154 | "# Scoring\n", 155 | "acc_ksvm = accuracy_score(y_test, y_pred_ksvm)\n", 156 | "f1_ksvm = f1_score(y_test, y_pred_ksvm)" 157 | ], 158 | "metadata": { 159 | "cellView": "form", 160 | "id": "Qb0y0DuDXZ2I" 161 | }, 162 | "execution_count": null, 163 | "outputs": [] 164 | }, 165 | { 166 | 
"cell_type": "code", 167 | "source": [ 168 | "#@title Naive Bayes\n", 169 | "# Training the Naive Bayes model\n", 170 | "from sklearn.naive_bayes import GaussianNB\n", 171 | "classifier_nb = GaussianNB()\n", 172 | "classifier_nb.fit(X_train, y_train)\n", 173 | "\n", 174 | "# Predicting the test values\n", 175 | "y_pred_nb = classifier_nb.predict(X_test)\n", 176 | "\n", 177 | "# Scoring\n", 178 | "acc_nb = accuracy_score(y_test, y_pred_nb)\n", 179 | "f1_nb = f1_score(y_test, y_pred_nb)" 180 | ], 181 | "metadata": { 182 | "cellView": "form", 183 | "id": "QsMXewXrX3bR" 184 | }, 185 | "execution_count": null, 186 | "outputs": [] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "source": [ 191 | "#@title Decision Tree Classification\n", 192 | "# Training the Decision Tree model\n", 193 | "from sklearn.tree import DecisionTreeClassifier\n", 194 | "classifier_dtc = DecisionTreeClassifier(criterion = 'entropy', random_state = 42)\n", 195 | "classifier_dtc.fit(X_train, y_train)\n", 196 | "\n", 197 | "# Predicting the test values\n", 198 | "y_pred_dtc = classifier_dtc.predict(X_test)\n", 199 | "\n", 200 | "# Scoring\n", 201 | "acc_dtc = accuracy_score(y_test, y_pred_dtc)\n", 202 | "f1_dtc = f1_score(y_test, y_pred_dtc)" 203 | ], 204 | "metadata": { 205 | "cellView": "form", 206 | "id": "A4dw2EIxYSHq" 207 | }, 208 | "execution_count": null, 209 | "outputs": [] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "source": [ 214 | "#@title Random Forest Classification\n", 215 | "# Training Random Forest model\n", 216 | "from sklearn.ensemble import RandomForestClassifier\n", 217 | "classifier_rfc = RandomForestClassifier(n_estimators=100, criterion='entropy',\n", 218 | " random_state = 42)\n", 219 | "classifier_rfc.fit(X_train, y_train)\n", 220 | "\n", 221 | "# Predicting the test results\n", 222 | "y_pred_rfc = classifier_rfc.predict(X_test)\n", 223 | "\n", 224 | "# Scoring\n", 225 | "acc_rfc = accuracy_score(y_test, y_pred_rfc)\n", 226 | "f1_rfc = f1_score(y_test, y_pred_rfc)" 
227 | ], 228 | "metadata": { 229 | "cellView": "form", 230 | "id": "q55gb2_WZH3Y" 231 | }, 232 | "execution_count": null, 233 | "outputs": [] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "source": [ 238 | "#@title Results\n", 239 | "scores = np.array([[acc_lr, f1_lr], [acc_knn, f1_knn], [acc_svm, f1_svm],\n", 240 | " [acc_ksvm, f1_svm], [acc_nb, f1_nb], [acc_dtc, f1_dtc],\n", 241 | " [acc_rfc, f1_rfc]])\n", 242 | "result = pd.DataFrame(data = scores,\n", 243 | " columns = ['Accuracy', 'f1 score'],\n", 244 | " index = ['LR', 'KNN', 'SVM', 'KSVM', 'NB', 'DTC', 'RTC'])\n", 245 | "result.style" 246 | ], 247 | "metadata": { 248 | "colab": { 249 | "base_uri": "https://localhost:8080/", 250 | "height": 269 251 | }, 252 | "id": "0btMvmVxZw_R", 253 | "outputId": "27042d88-33b0-4ec9-ba0f-8abbb3cfb1b2" 254 | }, 255 | "execution_count": null, 256 | "outputs": [ 257 | { 258 | "output_type": "execute_result", 259 | "data": { 260 | "text/plain": [ 261 | "" 262 | ], 263 | "text/html": [ 264 | "\n", 266 | "\n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | "
 Accuracyf1 score
LR0.93430.9174
KNN0.94890.9369
SVM0.94890.9369
KSVM0.94890.9369
NB0.95620.9483
DTC0.94890.9369
RTC0.94160.9273
\n" 312 | ] 313 | }, 314 | "metadata": {}, 315 | "execution_count": 9 316 | } 317 | ] 318 | } 319 | ] 320 | } -------------------------------------------------------------------------------- /Classification/Logistic_Regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Logistic_Regression.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [], 9 | "machine_shape": "hm", 10 | "include_colab_link": true 11 | }, 12 | "kernelspec": { 13 | "name": "python3", 14 | "display_name": "Python 3" 15 | } 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "markdown", 20 | "metadata": { 21 | "id": "view-in-github", 22 | "colab_type": "text" 23 | }, 24 | "source": [ 25 | "\"Open" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": { 31 | "id": "0MRC0e0KhQ0S" 32 | }, 33 | "source": [ 34 | "# Logistic Regression" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "metadata": { 40 | "colab": { 41 | "base_uri": "https://localhost:8080/" 42 | }, 43 | "id": "ZyBDlIDqLn_R", 44 | "outputId": "c1ebddbc-bd7d-411d-d3c7-9ce90a20c8c3" 45 | }, 46 | "source": [ 47 | "from google.colab import drive\n", 48 | "drive.mount('/content/drive')" 49 | ], 50 | "execution_count": null, 51 | "outputs": [ 52 | { 53 | "output_type": "stream", 54 | "name": "stdout", 55 | "text": [ 56 | "Mounted at /content/drive\n" 57 | ] 58 | } 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": { 64 | "id": "LWd1UlMnhT2s" 65 | }, 66 | "source": [ 67 | "## Importing the libraries" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "metadata": { 73 | "id": "YvGPUQaHhXfL" 74 | }, 75 | "source": [ 76 | "import numpy as np\n", 77 | "import matplotlib.pyplot as plt\n", 78 | "import pandas as pd" 79 | ], 80 | "execution_count": null, 81 | "outputs": [] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": { 86 | "id": "K1VMqkGvhc3-" 87 | }, 88 | 
"source": [ 89 | "## Importing the dataset" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "metadata": { 95 | "id": "M52QDmyzhh9s", 96 | "colab": { 97 | "base_uri": "https://localhost:8080/", 98 | "height": 206 99 | }, 100 | "outputId": "d689084a-4f24-43be-b2a8-1d3b252f9a1d" 101 | }, 102 | "source": [ 103 | "dataset = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Datasets/50_Startups.csv')\n", 104 | "dataset.head()" 105 | ], 106 | "execution_count": null, 107 | "outputs": [ 108 | { 109 | "output_type": "execute_result", 110 | "data": { 111 | "text/html": [ 112 | "\n", 113 | "
\n", 114 | "
\n", 115 | "
\n", 116 | "\n", 129 | "\n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | "
R&D SpendAdministrationMarketing SpendStateProfit
0165349.20136897.80471784.10New York192261.83
1162597.70151377.59443898.53California191792.06
2153441.51101145.55407934.54Florida191050.39
3144372.41118671.85383199.62New York182901.99
4142107.3491391.77366168.42Florida166187.94
\n", 183 | "
\n", 184 | " \n", 194 | " \n", 195 | " \n", 232 | "\n", 233 | " \n", 257 | "
\n", 258 | "
\n", 259 | " " 260 | ], 261 | "text/plain": [ 262 | " R&D Spend Administration Marketing Spend State Profit\n", 263 | "0 165349.20 136897.80 471784.10 New York 192261.83\n", 264 | "1 162597.70 151377.59 443898.53 California 191792.06\n", 265 | "2 153441.51 101145.55 407934.54 Florida 191050.39\n", 266 | "3 144372.41 118671.85 383199.62 New York 182901.99\n", 267 | "4 142107.34 91391.77 366168.42 Florida 166187.94" 268 | ] 269 | }, 270 | "metadata": {}, 271 | "execution_count": 4 272 | } 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "source": [ 278 | "X = dataset.iloc[:, :-1].values\n", 279 | "y = dataset.iloc[:, -1].values" 280 | ], 281 | "metadata": { 282 | "id": "-s7D7xRpeZIY" 283 | }, 284 | "execution_count": null, 285 | "outputs": [] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": { 290 | "id": "YvxIPVyMhmKp" 291 | }, 292 | "source": [ 293 | "## Splitting the dataset into the Training set and Test set" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "metadata": { 299 | "id": "AVzJWAXIhxoC" 300 | }, 301 | "source": [ 302 | "from sklearn.model_selection import train_test_split\n", 303 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)" 304 | ], 305 | "execution_count": null, 306 | "outputs": [] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "metadata": { 311 | "id": "P3nS3-6r1i2B", 312 | "colab": { 313 | "base_uri": "https://localhost:8080/" 314 | }, 315 | "outputId": "04cef19c-235a-4e74-c75b-fdb275db0528" 316 | }, 317 | "source": [ 318 | "print(X_train)" 319 | ], 320 | "execution_count": null, 321 | "outputs": [ 322 | { 323 | "output_type": "stream", 324 | "name": "stdout", 325 | "text": [ 326 | "[[46426.07 157693.92 210797.67 'California']\n", 327 | " [91749.16 114175.79 294919.57 'Florida']\n", 328 | " [130298.13 145530.06 323876.68 'Florida']\n", 329 | " [119943.24 156547.42 256512.92 'Florida']\n", 330 | " [1000.23 124153.04 1903.93 'New York']\n", 331 | " [542.05 
51743.15 0.0 'New York']\n", 332 | " [65605.48 153032.06 107138.38 'New York']\n", 333 | " [114523.61 122616.84 261776.23 'New York']\n", 334 | " [61994.48 115641.28 91131.24 'Florida']\n", 335 | " [63408.86 129219.61 46085.25 'California']\n", 336 | " [78013.11 121597.55 264346.06 'California']\n", 337 | " [23640.93 96189.63 148001.11 'California']\n", 338 | " [76253.86 113867.3 298664.47 'California']\n", 339 | " [15505.73 127382.3 35534.17 'New York']\n", 340 | " [120542.52 148718.95 311613.29 'New York']\n", 341 | " [91992.39 135495.07 252664.93 'California']\n", 342 | " [64664.71 139553.16 137962.62 'California']\n", 343 | " [131876.9 99814.71 362861.36 'New York']\n", 344 | " [94657.16 145077.58 282574.31 'New York']\n", 345 | " [28754.33 118546.05 172795.67 'California']\n", 346 | " [0.0 116983.8 45173.06 'California']\n", 347 | " [162597.7 151377.59 443898.53 'California']\n", 348 | " [93863.75 127320.38 249839.44 'Florida']\n", 349 | " [44069.95 51283.14 197029.42 'California']\n", 350 | " [77044.01 99281.34 140574.81 'New York']\n", 351 | " [134615.46 147198.87 127716.82 'California']\n", 352 | " [67532.53 105751.03 304768.73 'Florida']\n", 353 | " [28663.76 127056.21 201126.82 'Florida']\n", 354 | " [78389.47 153773.43 299737.29 'New York']\n", 355 | " [86419.7 153514.11 0.0 'New York']\n", 356 | " [123334.88 108679.17 304981.62 'California']\n", 357 | " [38558.51 82982.09 174999.3 'California']\n", 358 | " [1315.46 115816.21 297114.46 'Florida']\n", 359 | " [144372.41 118671.85 383199.62 'New York']\n", 360 | " [165349.2 136897.8 471784.1 'New York']\n", 361 | " [0.0 135426.92 0.0 'California']\n", 362 | " [22177.74 154806.14 28334.72 'California']]\n" 363 | ] 364 | } 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "metadata": { 370 | "id": "8dpDLojm1mVG", 371 | "colab": { 372 | "base_uri": "https://localhost:8080/" 373 | }, 374 | "outputId": "2bf9fb67-5df0-49ce-aa06-d87ab459655c" 375 | }, 376 | "source": [ 377 | "print(y_train)" 378 | ], 379 
| "execution_count": null, 380 | "outputs": [ 381 | { 382 | "output_type": "stream", 383 | "name": "stdout", 384 | "text": [ 385 | "[ 96712.8 124266.9 155752.6 132602.65 64926.08 35673.41 101004.64\n", 386 | " 129917.04 99937.59 97427.84 126992.93 71498.49 118474.03 69758.98\n", 387 | " 152211.77 134307.35 107404.34 156991.12 125370.37 78239.91 14681.4\n", 388 | " 191792.06 141585.52 89949.14 108552.04 156122.51 108733.99 90708.19\n", 389 | " 111313.02 122776.86 149759.96 81005.76 49490.75 182901.99 192261.83\n", 390 | " 42559.73 65200.33]\n" 391 | ] 392 | } 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "metadata": { 398 | "id": "qbb7i0DH1qui", 399 | "colab": { 400 | "base_uri": "https://localhost:8080/" 401 | }, 402 | "outputId": "b8bf4ad7-ec5a-4041-afe4-182e67999732" 403 | }, 404 | "source": [ 405 | "print(X_test)" 406 | ], 407 | "execution_count": null, 408 | "outputs": [ 409 | { 410 | "output_type": "stream", 411 | "name": "stdout", 412 | "text": [ 413 | "[[66051.52 182645.56 118148.2 'Florida']\n", 414 | " [100671.96 91790.61 249744.55 'California']\n", 415 | " [101913.08 110594.11 229160.95 'Florida']\n", 416 | " [27892.92 84710.77 164470.71 'Florida']\n", 417 | " [153441.51 101145.55 407934.54 'Florida']\n", 418 | " [72107.6 127864.55 353183.81 'New York']\n", 419 | " [20229.59 65947.93 185265.1 'New York']\n", 420 | " [61136.38 152701.92 88218.23 'New York']\n", 421 | " [73994.56 122782.75 303319.26 'Florida']\n", 422 | " [142107.34 91391.77 366168.42 'Florida']\n", 423 | " [55493.95 103057.49 214634.81 'Florida']\n", 424 | " [46014.02 85047.44 205517.64 'New York']\n", 425 | " [75328.87 144135.98 134050.07 'Florida']]\n" 426 | ] 427 | } 428 | ] 429 | }, 430 | { 431 | "cell_type": "code", 432 | "metadata": { 433 | "id": "kj1hnFAR1s5w", 434 | "colab": { 435 | "base_uri": "https://localhost:8080/" 436 | }, 437 | "outputId": "b8219543-196c-4a81-c225-3078fe8d9182" 438 | }, 439 | "source": [ 440 | "print(y_test)" 441 | ], 442 | "execution_count": 
null, 443 | "outputs": [ 444 | { 445 | "output_type": "stream", 446 | "name": "stdout", 447 | "text": [ 448 | "[103282.38 144259.4 146121.95 77798.83 191050.39 105008.31 81229.06\n", 449 | " 97483.56 110352.25 166187.94 96778.92 96479.51 105733.54]\n" 450 | ] 451 | } 452 | ] 453 | }, 454 | { 455 | "cell_type": "markdown", 456 | "metadata": { 457 | "id": "kW3c7UYih0hT" 458 | }, 459 | "source": [ 460 | "## Feature Scaling (Write appropriate code for feature scaling)" 461 | ] 462 | }, 463 | { 464 | "cell_type": "code", 465 | "metadata": { 466 | "id": "9fQlDPKCh8sc" 467 | }, 468 | "source": [ 469 | "" 470 | ], 471 | "execution_count": null, 472 | "outputs": [] 473 | }, 474 | { 475 | "cell_type": "code", 476 | "metadata": { 477 | "id": "syrnD1Op2BSR", 478 | "colab": { 479 | "base_uri": "https://localhost:8080/" 480 | }, 481 | "outputId": "87b70fcf-cb09-4040-a83f-9c335c1bb052" 482 | }, 483 | "source": [ 484 | "print(X_train)" 485 | ], 486 | "execution_count": null, 487 | "outputs": [ 488 | { 489 | "output_type": "stream", 490 | "name": "stdout", 491 | "text": [ 492 | "[[46426.07 157693.92 210797.67 'California']\n", 493 | " [91749.16 114175.79 294919.57 'Florida']\n", 494 | " [130298.13 145530.06 323876.68 'Florida']\n", 495 | " [119943.24 156547.42 256512.92 'Florida']\n", 496 | " [1000.23 124153.04 1903.93 'New York']\n", 497 | " [542.05 51743.15 0.0 'New York']\n", 498 | " [65605.48 153032.06 107138.38 'New York']\n", 499 | " [114523.61 122616.84 261776.23 'New York']\n", 500 | " [61994.48 115641.28 91131.24 'Florida']\n", 501 | " [63408.86 129219.61 46085.25 'California']\n", 502 | " [78013.11 121597.55 264346.06 'California']\n", 503 | " [23640.93 96189.63 148001.11 'California']\n", 504 | " [76253.86 113867.3 298664.47 'California']\n", 505 | " [15505.73 127382.3 35534.17 'New York']\n", 506 | " [120542.52 148718.95 311613.29 'New York']\n", 507 | " [91992.39 135495.07 252664.93 'California']\n", 508 | " [64664.71 139553.16 137962.62 'California']\n", 509 | " 
[131876.9 99814.71 362861.36 'New York']\n", 510 | " [94657.16 145077.58 282574.31 'New York']\n", 511 | " [28754.33 118546.05 172795.67 'California']\n", 512 | " [0.0 116983.8 45173.06 'California']\n", 513 | " [162597.7 151377.59 443898.53 'California']\n", 514 | " [93863.75 127320.38 249839.44 'Florida']\n", 515 | " [44069.95 51283.14 197029.42 'California']\n", 516 | " [77044.01 99281.34 140574.81 'New York']\n", 517 | " [134615.46 147198.87 127716.82 'California']\n", 518 | " [67532.53 105751.03 304768.73 'Florida']\n", 519 | " [28663.76 127056.21 201126.82 'Florida']\n", 520 | " [78389.47 153773.43 299737.29 'New York']\n", 521 | " [86419.7 153514.11 0.0 'New York']\n", 522 | " [123334.88 108679.17 304981.62 'California']\n", 523 | " [38558.51 82982.09 174999.3 'California']\n", 524 | " [1315.46 115816.21 297114.46 'Florida']\n", 525 | " [144372.41 118671.85 383199.62 'New York']\n", 526 | " [165349.2 136897.8 471784.1 'New York']\n", 527 | " [0.0 135426.92 0.0 'California']\n", 528 | " [22177.74 154806.14 28334.72 'California']]\n" 529 | ] 530 | } 531 | ] 532 | }, 533 | { 534 | "cell_type": "code", 535 | "metadata": { 536 | "id": "JUd6iBRp2C3L", 537 | "colab": { 538 | "base_uri": "https://localhost:8080/" 539 | }, 540 | "outputId": "d7036047-e059-42a4-e4ed-698fe17c5cba" 541 | }, 542 | "source": [ 543 | "print(X_test)" 544 | ], 545 | "execution_count": null, 546 | "outputs": [ 547 | { 548 | "output_type": "stream", 549 | "name": "stdout", 550 | "text": [ 551 | "[[66051.52 182645.56 118148.2 'Florida']\n", 552 | " [100671.96 91790.61 249744.55 'California']\n", 553 | " [101913.08 110594.11 229160.95 'Florida']\n", 554 | " [27892.92 84710.77 164470.71 'Florida']\n", 555 | " [153441.51 101145.55 407934.54 'Florida']\n", 556 | " [72107.6 127864.55 353183.81 'New York']\n", 557 | " [20229.59 65947.93 185265.1 'New York']\n", 558 | " [61136.38 152701.92 88218.23 'New York']\n", 559 | " [73994.56 122782.75 303319.26 'Florida']\n", 560 | " [142107.34 91391.77 
366168.42 'Florida']\n", 561 | " [55493.95 103057.49 214634.81 'Florida']\n", 562 | " [46014.02 85047.44 205517.64 'New York']\n", 563 | " [75328.87 144135.98 134050.07 'Florida']]\n" 564 | ] 565 | } 566 | ] 567 | }, 568 | { 569 | "cell_type": "markdown", 570 | "metadata": { 571 | "id": "bb6jCOCQiAmP" 572 | }, 573 | "source": [ 574 | "## Training the Logistic Regression model on the Training set" 575 | ] 576 | }, 577 | { 578 | "cell_type": "code", 579 | "metadata": { 580 | "id": "e0pFVAmciHQs" 581 | }, 582 | "source": [ 583 | "from sklearn.linear_model import LogisticRegression\n", 584 | "classifier = LogisticRegression(random_state = 0)\n", 585 | "classifier.fit(X_train, y_train)\n", 586 | "#classifier.fit(X_train, y_train)" 587 | ], 588 | "execution_count": null, 589 | "outputs": [] 590 | }, 591 | { 592 | "cell_type": "markdown", 593 | "metadata": { 594 | "id": "yyxW5b395mR2" 595 | }, 596 | "source": [ 597 | "## Predicting a new result" 598 | ] 599 | }, 600 | { 601 | "cell_type": "code", 602 | "metadata": { 603 | "id": "f8YOXsQy58rP" 604 | }, 605 | "source": [ 606 | "print(classifier.predict(sc.transform([[30,87000]])))" 607 | ], 608 | "execution_count": null, 609 | "outputs": [] 610 | }, 611 | { 612 | "cell_type": "markdown", 613 | "metadata": { 614 | "id": "vKYVQH-l5NpE" 615 | }, 616 | "source": [ 617 | "## Predicting the Test set results" 618 | ] 619 | }, 620 | { 621 | "cell_type": "code", 622 | "metadata": { 623 | "id": "p6VMTb2O4hwM" 624 | }, 625 | "source": [ 626 | "y_pred = classifier.predict(X_test)\n", 627 | "print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))" 628 | ], 629 | "execution_count": null, 630 | "outputs": [] 631 | }, 632 | { 633 | "cell_type": "markdown", 634 | "metadata": { 635 | "id": "h4Hwj34ziWQW" 636 | }, 637 | "source": [ 638 | "## Making the Confusion Matrix" 639 | ] 640 | }, 641 | { 642 | "cell_type": "code", 643 | "metadata": { 644 | "id": "D6bpZwUiiXic" 645 | }, 646 | "source": [ 647 | "from 
sklearn.metrics import confusion_matrix, accuracy_score\n", 648 | "cm = confusion_matrix(y_test, y_pred)\n", 649 | "print(cm)\n", 650 | "accuracy_score(y_test, y_pred)" 651 | ], 652 | "execution_count": null, 653 | "outputs": [] 654 | }, 655 | { 656 | "cell_type": "markdown", 657 | "metadata": { 658 | "id": "6OMC_P0diaoD" 659 | }, 660 | "source": [ 661 | "## Visualising the Training set results" 662 | ] 663 | }, 664 | { 665 | "cell_type": "code", 666 | "metadata": { 667 | "id": "_NOjKvZRid5l" 668 | }, 669 | "source": [ 670 | "from matplotlib.colors import ListedColormap\n", 671 | "X_set, y_set = sc.inverse_transform(X_train), y_train\n", 672 | "X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 10, stop = X_set[:, 0].max() + 10, step = 0.25),\n", 673 | " np.arange(start = X_set[:, 1].min() - 1000, stop = X_set[:, 1].max() + 1000, step = 0.25))\n", 674 | "plt.contourf(X1, X2, classifier.predict(sc.transform(np.array([X1.ravel(), X2.ravel()]).T)).reshape(X1.shape),\n", 675 | " alpha = 0.75, cmap = ListedColormap(('red', 'green')))\n", 676 | "plt.xlim(X1.min(), X1.max())\n", 677 | "plt.ylim(X2.min(), X2.max())\n", 678 | "for i, j in enumerate(np.unique(y_set)):\n", 679 | " plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], c = ListedColormap(('red', 'green'))(i), label = j)\n", 680 | "plt.title('Logistic Regression (Training set)')\n", 681 | "plt.xlabel('Age')\n", 682 | "plt.ylabel('Estimated Salary')\n", 683 | "plt.legend()\n", 684 | "plt.show()" 685 | ], 686 | "execution_count": null, 687 | "outputs": [] 688 | }, 689 | { 690 | "cell_type": "markdown", 691 | "metadata": { 692 | "id": "SZ-j28aPihZx" 693 | }, 694 | "source": [ 695 | "## Visualising the Test set results" 696 | ] 697 | }, 698 | { 699 | "cell_type": "code", 700 | "metadata": { 701 | "id": "qeTjz2vDilAC" 702 | }, 703 | "source": [ 704 | "from matplotlib.colors import ListedColormap\n", 705 | "X_set, y_set = sc.inverse_transform(X_test), y_test\n", 706 | "X1, X2 = np.meshgrid(np.arange(start = 
X_set[:, 0].min() - 10, stop = X_set[:, 0].max() + 10, step = 0.25),\n", 707 | " np.arange(start = X_set[:, 1].min() - 1000, stop = X_set[:, 1].max() + 1000, step = 0.25))\n", 708 | "plt.contourf(X1, X2, classifier.predict(sc.transform(np.array([X1.ravel(), X2.ravel()]).T)).reshape(X1.shape),\n", 709 | " alpha = 0.75, cmap = ListedColormap(('red', 'green')))\n", 710 | "plt.xlim(X1.min(), X1.max())\n", 711 | "plt.ylim(X2.min(), X2.max())\n", 712 | "for i, j in enumerate(np.unique(y_set)):\n", 713 | " plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], c = ListedColormap(('red', 'green'))(i), label = j)\n", 714 | "plt.title('Logistic Regression (Test set)')\n", 715 | "plt.xlabel('Age')\n", 716 | "plt.ylabel('Estimated Salary')\n", 717 | "plt.legend()\n", 718 | "plt.show()" 719 | ], 720 | "execution_count": null, 721 | "outputs": [] 722 | } 723 | ] 724 | } -------------------------------------------------------------------------------- /Convolutional Neural Network/Dog_Cat_Prediction_Using_CNN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Dog Cat Prediction Using CNN.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [], 9 | "include_colab_link": true 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "metadata": { 20 | "id": "view-in-github", 21 | "colab_type": "text" 22 | }, 23 | "source": [ 24 | "\"Open" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": { 30 | "id": "3DR-eO17geWu" 31 | }, 32 | "source": [ 33 | "# Convolutional Neural Network" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "source": [ 39 | "## Dog Cat Prediction " 40 | ], 41 | "metadata": { 42 | "id": "s_5sRChomAVK" 43 | } 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": { 48 | "id": "EMefrVPCg-60" 49 | }, 50 
| "source": [ 51 | "### Importing the libraries" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "metadata": { 57 | "id": "sCV30xyVhFbE" 58 | }, 59 | "source": [ 60 | "import tensorflow as tf\n", 61 | "from keras.preprocessing.image import ImageDataGenerator" 62 | ], 63 | "execution_count": null, 64 | "outputs": [] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "metadata": { 69 | "id": "FIleuCAjoFD8" 70 | }, 71 | "source": [ 72 | "tf.__version__" 73 | ], 74 | "execution_count": null, 75 | "outputs": [] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": { 80 | "id": "oxQxCBWyoGPE" 81 | }, 82 | "source": [ 83 | "## Part 1 - Data Preprocessing" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": { 89 | "id": "MvE-heJNo3GG" 90 | }, 91 | "source": [ 92 | "### Preprocessing the Training set" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "metadata": { 98 | "id": "0koUcJMJpEBD" 99 | }, 100 | "source": [ 101 | "train_datagen = ImageDataGenerator(rescale = 1./255,\n", 102 | " shear_range = 0.2,\n", 103 | " zoom_range = 0.2,\n", 104 | " horizontal_flip = True)\n", 105 | "training_set = train_datagen.flow_from_directory('dataset/training_set',\n", 106 | " target_size = (64, 64),\n", 107 | " batch_size = 32,\n", 108 | " class_mode = 'binary')" 109 | ], 110 | "execution_count": null, 111 | "outputs": [] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": { 116 | "id": "mrCMmGw9pHys" 117 | }, 118 | "source": [ 119 | "### Preprocessing the Test set" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "metadata": { 125 | "id": "SH4WzfOhpKc3" 126 | }, 127 | "source": [ 128 | "test_datagen = ImageDataGenerator(rescale = 1./255)\n", 129 | "test_set = test_datagen.flow_from_directory('dataset/test_set',\n", 130 | " target_size = (64, 64),\n", 131 | " batch_size = 32,\n", 132 | " class_mode = 'binary')" 133 | ], 134 | "execution_count": null, 135 | "outputs": [] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": { 
140 | "id": "af8O4l90gk7B" 141 | }, 142 | "source": [ 143 | "## Part 2 - Building the CNN" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": { 149 | "id": "ces1gXY2lmoX" 150 | }, 151 | "source": [ 152 | "### Initialising the CNN" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "metadata": { 158 | "id": "SAUt4UMPlhLS" 159 | }, 160 | "source": [ 161 | "cnn = tf.keras.models.Sequential()" 162 | ], 163 | "execution_count": null, 164 | "outputs": [] 165 | }, 166 | { 167 | "cell_type": "markdown", 168 | "metadata": { 169 | "id": "u5YJj_XMl5LF" 170 | }, 171 | "source": [ 172 | "### Step 1 - Convolution" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "metadata": { 178 | "id": "XPzPrMckl-hV" 179 | }, 180 | "source": [ 181 | "cnn.add(tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu', input_shape=[64, 64, 3]))" 182 | ], 183 | "execution_count": null, 184 | "outputs": [] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": { 189 | "id": "tf87FpvxmNOJ" 190 | }, 191 | "source": [ 192 | "### Step 2 - Pooling" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "metadata": { 198 | "id": "ncpqPl69mOac" 199 | }, 200 | "source": [ 201 | "cnn.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))" 202 | ], 203 | "execution_count": null, 204 | "outputs": [] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": { 209 | "id": "xaTOgD8rm4mU" 210 | }, 211 | "source": [ 212 | "### Adding a second convolutional layer" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "metadata": { 218 | "id": "i_-FZjn_m8gk" 219 | }, 220 | "source": [ 221 | "cnn.add(tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu'))\n", 222 | "cnn.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))" 223 | ], 224 | "execution_count": null, 225 | "outputs": [] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": { 230 | "id": "tmiEuvTunKfk" 231 | }, 232 | "source": [ 233 | "### Step 3 - 
Flattening" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "metadata": { 239 | "id": "6AZeOGCvnNZn" 240 | }, 241 | "source": [ 242 | "cnn.add(tf.keras.layers.Flatten())" 243 | ], 244 | "execution_count": null, 245 | "outputs": [] 246 | }, 247 | { 248 | "cell_type": "markdown", 249 | "metadata": { 250 | "id": "dAoSECOm203v" 251 | }, 252 | "source": [ 253 | "### Step 4 - Full Connection" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "metadata": { 259 | "id": "8GtmUlLd26Nq" 260 | }, 261 | "source": [ 262 | "cnn.add(tf.keras.layers.Dense(units=128, activation='relu'))" 263 | ], 264 | "execution_count": null, 265 | "outputs": [] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "metadata": { 270 | "id": "yTldFvbX28Na" 271 | }, 272 | "source": [ 273 | "### Step 5 - Output Layer" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "metadata": { 279 | "id": "1p_Zj1Mc3Ko_" 280 | }, 281 | "source": [ 282 | "cnn.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))" 283 | ], 284 | "execution_count": null, 285 | "outputs": [] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": { 290 | "id": "D6XkI90snSDl" 291 | }, 292 | "source": [ 293 | "## Part 3 - Training the CNN" 294 | ] 295 | }, 296 | { 297 | "cell_type": "markdown", 298 | "metadata": { 299 | "id": "vfrFQACEnc6i" 300 | }, 301 | "source": [ 302 | "### Compiling the CNN" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "metadata": { 308 | "id": "NALksrNQpUlJ" 309 | }, 310 | "source": [ 311 | "cnn.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])" 312 | ], 313 | "execution_count": null, 314 | "outputs": [] 315 | }, 316 | { 317 | "cell_type": "markdown", 318 | "metadata": { 319 | "id": "ehS-v3MIpX2h" 320 | }, 321 | "source": [ 322 | "### Training the CNN on the Training set and evaluating it on the Test set" 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "metadata": { 328 | "id": "XUj1W4PJptta" 329 | }, 330 | "source": [ 
331 | "cnn.fit(x = training_set, validation_data = test_set, epochs = 25)" 332 | ], 333 | "execution_count": null, 334 | "outputs": [] 335 | }, 336 | { 337 | "cell_type": "markdown", 338 | "metadata": { 339 | "id": "U3PZasO0006Z" 340 | }, 341 | "source": [ 342 | "## Part 4 - Making a single prediction" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "metadata": { 348 | "id": "gsSiWEJY1BPB" 349 | }, 350 | "source": [ 351 | "import numpy as np\n", 352 | "from keras.preprocessing import image\n", 353 | "test_image = image.load_img('dataset/single_prediction/cat_or_dog_1.jpg', target_size = (64, 64))\n", 354 | "test_image = image.img_to_array(test_image)\n", 355 | "test_image = np.expand_dims(test_image, axis = 0)\n", 356 | "result = cnn.predict(test_image)\n", 357 | "training_set.class_indices\n", 358 | "if result[0][0] == 1:\n", 359 | " prediction = 'dog'\n", 360 | "else:\n", 361 | " prediction = 'cat'" 362 | ], 363 | "execution_count": null, 364 | "outputs": [] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "metadata": { 369 | "id": "ED9KB3I54c1i" 370 | }, 371 | "source": [ 372 | "print(prediction)" 373 | ], 374 | "execution_count": null, 375 | "outputs": [] 376 | } 377 | ] 378 | } -------------------------------------------------------------------------------- /Extraa/BNLP.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "BNLP.ipynb", 7 | "provenance": [], 8 | "authorship_tag": "ABX9TyMROqozYJdXidtM0wAAMv9i", 9 | "include_colab_link": true 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "language_info": { 16 | "name": "python" 17 | } 18 | }, 19 | "cells": [ 20 | { 21 | "cell_type": "markdown", 22 | "metadata": { 23 | "id": "view-in-github", 24 | "colab_type": "text" 25 | }, 26 | "source": [ 27 | "\"Open" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | 
"execution_count": 1, 33 | "metadata": { 34 | "colab": { 35 | "base_uri": "https://localhost:8080/" 36 | }, 37 | "id": "TNPC-NlVPBlq", 38 | "outputId": "4260ed05-a90d-4d45-e30b-e3571d405008" 39 | }, 40 | "outputs": [ 41 | { 42 | "output_type": "stream", 43 | "name": "stdout", 44 | "text": [ 45 | "Collecting bnlp_toolkit\n", 46 | " Downloading bnlp_toolkit-3.1.2-py3-none-any.whl (17 kB)\n", 47 | "Requirement already satisfied: scipy in /usr/local/lib/python3.7/dist-packages (from bnlp_toolkit) (1.4.1)\n", 48 | "Collecting sklearn-crfsuite\n", 49 | " Downloading sklearn_crfsuite-0.3.6-py2.py3-none-any.whl (12 kB)\n", 50 | "Collecting gensim==4.0.1\n", 51 | " Downloading gensim-4.0.1-cp37-cp37m-manylinux1_x86_64.whl (23.9 MB)\n", 52 | "\u001b[K |████████████████████████████████| 23.9 MB 1.4 MB/s \n", 53 | "\u001b[?25hCollecting sentencepiece\n", 54 | " Downloading sentencepiece-0.1.96-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n", 55 | "\u001b[K |████████████████████████████████| 1.2 MB 63.4 MB/s \n", 56 | "\u001b[?25hRequirement already satisfied: wasabi in /usr/local/lib/python3.7/dist-packages (from bnlp_toolkit) (0.9.0)\n", 57 | "Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from bnlp_toolkit) (1.19.5)\n", 58 | "Requirement already satisfied: nltk in /usr/local/lib/python3.7/dist-packages (from bnlp_toolkit) (3.2.5)\n", 59 | "Requirement already satisfied: smart-open>=1.8.1 in /usr/local/lib/python3.7/dist-packages (from gensim==4.0.1->bnlp_toolkit) (5.2.1)\n", 60 | "Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from nltk->bnlp_toolkit) (1.15.0)\n", 61 | "Requirement already satisfied: tabulate in /usr/local/lib/python3.7/dist-packages (from sklearn-crfsuite->bnlp_toolkit) (0.8.9)\n", 62 | "Requirement already satisfied: tqdm>=2.0 in /usr/local/lib/python3.7/dist-packages (from sklearn-crfsuite->bnlp_toolkit) (4.62.3)\n", 63 | "Collecting python-crfsuite>=0.8.3\n", 64 | " 
Downloading python_crfsuite-0.9.7-cp37-cp37m-manylinux1_x86_64.whl (743 kB)\n", 65 | "\u001b[K |████████████████████████████████| 743 kB 56.9 MB/s \n", 66 | "\u001b[?25hInstalling collected packages: python-crfsuite, sklearn-crfsuite, sentencepiece, gensim, bnlp-toolkit\n", 67 | " Attempting uninstall: gensim\n", 68 | " Found existing installation: gensim 3.6.0\n", 69 | " Uninstalling gensim-3.6.0:\n", 70 | " Successfully uninstalled gensim-3.6.0\n", 71 | "Successfully installed bnlp-toolkit-3.1.2 gensim-4.0.1 python-crfsuite-0.9.7 sentencepiece-0.1.96 sklearn-crfsuite-0.3.6\n" 72 | ] 73 | } 74 | ], 75 | "source": [ 76 | "!pip install bnlp_toolkit" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "source": [ 82 | "from bnlp import BasicTokenizer\n", 83 | "from bnlp import NLTKTokenizer" 84 | ], 85 | "metadata": { 86 | "id": "2GDOeiZhPOCv" 87 | }, 88 | "execution_count": 5, 89 | "outputs": [] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "source": [ 94 | "basic_tokenizer = BasicTokenizer()\n", 95 | "raw_text = \"আমি বাংলায় গান গাই।\"\n", 96 | "tokens = basic_tokenizer.tokenize(raw_text)\n", 97 | "print(tokens)" 98 | ], 99 | "metadata": { 100 | "colab": { 101 | "base_uri": "https://localhost:8080/" 102 | }, 103 | "id": "soWsVpN0PqBC", 104 | "outputId": "b084a3fe-d442-4943-d31d-44b5a116a8b4" 105 | }, 106 | "execution_count": 3, 107 | "outputs": [ 108 | { 109 | "output_type": "stream", 110 | "name": "stdout", 111 | "text": [ 112 | "['আমি', 'বাংলায়', 'গান', 'গাই', '।']\n" 113 | ] 114 | } 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "source": [ 120 | "bnltk = NLTKTokenizer()\n", 121 | "text = \"আমি ভাত খাই। সে বাজারে যায়। তিনি কি সত্যিই ভালো মানুষ?\"\n", 122 | "word_tokens = bnltk.word_tokenize(text)\n", 123 | "sentence_tokens = bnltk.sentence_tokenize(text)\n", 124 | "print(word_tokens)\n", 125 | "print(sentence_tokens)" 126 | ], 127 | "metadata": { 128 | "colab": { 129 | "base_uri": "https://localhost:8080/" 130 | }, 131 | "id": "xJdTaFBwP91P", 132 
| "outputId": "0e378b58-2574-4b9d-caae-98f1d0c4f66e" 133 | }, 134 | "execution_count": 6, 135 | "outputs": [ 136 | { 137 | "output_type": "stream", 138 | "name": "stdout", 139 | "text": [ 140 | "['আমি', 'ভাত', 'খাই', '।', 'সে', 'বাজারে', 'যায়', '।', 'তিনি', 'কি', 'সত্যিই', 'ভালো', 'মানুষ', '?']\n", 141 | "['আমি ভাত খাই।', 'সে বাজারে যায়।', 'তিনি কি সত্যিই ভালো মানুষ?']\n" 142 | ] 143 | } 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "source": [ 149 | "" 150 | ], 151 | "metadata": { 152 | "id": "7Gem5V8QQNDY" 153 | }, 154 | "execution_count": null, 155 | "outputs": [] 156 | } 157 | ] 158 | } -------------------------------------------------------------------------------- /Extraa/DTALE_example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "DTALE_example.ipynb", 7 | "provenance": [], 8 | "authorship_tag": "ABX9TyOcHTwn0QntuAEDTOsOhrQO", 9 | "include_colab_link": true 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "language_info": { 16 | "name": "python" 17 | } 18 | }, 19 | "cells": [ 20 | { 21 | "cell_type": "markdown", 22 | "metadata": { 23 | "id": "view-in-github", 24 | "colab_type": "text" 25 | }, 26 | "source": [ 27 | "\"Open" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "source": [ 33 | "!pip install dtale" 34 | ], 35 | "metadata": { 36 | "colab": { 37 | "base_uri": "https://localhost:8080/", 38 | "height": 1000 39 | }, 40 | "id": "qK04FbguAMnJ", 41 | "outputId": "09b3a512-71f7-4a21-ca98-95868a75e11e" 42 | }, 43 | "execution_count": null, 44 | "outputs": [ 45 | { 46 | "output_type": "stream", 47 | "name": "stdout", 48 | "text": [ 49 | "Collecting dtale\n", 50 | " Downloading dtale-2.1.2-py2.py3-none-any.whl (12.7 MB)\n", 51 | "\u001b[K |████████████████████████████████| 12.7 MB 5.2 MB/s \n", 52 | "\u001b[?25hRequirement already satisfied: plotly>=5.0.0 in 
/usr/local/lib/python3.7/dist-packages (from dtale) (5.5.0)\n", 53 | "Requirement already satisfied: xarray in /usr/local/lib/python3.7/dist-packages (from dtale) (0.18.2)\n", 54 | "Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from dtale) (1.15.0)\n", 55 | "Collecting dash-daq\n", 56 | " Downloading dash_daq-0.5.0.tar.gz (642 kB)\n", 57 | "\u001b[K |████████████████████████████████| 642 kB 56.5 MB/s \n", 58 | "\u001b[?25hCollecting flask-ngrok\n", 59 | " Downloading flask_ngrok-0.0.25-py3-none-any.whl (3.1 kB)\n", 60 | "Collecting statsmodels==0.12.2\n", 61 | " Downloading statsmodels-0.12.2-cp37-cp37m-manylinux1_x86_64.whl (9.5 MB)\n", 62 | "\u001b[K |████████████████████████████████| 9.5 MB 40.7 MB/s \n", 63 | "\u001b[?25hCollecting kaleido\n", 64 | " Downloading kaleido-0.2.1-py2.py3-none-manylinux1_x86_64.whl (79.9 MB)\n", 65 | "\u001b[K |████████████████████████████████| 79.9 MB 104 kB/s \n", 66 | "\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from dtale) (1.21.5)\n", 67 | "Collecting dash-bootstrap-components\n", 68 | " Downloading dash_bootstrap_components-1.0.3-py3-none-any.whl (209 kB)\n", 69 | "\u001b[K |████████████████████████████████| 209 kB 68.0 MB/s \n", 70 | "\u001b[?25hRequirement already satisfied: itsdangerous in /usr/local/lib/python3.7/dist-packages (from dtale) (1.1.0)\n", 71 | "Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from dtale) (1.3.5)\n", 72 | "Requirement already satisfied: future>=0.14.0 in /usr/local/lib/python3.7/dist-packages (from dtale) (0.16.0)\n", 73 | "Collecting squarify\n", 74 | " Downloading squarify-0.4.3-py3-none-any.whl (4.3 kB)\n", 75 | "Requirement already satisfied: matplotlib in /usr/local/lib/python3.7/dist-packages (from dtale) (3.2.2)\n", 76 | "Collecting dash-colorscales\n", 77 | " Downloading dash_colorscales-0.0.4.tar.gz (62 kB)\n", 78 | "\u001b[K |████████████████████████████████| 62 kB 624 kB/s 
\n", 79 | "\u001b[?25hCollecting lz4\n", 80 | " Downloading lz4-4.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n", 81 | "\u001b[K |████████████████████████████████| 1.2 MB 55.4 MB/s \n", 82 | "\u001b[?25hRequirement already satisfied: et-xmlfile in /usr/local/lib/python3.7/dist-packages (from dtale) (1.1.0)\n", 83 | "Requirement already satisfied: seaborn in /usr/local/lib/python3.7/dist-packages (from dtale) (0.11.2)\n", 84 | "Requirement already satisfied: xlrd in /usr/local/lib/python3.7/dist-packages (from dtale) (1.1.0)\n", 85 | "Collecting dash>=2.0.0\n", 86 | " Downloading dash-2.3.0-py3-none-any.whl (9.6 MB)\n", 87 | "\u001b[K |████████████████████████████████| 9.6 MB 45.4 MB/s \n", 88 | "\u001b[?25hCollecting packaging<=21.0\n", 89 | " Downloading packaging-21.0-py3-none-any.whl (40 kB)\n", 90 | "\u001b[K |████████████████████████████████| 40 kB 5.2 MB/s \n", 91 | "\u001b[?25hCollecting strsimpy\n", 92 | " Downloading strsimpy-0.2.1-py3-none-any.whl (45 kB)\n", 93 | "\u001b[K |████████████████████████████████| 45 kB 2.8 MB/s \n", 94 | "\u001b[?25hRequirement already satisfied: Flask in /usr/local/lib/python3.7/dist-packages (from dtale) (1.1.4)\n", 95 | "Collecting Flask-Compress\n", 96 | " Downloading Flask_Compress-1.11-py3-none-any.whl (7.9 kB)\n", 97 | "Requirement already satisfied: networkx in /usr/local/lib/python3.7/dist-packages (from dtale) (2.6.3)\n", 98 | "Collecting scikit-learn==0.24.2\n", 99 | " Downloading scikit_learn-0.24.2-cp37-cp37m-manylinux2010_x86_64.whl (22.3 MB)\n", 100 | "\u001b[K |████████████████████████████████| 22.3 MB 10.0 MB/s \n", 101 | "\u001b[?25hCollecting ppscore\n", 102 | " Downloading ppscore-1.2.0.tar.gz (47 kB)\n", 103 | "\u001b[K |████████████████████████████████| 47 kB 4.0 MB/s \n", 104 | "\u001b[?25hCollecting missingno<=0.4.2\n", 105 | " Downloading missingno-0.4.2-py3-none-any.whl (9.7 kB)\n", 106 | "Collecting scipy==1.7.3\n", 107 | " Downloading 
scipy-1.7.3-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (38.1 MB)\n", 108 | "\u001b[K |████████████████████████████████| 38.1 MB 1.2 MB/s \n", 109 | "\u001b[?25hRequirement already satisfied: openpyxl in /usr/local/lib/python3.7/dist-packages (from dtale) (3.0.9)\n", 110 | "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from dtale) (2.23.0)\n", 111 | "Requirement already satisfied: cycler in /usr/local/lib/python3.7/dist-packages (from dtale) (0.11.0)\n", 112 | "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn==0.24.2->dtale) (3.1.0)\n", 113 | "Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.7/dist-packages (from scikit-learn==0.24.2->dtale) (1.1.0)\n", 114 | "Requirement already satisfied: patsy>=0.5 in /usr/local/lib/python3.7/dist-packages (from statsmodels==0.12.2->dtale) (0.5.2)\n", 115 | "Collecting dash-table==5.0.0\n", 116 | " Downloading dash_table-5.0.0-py3-none-any.whl (3.9 kB)\n", 117 | "Collecting dash-html-components==2.0.0\n", 118 | " Downloading dash_html_components-2.0.0-py3-none-any.whl (4.1 kB)\n", 119 | "Collecting dash-core-components==2.0.0\n", 120 | " Downloading dash_core_components-2.0.0-py3-none-any.whl (3.8 kB)\n", 121 | "Requirement already satisfied: click<8.0,>=5.1 in /usr/local/lib/python3.7/dist-packages (from Flask->dtale) (7.1.2)\n", 122 | "Requirement already satisfied: Jinja2<3.0,>=2.10.1 in /usr/local/lib/python3.7/dist-packages (from Flask->dtale) (2.11.3)\n", 123 | "Requirement already satisfied: Werkzeug<2.0,>=0.15 in /usr/local/lib/python3.7/dist-packages (from Flask->dtale) (1.0.1)\n", 124 | "Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.7/dist-packages (from Jinja2<3.0,>=2.10.1->Flask->dtale) (2.0.1)\n", 125 | "Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging<=21.0->dtale) (3.0.7)\n", 126 | "Requirement 
already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas->dtale) (2.8.2)\n", 127 | "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas->dtale) (2018.9)\n", 128 | "Requirement already satisfied: tenacity>=6.2.0 in /usr/local/lib/python3.7/dist-packages (from plotly>=5.0.0->dtale) (8.0.1)\n", 129 | "Collecting brotli\n", 130 | " Downloading Brotli-1.0.9-cp37-cp37m-manylinux1_x86_64.whl (357 kB)\n", 131 | "\u001b[K |████████████████████████████████| 357 kB 49.6 MB/s \n", 132 | "\u001b[?25hRequirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->dtale) (1.4.0)\n", 133 | "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from kiwisolver>=1.0.1->matplotlib->dtale) (3.10.0.2)\n", 134 | "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->dtale) (1.24.3)\n", 135 | "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->dtale) (3.0.4)\n", 136 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->dtale) (2021.10.8)\n", 137 | "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->dtale) (2.10)\n", 138 | "Requirement already satisfied: setuptools>=40.4 in /usr/local/lib/python3.7/dist-packages (from xarray->dtale) (57.4.0)\n", 139 | "Building wheels for collected packages: dash-colorscales, dash-daq, ppscore\n", 140 | " Building wheel for dash-colorscales (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", 141 | " Created wheel for dash-colorscales: filename=dash_colorscales-0.0.4-py3-none-any.whl size=62590 sha256=138f7071fde1bb0d116d92a3e716fca108fbb19a9d780c633c3ae5f11a9de2f5\n", 142 | " Stored in directory: /root/.cache/pip/wheels/2f/22/7e/183ba2af565e3eb955021fbb4fe8fe4a6b1ed8ae3e5c03236a\n", 143 | " Building wheel for dash-daq (setup.py) ... \u001b[?25l\u001b[?25hdone\n", 144 | " Created wheel for dash-daq: filename=dash_daq-0.5.0-py3-none-any.whl size=669714 sha256=20575d7775d6da812a2fc613531265b06f84d0e535ec0b2d2496855643eb39de\n", 145 | " Stored in directory: /root/.cache/pip/wheels/fe/54/53/a8d448db5592874db4313240571ca2c069e55f6a6b29bf5847\n", 146 | " Building wheel for ppscore (setup.py) ... \u001b[?25l\u001b[?25hdone\n", 147 | " Created wheel for ppscore: filename=ppscore-1.2.0-py2.py3-none-any.whl size=13068 sha256=5f43e5fa39cf000985b7b90e089d41119a7a70b3d7237d81bc1f3c4cc1b813b7\n", 148 | " Stored in directory: /root/.cache/pip/wheels/d2/3c/58/2ff786414b21713edc6f4fdb54fdee89ac37bca5edd1f60634\n", 149 | "Successfully built dash-colorscales dash-daq ppscore\n", 150 | "Installing collected packages: brotli, scipy, Flask-Compress, dash-table, dash-html-components, dash-core-components, scikit-learn, dash, strsimpy, statsmodels, squarify, ppscore, packaging, missingno, lz4, kaleido, flask-ngrok, dash-daq, dash-colorscales, dash-bootstrap-components, dtale\n", 151 | " Attempting uninstall: scipy\n", 152 | " Found existing installation: scipy 1.4.1\n", 153 | " Uninstalling scipy-1.4.1:\n", 154 | " Successfully uninstalled scipy-1.4.1\n", 155 | " Attempting uninstall: scikit-learn\n", 156 | " Found existing installation: scikit-learn 1.0.2\n", 157 | " Uninstalling scikit-learn-1.0.2:\n", 158 | " Successfully uninstalled scikit-learn-1.0.2\n", 159 | " Attempting uninstall: statsmodels\n", 160 | " Found existing installation: statsmodels 0.10.2\n", 161 | " Uninstalling statsmodels-0.10.2:\n", 162 | " Successfully uninstalled 
statsmodels-0.10.2\n", 163 | " Attempting uninstall: packaging\n", 164 | " Found existing installation: packaging 21.3\n", 165 | " Uninstalling packaging-21.3:\n", 166 | " Successfully uninstalled packaging-21.3\n", 167 | " Attempting uninstall: missingno\n", 168 | " Found existing installation: missingno 0.5.1\n", 169 | " Uninstalling missingno-0.5.1:\n", 170 | " Successfully uninstalled missingno-0.5.1\n", 171 | "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", 172 | "yellowbrick 1.4 requires scikit-learn>=1.0.0, but you have scikit-learn 0.24.2 which is incompatible.\n", 173 | "datascience 0.10.6 requires folium==0.2.1, but you have folium 0.8.3 which is incompatible.\n", 174 | "albumentations 0.1.12 requires imgaug<0.2.7,>=0.2.5, but you have imgaug 0.2.9 which is incompatible.\u001b[0m\n", 175 | "Successfully installed Flask-Compress-1.11 brotli-1.0.9 dash-2.3.0 dash-bootstrap-components-1.0.3 dash-colorscales-0.0.4 dash-core-components-2.0.0 dash-daq-0.5.0 dash-html-components-2.0.0 dash-table-5.0.0 dtale-2.1.2 flask-ngrok-0.0.25 kaleido-0.2.1 lz4-4.0.0 missingno-0.4.2 packaging-21.0 ppscore-1.2.0 scikit-learn-0.24.2 scipy-1.7.3 squarify-0.4.3 statsmodels-0.12.2 strsimpy-0.2.1\n" 176 | ] 177 | }, 178 | { 179 | "output_type": "display_data", 180 | "data": { 181 | "application/vnd.colab-display-data+json": { 182 | "pip_warning": { 183 | "packages": [ 184 | "packaging" 185 | ] 186 | } 187 | } 188 | }, 189 | "metadata": {} 190 | } 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": { 197 | "colab": { 198 | "base_uri": "https://localhost:8080/", 199 | "height": 34 200 | }, 201 | "id": "xP0HVp5V_1b9", 202 | "outputId": "47a9c89c-c081-4434-c912-1cf8f02853b0" 203 | }, 204 | "outputs": [ 205 | { 206 | "output_type": "execute_result", 207 | "data": { 208 | "text/plain": [ 209 | 
"https://17knnfjgmkx-496ff2e9c6d22116-40001-colab.googleusercontent.com/dtale/main/11" 210 | ] 211 | }, 212 | "metadata": {}, 213 | "execution_count": 15 214 | } 215 | ], 216 | "source": [ 217 | "import pandas as pd\n", 218 | "\n", 219 | "import dtale\n", 220 | "import dtale.app as dtale_app\n", 221 | "\n", 222 | "dtale_app.USE_NGROK = False\n", 223 | "dtale_app.USE_COLAB = True\n", 224 | "\n", 225 | "dtale.show(pd.DataFrame([1,2,3]))" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "source": [ 231 | "x = dtale.show(pd.read_csv('/content/sample_data/california_housing_train.csv'))" 232 | ], 233 | "metadata": { 234 | "id": "WFpUxwJtAr_Z" 235 | }, 236 | "execution_count": null, 237 | "outputs": [] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "source": [ 242 | "x" 243 | ], 244 | "metadata": { 245 | "colab": { 246 | "base_uri": "https://localhost:8080/" 247 | }, 248 | "id": "qfVmH3XEBH_n", 249 | "outputId": "f432f09f-4399-43eb-fa15-2de345e268fa" 250 | }, 251 | "execution_count": null, 252 | "outputs": [ 253 | { 254 | "output_type": "execute_result", 255 | "data": { 256 | "text/plain": [ 257 | "https://17knnfjgmkx-496ff2e9c6d22116-40001-colab.googleusercontent.com/dtale/main/12" 258 | ] 259 | }, 260 | "metadata": {}, 261 | "execution_count": 17 262 | } 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "source": [ 268 | "df = pd.DataFrame(dict(\n", 269 | " a=[1,2,3,4,5],\n", 270 | " b=[6,7,8,9,10],\n", 271 | " c=['a','b','c','d','e']\n", 272 | "))\n", 273 | "dtale.show(\n", 274 | " df,\n", 275 | " locked=['c'],\n", 276 | " column_formats={'a': {'fmt': '0.0000'}},\n", 277 | " nan_display='...',\n", 278 | " background_mode='heatmap-col',\n", 279 | " sort=[('a','DESC')],\n", 280 | " vertical_headers=True,\n", 281 | ")" 282 | ], 283 | "metadata": { 284 | "colab": { 285 | "base_uri": "https://localhost:8080/", 286 | "height": 52 287 | }, 288 | "id": "Pkg-tyUZBw1X", 289 | "outputId": "3e0c15fa-ea11-4060-e86d-c0eea83fad32" 290 | }, 291 | 
"execution_count": null, 292 | "outputs": [ 293 | { 294 | "output_type": "stream", 295 | "name": "stderr", 296 | "text": [ 297 | "Unhandled exception in thread started by ._start at 0x7f02d3605cb0>\n" 298 | ] 299 | }, 300 | { 301 | "output_type": "execute_result", 302 | "data": { 303 | "text/plain": [ 304 | "https://17knnfjgmkx-496ff2e9c6d22116-40001-colab.googleusercontent.com/dtale/main/13" 305 | ] 306 | }, 307 | "metadata": {}, 308 | "execution_count": 18 309 | } 310 | ] 311 | } 312 | ] 313 | } -------------------------------------------------------------------------------- /Extraa/Data_Preprocessing_Tools.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Data_Preprocessing_Tools.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [], 9 | "include_colab_link": true 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "metadata": { 20 | "id": "view-in-github", 21 | "colab_type": "text" 22 | }, 23 | "source": [ 24 | "\"Open" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": { 30 | "id": "37puETfgRzzg" 31 | }, 32 | "source": [ 33 | "# Data Preprocessing Tools" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "source": [ 39 | "## Google Drive Mount" 40 | ], 41 | "metadata": { 42 | "id": "-3LK3IUeaxwE" 43 | } 44 | }, 45 | { 46 | "cell_type": "code", 47 | "metadata": { 48 | "colab": { 49 | "base_uri": "https://localhost:8080/" 50 | }, 51 | "id": "uoybHr_uELhL", 52 | "outputId": "b57fcc7a-d549-41b2-a5a5-d1de861ea496" 53 | }, 54 | "source": [ 55 | "from google.colab import drive\n", 56 | "drive.mount('/content/drive')" 57 | ], 58 | "execution_count": null, 59 | "outputs": [ 60 | { 61 | "output_type": "stream", 62 | "name": "stdout", 63 | "text": [ 64 | "Drive already mounted at /content/drive; to attempt 
to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n" 65 | ] 66 | } 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": { 72 | "id": "EoRP98MpR-qj" 73 | }, 74 | "source": [ 75 | "## Importing the libraries" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "metadata": { 81 | "id": "N-qiINBQSK2g" 82 | }, 83 | "source": [ 84 | "import numpy as np\n", 85 | "import matplotlib.pyplot as plt\n", 86 | "import pandas as pd" 87 | ], 88 | "execution_count": null, 89 | "outputs": [] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": { 94 | "id": "RopL7tUZSQkT" 95 | }, 96 | "source": [ 97 | "## Importing the dataset" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "metadata": { 103 | "id": "WwEPNDWySTKm" 104 | }, 105 | "source": [ 106 | "dataset = pd.read_csv('/content/drive/MyDrive/Datasets/Social_Network_Ads.csv')\n", 107 | "X = dataset.iloc[:, :-1].values\n", 108 | "y = dataset.iloc[:, -1].values" 109 | ], 110 | "execution_count": null, 111 | "outputs": [] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "metadata": { 116 | "id": "hCsz2yCebe1R", 117 | "colab": { 118 | "base_uri": "https://localhost:8080/" 119 | }, 120 | "outputId": "20d06c5d-39a7-439a-c893-b9b2f0dfb238" 121 | }, 122 | "source": [ 123 | "print(X)" 124 | ], 125 | "execution_count": null, 126 | "outputs": [ 127 | { 128 | "output_type": "stream", 129 | "name": "stdout", 130 | "text": [ 131 | "[[15624510 'Male' 19 19000]\n", 132 | " [15810944 'Male' 35 20000]\n", 133 | " [15668575 'Female' 26 43000]\n", 134 | " ...\n", 135 | " [15654296 'Female' 50 20000]\n", 136 | " [15755018 'Male' 36 33000]\n", 137 | " [15594041 'Female' 49 36000]]\n" 138 | ] 139 | } 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "metadata": { 145 | "id": "eYrOQ43XcJR3", 146 | "colab": { 147 | "base_uri": "https://localhost:8080/" 148 | }, 149 | "outputId": "957997f6-31ae-401d-8f4f-0162fd6438e5" 150 | }, 151 | "source": [ 152 | "print(y)" 153 | ], 154 | 
"execution_count": null, 155 | "outputs": [ 156 | { 157 | "output_type": "stream", 158 | "name": "stdout", 159 | "text": [ 160 | "[0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 1 0 0 0 0 0\n", 161 | " 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0\n", 162 | " 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0\n", 163 | " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0\n", 164 | " 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0\n", 165 | " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 1 0 1 1 0 0 0 1 0 0 0 1 0 1\n", 166 | " 1 1 0 0 1 1 0 1 1 0 1 1 0 1 0 0 0 1 1 0 1 1 0 1 0 1 0 1 0 0 1 1 0 1 0 0 1\n", 167 | " 1 0 1 1 0 1 1 0 0 1 0 0 1 1 1 1 1 0 1 1 1 1 0 1 1 0 1 0 1 0 1 1 1 1 0 0 0\n", 168 | " 1 1 0 1 1 1 1 1 0 0 0 1 1 0 0 1 0 1 0 1 1 0 1 0 1 1 0 1 1 0 0 0 1 1 0 1 0\n", 169 | " 0 1 0 1 0 0 1 1 0 0 1 1 0 1 1 0 0 1 0 1 0 1 1 1 0 1 0 1 1 1 0 1 1 1 1 0 1\n", 170 | " 1 1 0 1 0 1 0 0 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 0 1]\n" 171 | ] 172 | } 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": { 178 | "id": "nhfKXNxlSabC" 179 | }, 180 | "source": [ 181 | "## Taking care of missing data" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "metadata": { 187 | "id": "c93k7ipkSexq" 188 | }, 189 | "source": [ 190 | "from sklearn.impute import SimpleImputer\n", 191 | "imputer = SimpleImputer(missing_values=np.nan, strategy='most_frequent')\n", 192 | "imputer.fit(X[:, 1:3])\n", 193 | "X[:, 1:3] = imputer.transform(X[:, 1:3])" 194 | ], 195 | "execution_count": null, 196 | "outputs": [] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "metadata": { 201 | "id": "3UgLdMS_bjq_", 202 | "colab": { 203 | "base_uri": "https://localhost:8080/" 204 | }, 205 | "outputId": "dd615b9c-99fe-4604-a8c4-82a8e23a1a77" 206 | }, 207 | "source": [ 208 | "print(X)" 209 | ], 210 | "execution_count": null, 211 | "outputs": [ 212 | { 213 | "output_type": 
"stream", 214 | "name": "stdout", 215 | "text": [ 216 | "[[15624510 'Male' 19 19000]\n", 217 | " [15810944 'Male' 35 20000]\n", 218 | " [15668575 'Female' 26 43000]\n", 219 | " ...\n", 220 | " [15654296 'Female' 50 20000]\n", 221 | " [15755018 'Male' 36 33000]\n", 222 | " [15594041 'Female' 49 36000]]\n" 223 | ] 224 | } 225 | ] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": { 230 | "id": "CriG6VzVSjcK" 231 | }, 232 | "source": [ 233 | "## Encoding categorical data" 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": { 239 | "id": "AhSpdQWeSsFh" 240 | }, 241 | "source": [ 242 | "### Encoding the Independent Variable" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "metadata": { 248 | "id": "5hwuVddlSwVi" 249 | }, 250 | "source": [ 251 | "from sklearn.compose import ColumnTransformer\n", 252 | "from sklearn.preprocessing import OneHotEncoder\n", 253 | "ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [1])], remainder='passthrough')\n", 254 | "X = np.array(ct.fit_transform(X))" 255 | ], 256 | "execution_count": null, 257 | "outputs": [] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "metadata": { 262 | "id": "f7QspewyeBfx", 263 | "colab": { 264 | "base_uri": "https://localhost:8080/" 265 | }, 266 | "outputId": "3213532c-7c86-4efc-8b72-209aabf87319" 267 | }, 268 | "source": [ 269 | "print(X)" 270 | ], 271 | "execution_count": null, 272 | "outputs": [ 273 | { 274 | "output_type": "stream", 275 | "name": "stdout", 276 | "text": [ 277 | "[[0.0 1.0 15624510 19 19000]\n", 278 | " [0.0 1.0 15810944 35 20000]\n", 279 | " [1.0 0.0 15668575 26 43000]\n", 280 | " ...\n", 281 | " [1.0 0.0 15654296 50 20000]\n", 282 | " [0.0 1.0 15755018 36 33000]\n", 283 | " [1.0 0.0 15594041 49 36000]]\n" 284 | ] 285 | } 286 | ] 287 | }, 288 | { 289 | "cell_type": "markdown", 290 | "metadata": { 291 | "id": "DXh8oVSITIc6" 292 | }, 293 | "source": [ 294 | "### Encoding the Dependent Variable" 295 | ] 296 | }, 297 | { 298 | 
"cell_type": "code", 299 | "metadata": { 300 | "id": "XgHCShVyTOYY" 301 | }, 302 | "source": [ 303 | "from sklearn.preprocessing import LabelEncoder\n", 304 | "le = LabelEncoder()\n", 305 | "y = le.fit_transform(y)" 306 | ], 307 | "execution_count": null, 308 | "outputs": [] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "metadata": { 313 | "id": "FyhY8-gPpFCa", 314 | "colab": { 315 | "base_uri": "https://localhost:8080/" 316 | }, 317 | "outputId": "523b1918-f089-417f-a8b0-d039d28c49b9" 318 | }, 319 | "source": [ 320 | "print(y)" 321 | ], 322 | "execution_count": null, 323 | "outputs": [ 324 | { 325 | "output_type": "stream", 326 | "name": "stdout", 327 | "text": [ 328 | "[0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 1 0 0 0 0 0\n", 329 | " 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0\n", 330 | " 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0\n", 331 | " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0\n", 332 | " 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0\n", 333 | " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 1 0 1 1 0 0 0 1 0 0 0 1 0 1\n", 334 | " 1 1 0 0 1 1 0 1 1 0 1 1 0 1 0 0 0 1 1 0 1 1 0 1 0 1 0 1 0 0 1 1 0 1 0 0 1\n", 335 | " 1 0 1 1 0 1 1 0 0 1 0 0 1 1 1 1 1 0 1 1 1 1 0 1 1 0 1 0 1 0 1 1 1 1 0 0 0\n", 336 | " 1 1 0 1 1 1 1 1 0 0 0 1 1 0 0 1 0 1 0 1 1 0 1 0 1 1 0 1 1 0 0 0 1 1 0 1 0\n", 337 | " 0 1 0 1 0 0 1 1 0 0 1 1 0 1 1 0 0 1 0 1 0 1 1 1 0 1 0 1 1 1 0 1 1 1 1 0 1\n", 338 | " 1 1 0 1 0 1 0 0 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 0 1]\n" 339 | ] 340 | } 341 | ] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "metadata": { 346 | "id": "qb_vcgm3qZKW" 347 | }, 348 | "source": [ 349 | "## Splitting the dataset into the Training set and Test set" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "metadata": { 355 | "id": "pXgA6CzlqbCl" 356 | }, 357 | "source": [ 358 | "from sklearn.model_selection import 
train_test_split\n", 359 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1)" 360 | ], 361 | "execution_count": null, 362 | "outputs": [] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "metadata": { 367 | "id": "GuwQhFdKrYTM", 368 | "colab": { 369 | "base_uri": "https://localhost:8080/" 370 | }, 371 | "outputId": "c63d27ce-7e75-4a14-ec3e-cac8f2690974" 372 | }, 373 | "source": [ 374 | "print(X_train)" 375 | ], 376 | "execution_count": null, 377 | "outputs": [ 378 | { 379 | "output_type": "stream", 380 | "name": "stdout", 381 | "text": [ 382 | "[[1.0 0.0 15699284 29 28000]\n", 383 | " [1.0 0.0 15599081 45 22000]\n", 384 | " [0.0 1.0 15747043 46 117000]\n", 385 | " ...\n", 386 | " [0.0 1.0 15706071 51 23000]\n", 387 | " [0.0 1.0 15646227 46 79000]\n", 388 | " [0.0 1.0 15689425 30 49000]]\n" 389 | ] 390 | } 391 | ] 392 | }, 393 | { 394 | "cell_type": "code", 395 | "metadata": { 396 | "id": "TUrX_Tvcrbi4", 397 | "colab": { 398 | "base_uri": "https://localhost:8080/" 399 | }, 400 | "outputId": "37341b3d-a400-4b87-e508-93a3cca81829" 401 | }, 402 | "source": [ 403 | "print(X_test)" 404 | ], 405 | "execution_count": null, 406 | "outputs": [ 407 | { 408 | "output_type": "stream", 409 | "name": "stdout", 410 | "text": [ 411 | "[[0.0 1.0 15755018 36 33000]\n", 412 | " [1.0 0.0 15697020 39 61000]\n", 413 | " [0.0 1.0 15796351 36 118000]\n", 414 | " [0.0 1.0 15665760 39 122000]\n", 415 | " [1.0 0.0 15794661 26 118000]\n", 416 | " [1.0 0.0 15717560 38 65000]\n", 417 | " [1.0 0.0 15680243 20 36000]\n", 418 | " [0.0 1.0 15596522 49 89000]\n", 419 | " [0.0 1.0 15669656 31 18000]\n", 420 | " [0.0 1.0 15638646 48 141000]\n", 421 | " [1.0 0.0 15644296 34 72000]\n", 422 | " [1.0 0.0 15629885 39 73000]\n", 423 | " [0.0 1.0 15674206 35 72000]\n", 424 | " [1.0 0.0 15575247 48 131000]\n", 425 | " [1.0 0.0 15611191 53 82000]\n", 426 | " [0.0 1.0 15685346 56 133000]\n", 427 | " [0.0 1.0 15774744 60 83000]\n", 428 | " [0.0 1.0 15728773 27 
58000]\n", 429 | " [1.0 0.0 15667265 28 87000]\n", 430 | " [0.0 1.0 15593715 60 102000]\n", 431 | " [1.0 0.0 15724423 40 75000]\n", 432 | " [1.0 0.0 15780572 50 88000]\n", 433 | " [1.0 0.0 15715622 44 139000]\n", 434 | " [0.0 1.0 15622478 47 43000]\n", 435 | " [0.0 1.0 15617482 45 26000]\n", 436 | " [0.0 1.0 15809823 26 15000]\n", 437 | " [1.0 0.0 15574372 58 47000]\n", 438 | " [0.0 1.0 15708196 49 74000]\n", 439 | " [1.0 0.0 15778830 53 34000]\n", 440 | " [1.0 0.0 15794566 52 114000]\n", 441 | " [0.0 1.0 15668385 39 42000]\n", 442 | " [0.0 1.0 15804002 19 76000]\n", 443 | " [1.0 0.0 15578738 18 86000]\n", 444 | " [0.0 1.0 15727467 57 74000]\n", 445 | " [1.0 0.0 15598044 27 84000]\n", 446 | " [0.0 1.0 15595917 30 80000]\n", 447 | " [0.0 1.0 15642885 22 18000]\n", 448 | " [1.0 0.0 15584545 32 86000]\n", 449 | " [1.0 0.0 15654296 50 20000]\n", 450 | " [0.0 1.0 15741094 19 25000]\n", 451 | " [1.0 0.0 15746203 47 144000]\n", 452 | " [1.0 0.0 15660866 58 101000]\n", 453 | " [0.0 1.0 15570932 34 115000]\n", 454 | " [1.0 0.0 15595135 23 66000]\n", 455 | " [0.0 1.0 15775335 56 60000]\n", 456 | " [1.0 0.0 15663939 31 118000]\n", 457 | " [1.0 0.0 15668521 48 35000]\n", 458 | " [1.0 0.0 15733973 47 113000]\n", 459 | " [1.0 0.0 15747097 39 79000]\n", 460 | " [1.0 0.0 15613014 52 38000]\n", 461 | " [0.0 1.0 15718071 24 58000]\n", 462 | " [0.0 1.0 15622171 37 53000]\n", 463 | " [1.0 0.0 15721007 42 80000]\n", 464 | " [0.0 1.0 15704583 46 28000]\n", 465 | " [0.0 1.0 15673539 42 73000]\n", 466 | " [1.0 0.0 15631070 37 62000]\n", 467 | " [0.0 1.0 15791373 60 42000]\n", 468 | " [0.0 1.0 15591433 36 52000]\n", 469 | " [1.0 0.0 15569641 58 95000]\n", 470 | " [0.0 1.0 15577514 43 129000]\n", 471 | " [0.0 1.0 15789863 27 89000]\n", 472 | " [1.0 0.0 15654574 23 82000]\n", 473 | " [1.0 0.0 15619407 38 112000]\n", 474 | " [1.0 0.0 15638003 35 50000]\n", 475 | " [0.0 1.0 15699619 36 99000]\n", 476 | " [0.0 1.0 15694879 37 144000]\n", 477 | " [1.0 0.0 15706185 26 35000]\n", 478 | " [1.0 0.0 
15717893 42 70000]\n", 479 | " [1.0 0.0 15680752 43 133000]\n", 480 | " [1.0 0.0 15733964 38 50000]\n", 481 | " [1.0 0.0 15666675 46 96000]\n", 482 | " [1.0 0.0 15709441 35 44000]\n", 483 | " [1.0 0.0 15800515 38 113000]\n", 484 | " [0.0 1.0 15627220 39 71000]\n", 485 | " [1.0 0.0 15606274 26 52000]\n", 486 | " [1.0 0.0 15768151 54 108000]\n", 487 | " [1.0 0.0 15591915 33 51000]\n", 488 | " [0.0 1.0 15685576 26 16000]\n", 489 | " [0.0 1.0 15725660 30 87000]\n", 490 | " [1.0 0.0 15575002 35 60000]]\n" 491 | ] 492 | } 493 | ] 494 | }, 495 | { 496 | "cell_type": "code", 497 | "metadata": { 498 | "id": "pSMHiIsWreQY", 499 | "colab": { 500 | "base_uri": "https://localhost:8080/" 501 | }, 502 | "outputId": "aa2bcb2e-89f0-435e-98ff-e998e94f372e" 503 | }, 504 | "source": [ 505 | "print(y_train)" 506 | ], 507 | "execution_count": null, 508 | "outputs": [ 509 | { 510 | "output_type": "stream", 511 | "name": "stdout", 512 | "text": [ 513 | "[0 1 1 0 0 0 1 0 1 0 1 1 1 0 0 1 1 1 0 0 0 1 0 1 0 1 0 1 0 0 0 0 0 0 1 0 0\n", 514 | " 0 0 1 0 0 1 1 0 0 0 0 0 1 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0\n", 515 | " 1 1 0 0 0 1 0 1 1 0 0 0 1 1 0 0 1 1 0 1 0 0 0 0 0 0 0 1 0 0 0 1 0 0 1 0 1\n", 516 | " 0 1 0 0 0 0 1 0 0 1 0 0 1 1 0 1 0 0 0 1 1 1 0 0 0 0 0 1 1 0 0 0 1 0 0 0 1\n", 517 | " 0 1 1 0 1 1 0 0 0 0 1 0 0 1 0 0 0 0 0 1 1 1 0 0 0 0 0 1 0 0 1 1 0 0 0 0 1\n", 518 | " 0 1 0 1 0 0 1 0 1 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 1 0 0 0 0 1\n", 519 | " 0 1 0 0 0 0 0 1 0 0 1 1 1 0 0 0 1 0 0 1 1 0 1 0 1 0 0 0 0 0 1 0 1 0 0 1 1\n", 520 | " 0 0 1 1 1 0 0 0 1 0 1 1 1 0 0 1 0 0 0 1 0 0 0 1 0 1 0 1 0 1 1 0 0 1 0 0 1\n", 521 | " 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 1 0]\n" 522 | ] 523 | } 524 | ] 525 | }, 526 | { 527 | "cell_type": "code", 528 | "metadata": { 529 | "id": "I_tW7H56rgtW", 530 | "colab": { 531 | "base_uri": "https://localhost:8080/" 532 | }, 533 | "outputId": "1563dcb5-ce73-48b6-dac6-cc133e08c4a9" 534 | }, 535 | "source": [ 536 | "print(y_test)" 537 | ], 538 | 
"execution_count": null, 539 | "outputs": [ 540 | { 541 | "output_type": "stream", 542 | "name": "stdout", 543 | "text": [ 544 | "[0 0 1 1 0 0 0 1 0 0 0 0 0 1 1 1 1 0 0 1 0 1 1 0 1 0 1 0 1 0 0 0 0 1 0 0 0\n", 545 | " 0 1 0 1 1 0 0 1 1 1 1 0 1 0 0 1 1 1 0 1 0 1 1 0 0 0 0 1 1 0 0 0 0 0 0 1 0\n", 546 | " 0 1 0 0 0 0]\n" 547 | ] 548 | } 549 | ] 550 | }, 551 | { 552 | "cell_type": "markdown", 553 | "metadata": { 554 | "id": "TpGqbS4TqkIR" 555 | }, 556 | "source": [ 557 | "## Feature Scaling" 558 | ] 559 | }, 560 | { 561 | "cell_type": "code", 562 | "metadata": { 563 | "id": "AxjSUXFQqo-3" 564 | }, 565 | "source": [ 566 | "from sklearn.preprocessing import StandardScaler\n", 567 | "sc = StandardScaler()\n", 568 | "X_train[:, 3:] = sc.fit_transform(X_train[:, 3:])\n", 569 | "X_test[:, 3:] = sc.transform(X_test[:, 3:])" 570 | ], 571 | "execution_count": null, 572 | "outputs": [] 573 | }, 574 | { 575 | "cell_type": "code", 576 | "metadata": { 577 | "id": "DWPET8ZdlMnu", 578 | "colab": { 579 | "base_uri": "https://localhost:8080/" 580 | }, 581 | "outputId": "398f2a6d-484f-4d13-ca00-985f79447b1f" 582 | }, 583 | "source": [ 584 | "print(X_train)" 585 | ], 586 | "execution_count": null, 587 | "outputs": [ 588 | { 589 | "output_type": "stream", 590 | "name": "stdout", 591 | "text": [ 592 | "[[1.0 0.0 15699284 -0.8033008104771288 -1.1912179543545012]\n", 593 | " [1.0 0.0 15599081 0.7569799746681295 -1.368598012704171]\n", 594 | " [0.0 1.0 15747043 0.8544975237397081 1.4399195778322673]\n", 595 | " ...\n", 596 | " [0.0 1.0 15706071 1.3420852690976013 -1.3390346696458928]\n", 597 | " [0.0 1.0 15646227 0.8544975237397081 0.31651254161769204]\n", 598 | " [0.0 1.0 15689425 -0.7057832614055501 -0.5703877501306569]]\n" 599 | ] 600 | } 601 | ] 602 | }, 603 | { 604 | "cell_type": "code", 605 | "metadata": { 606 | "id": "sTXykB_QlRjE", 607 | "colab": { 608 | "base_uri": "https://localhost:8080/" 609 | }, 610 | "outputId": "ee300b2b-1510-4ac6-b966-91a389cedc2b" 611 | }, 612 | "source": [ 
613 | "print(X_test)" 614 | ], 615 | "execution_count": null, 616 | "outputs": [ 617 | { 618 | "output_type": "stream", 619 | "name": "stdout", 620 | "text": [ 621 | "[[0.0 1.0 15755018 -0.12067796697607829 -1.0434012390631098]\n", 622 | " [1.0 0.0 15697020 0.17187468023865762 -0.21562763343131733]\n", 623 | " [0.0 1.0 15796351 -0.12067796697607829 1.4694829208905458]\n", 624 | " [0.0 1.0 15665760 0.17187468023865762 1.587736293123659]\n", 625 | " [1.0 0.0 15794661 -1.0958534576918646 1.4694829208905458]\n", 626 | " [1.0 0.0 15717560 0.074357131167079 -0.09737426119820415]\n", 627 | " [1.0 0.0 15680243 -1.6809587521213365 -0.9547112098882748]\n", 628 | " [0.0 1.0 15596522 1.147050170954444 0.6121459722004751]\n", 629 | " [0.0 1.0 15669656 -0.6082657123339715 -1.4868513849372842]\n", 630 | " [0.0 1.0 15638646 1.0495326218828653 2.1494398112309465]\n", 631 | " [1.0 0.0 15644296 -0.3157130651192356 0.10956914020974394]\n", 632 | " [1.0 0.0 15629885 0.17187468023865762 0.13913248326802224]\n", 633 | " [0.0 1.0 15674206 -0.21819551604765694 0.10956914020974394]\n", 634 | " [1.0 0.0 15575247 1.0495326218828653 1.8538063806481635]\n", 635 | " [1.0 0.0 15611191 1.5371203672407585 0.4052025707925269]\n", 636 | " [0.0 1.0 15685346 1.8296730144554945 1.9129330667647202]\n", 637 | " [0.0 1.0 15774744 2.219743210741809 0.4347659138508052]\n", 638 | " [0.0 1.0 15728773 -0.9983359086202861 -0.30431766260615223]\n", 639 | " [1.0 0.0 15667265 -0.9008183595487074 0.5530192860839184]\n", 640 | " [0.0 1.0 15593715 2.219743210741809 0.9964694319580929]\n", 641 | " [1.0 0.0 15724423 0.26939222931023626 0.19825916938457885]\n", 642 | " [1.0 0.0 15780572 1.2445677200260228 0.5825826291421967]\n", 643 | " [1.0 0.0 15715622 0.6594624255965509 2.09031312511439]\n", 644 | " [0.0 1.0 15622478 0.9520150728112867 -0.7477678084803268]\n", 645 | " [0.0 1.0 15617482 0.7569799746681295 -1.2503446404710579]\n", 646 | " [0.0 1.0 15809823 -1.0958534576918646 -1.5755414141121191]\n", 647 | " [1.0 0.0 
15574372 2.0247081125986517 -0.6295144362472135]\n", 648 | " [0.0 1.0 15708196 1.147050170954444 0.16869582632630054]\n", 649 | " [1.0 0.0 15778830 1.5371203672407585 -1.0138378960048313]\n", 650 | " [1.0 0.0 15794566 1.43960281816918 1.3512295486574324]\n", 651 | " [0.0 1.0 15668385 0.17187468023865762 -0.777331151538605]\n", 652 | " [0.0 1.0 15804002 -1.7784763011929152 0.22782251244285714]\n", 653 | " [1.0 0.0 15578738 -1.875993850264494 0.5234559430256401]\n", 654 | " [0.0 1.0 15727467 1.9271905635270732 0.16869582632630054]\n", 655 | " [1.0 0.0 15598044 -0.9983359086202861 0.46432925690908355]\n", 656 | " [0.0 1.0 15595917 -0.7057832614055501 0.34607588467597034]\n", 657 | " [0.0 1.0 15642885 -1.4859236539781793 -1.4868513849372842]\n", 658 | " [1.0 0.0 15584545 -0.5107481632623928 0.5234559430256401]\n", 659 | " [1.0 0.0 15654296 1.2445677200260228 -1.4277246988207275]\n", 660 | " [0.0 1.0 15741094 -1.7784763011929152 -1.279907983529336]\n", 661 | " [1.0 0.0 15746203 0.9520150728112867 2.2381298404057817]\n", 662 | " [1.0 0.0 15660866 2.0247081125986517 0.9669060888998146]\n", 663 | " [0.0 1.0 15570932 -0.3157130651192356 1.3807928917157108]\n", 664 | " [1.0 0.0 15595135 -1.3884061049066005 -0.06781091813992585]\n", 665 | " [0.0 1.0 15775335 1.8296730144554945 -0.24519097648959565]\n", 666 | " [1.0 0.0 15663939 -0.6082657123339715 1.4694829208905458]\n", 667 | " [1.0 0.0 15668521 1.0495326218828653 -0.9842745529465531]\n", 668 | " [1.0 0.0 15733973 0.9520150728112867 1.3216662055991542]\n", 669 | " [1.0 0.0 15747097 0.17187468023865762 0.31651254161769204]\n", 670 | " [1.0 0.0 15613014 1.43960281816918 -0.8955845237717182]\n", 671 | " [0.0 1.0 15718071 -1.290888555835022 -0.30431766260615223]\n", 672 | " [0.0 1.0 15622171 -0.02316041790449965 -0.45213437789754374]\n", 673 | " [1.0 0.0 15721007 0.46442732745339355 0.34607588467597034]\n", 674 | " [0.0 1.0 15704583 0.8544975237397081 -1.1912179543545012]\n", 675 | " [0.0 1.0 15673539 0.46442732745339355 
0.13913248326802224]\n", 676 | " [1.0 0.0 15631070 -0.02316041790449965 -0.18606429037303904]\n", 677 | " [0.0 1.0 15791373 2.219743210741809 -0.777331151538605]\n", 678 | " [0.0 1.0 15591433 -0.12067796697607829 -0.48169772095582203]\n", 679 | " [1.0 0.0 15569641 2.0247081125986517 0.7895260305501448]\n", 680 | " [0.0 1.0 15577514 0.5619448765249722 1.794679694531607]\n", 681 | " [0.0 1.0 15789863 -0.9983359086202861 0.6121459722004751]\n", 682 | " [1.0 0.0 15654574 -1.3884061049066005 0.4052025707925269]\n", 683 | " [1.0 0.0 15619407 0.074357131167079 1.292102862540876]\n", 684 | " [1.0 0.0 15638003 -0.21819551604765694 -0.5408244070723787]\n", 685 | " [0.0 1.0 15699619 -0.12067796697607829 0.907779402783258]\n", 686 | " [0.0 1.0 15694879 -0.02316041790449965 2.2381298404057817]\n", 687 | " [1.0 0.0 15706185 -1.0958534576918646 -0.9842745529465531]\n", 688 | " [1.0 0.0 15717893 0.46442732745339355 0.050442454093187344]\n", 689 | " [1.0 0.0 15680752 0.5619448765249722 1.9129330667647202]\n", 690 | " [1.0 0.0 15733964 0.074357131167079 -0.5408244070723787]\n", 691 | " [1.0 0.0 15666675 0.8544975237397081 0.8190893736084232]\n", 692 | " [1.0 0.0 15709441 -0.21819551604765694 -0.7182044654220484]\n", 693 | " [1.0 0.0 15800515 0.074357131167079 1.3216662055991542]\n", 694 | " [0.0 1.0 15627220 0.17187468023865762 0.08000579715146565]\n", 695 | " [1.0 0.0 15606274 -1.0958534576918646 -0.48169772095582203]\n", 696 | " [1.0 0.0 15768151 1.6346379163123372 1.1738494903077628]\n", 697 | " [1.0 0.0 15591915 -0.4132306141908142 -0.5112610640141003]\n", 698 | " [0.0 1.0 15685576 -1.0958534576918646 -1.5459780710538409]\n", 699 | " [0.0 1.0 15725660 -0.7057832614055501 0.5530192860839184]\n", 700 | " [1.0 0.0 15575002 -0.21819551604765694 -0.24519097648959565]]\n" 701 | ] 702 | } 703 | ] 704 | } 705 | ] 706 | } -------------------------------------------------------------------------------- /Extraa/NER.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "authorship_tag": "ABX9TyOF+xzJxU0/pmqAiH5INnf7", 8 | "include_colab_link": true 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | } 17 | }, 18 | "cells": [ 19 | { 20 | "cell_type": "markdown", 21 | "metadata": { 22 | "id": "view-in-github", 23 | "colab_type": "text" 24 | }, 25 | "source": [ 26 | "\"Open" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "source": [ 32 | "import nltk\n", 33 | "nltk.download('punkt')\n", 34 | "nltk.download('averaged_perceptron_tagger')\n", 35 | "nltk.download('maxent_ne_chunker')\n", 36 | "nltk.download('words')" 37 | ], 38 | "metadata": { 39 | "colab": { 40 | "base_uri": "https://localhost:8080/" 41 | }, 42 | "id": "MYi0bXYZUpbS", 43 | "outputId": "aa885ca6-890c-44bc-b143-c12c0bb7ce26" 44 | }, 45 | "execution_count": 9, 46 | "outputs": [ 47 | { 48 | "output_type": "stream", 49 | "name": "stderr", 50 | "text": [ 51 | "[nltk_data] Downloading package punkt to /root/nltk_data...\n", 52 | "[nltk_data] Package punkt is already up-to-date!\n", 53 | "[nltk_data] Downloading package averaged_perceptron_tagger to\n", 54 | "[nltk_data] /root/nltk_data...\n", 55 | "[nltk_data] Package averaged_perceptron_tagger is already up-to-\n", 56 | "[nltk_data] date!\n", 57 | "[nltk_data] Downloading package maxent_ne_chunker to\n", 58 | "[nltk_data] /root/nltk_data...\n", 59 | "[nltk_data] Package maxent_ne_chunker is already up-to-date!\n", 60 | "[nltk_data] Downloading package words to /root/nltk_data...\n", 61 | "[nltk_data] Unzipping corpora/words.zip.\n" 62 | ] 63 | }, 64 | { 65 | "output_type": "execute_result", 66 | "data": { 67 | "text/plain": [ 68 | "True" 69 | ] 70 | }, 71 | "metadata": {}, 72 | "execution_count": 9 73 | } 74 | ] 75 | }, 76 | { 77 | 
"cell_type": "code", 78 | "execution_count": 15, 79 | "metadata": { 80 | "id": "dltfNG2QULdV" 81 | }, 82 | "outputs": [], 83 | "source": [ 84 | "# Tokenize the text and tag each token with its part of speech\n", 85 | "def tag_pos(text):\n", 86 | " tokens = nltk.word_tokenize(text)\n", 87 | " return nltk.pos_tag(tokens)\n", 88 | "\n", 89 | "# Identify named entities in the text\n", 90 | "def tag_entities(text):\n", 91 | " entities = nltk.ne_chunk(tag_pos(text))\n", 92 | " return entities\n", 93 | "\n", 94 | "# Print the named entities in the text\n", 95 | "def print_entities(text):\n", 96 | " for entity in tag_entities(text):\n", 97 | " if isinstance(entity, nltk.tree.Tree):\n", 98 | " label = entity.label()\n", 99 | " text = \" \".join([token[0] for token in entity])\n", 100 | " print(f\"{label}: {text}\")\n", 101 | "\n", 102 | "text = \"A test set is the portion of data used to evaluate the machine learning model and forecast results.\"\n", 103 | "print_entities(text)" 104 | ] 105 | } 106 | ] 107 | } -------------------------------------------------------------------------------- /Extraa/Simple_Text_Preprocessing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Simple_Text_Preprocessing.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [], 9 | "include_colab_link": true 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "accelerator": "GPU" 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "markdown", 20 | "metadata": { 21 | "id": "view-in-github", 22 | "colab_type": "text" 23 | }, 24 | "source": [ 25 | "\"Open" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "metadata": { 31 | "id": "RUee8aFXpH17" 32 | }, 33 | "source": [ 34 | "import re" 35 | ], 36 | "execution_count": null, 37 | "outputs": [] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "metadata": { 42 | "colab": { 43 | 
"base_uri": "https://localhost:8080/" 44 | }, 45 | "id": "76eUmE5pOvq3", 46 | "outputId": "6d3cc8d4-32d9-48fa-c432-ea7178043ffe" 47 | }, 48 | "source": [ 49 | "from google.colab import drive\n", 50 | "drive.mount('/content/drive')\n", 51 | "%cd /content/drive/MyDrive/Colab Notebooks/AI_LAB_Fall_2020/" 52 | ], 53 | "execution_count": null, 54 | "outputs": [ 55 | { 56 | "output_type": "stream", 57 | "name": "stdout", 58 | "text": [ 59 | "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n", 60 | "[Errno 2] No such file or directory: '/content/drive/MyDrive/Colab Notebooks/AI_LAB_Fall_2020/'\n", 61 | "/content\n" 62 | ] 63 | } 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "metadata": { 69 | "id": "t4QCWdk8N-ra" 70 | }, 71 | "source": [ 72 | "def read_files(file_loc):\n", 73 | " dataset = []\n", 74 | " \n", 75 | " with open(file_loc, 'r', encoding='utf-8') as test_file:\n", 76 | " for line in test_file:\n", 77 | " dataset.append(line)\n", 78 | " \n", 79 | " return dataset" 80 | ], 81 | "execution_count": null, 82 | "outputs": [] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "metadata": { 87 | "id": "VVqh8cxQTJOa" 88 | }, 89 | "source": [ 90 | "def separate_labels(dataset):\n", 91 | " documents = []\n", 92 | " labels = []\n", 93 | "\n", 94 | " for line in dataset:\n", 95 | " splitted_line = line.strip().split('\\t', 2)\n", 96 | " labels.append(splitted_line[1])\n", 97 | " documents.append(splitted_line[2])\n", 98 | "\n", 99 | " return documents, labels" 100 | ], 101 | "execution_count": null, 102 | "outputs": [] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "source": [ 107 | "from google.colab import drive\n", 108 | "drive.mount('/content/drive')" 109 | ], 110 | "metadata": { 111 | "colab": { 112 | "base_uri": "https://localhost:8080/" 113 | }, 114 | "id": "b33fQkNF-nBX", 115 | "outputId": "0548a98f-8dd5-42de-a7eb-745103b26b87" 116 | }, 117 | "execution_count": null, 118 | 
"outputs": [ 119 | { 120 | "output_type": "stream", 121 | "name": "stdout", 122 | "text": [ 123 | "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n" 124 | ] 125 | } 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "metadata": { 131 | "id": "PLOZfny8UbwB" 132 | }, 133 | "source": [ 134 | "def remove_urls(data):\n", 135 | " url_removed_data = []\n", 136 | "\n", 137 | " # Your Code Here...\n", 138 | " for line in data:\n", 139 | " url_removed_data.append(re.sub('http[s]?://\\S+', '', line))\n", 140 | "\n", 141 | " return url_removed_data" 142 | ], 143 | "execution_count": null, 144 | "outputs": [] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "metadata": { 149 | "id": "g1LEQlJAUfeH" 150 | }, 151 | "source": [ 152 | "def remove_hashtag(data):\n", 153 | " '''This function removes HashTag # from the entire dataset'''\n", 154 | " hashtag_removed_data = []\n", 155 | "\n", 156 | " # map hashtag to space\n", 157 | " translator = str.maketrans('#', ' '*len('#'), '')\n", 158 | "\n", 159 | " for line in data:\n", 160 | " hashtag_removed_data.append(line.translate(translator))\n", 161 | "\n", 162 | " return hashtag_removed_data" 163 | ], 164 | "execution_count": null, 165 | "outputs": [] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "metadata": { 170 | "id": "CPy53mwTIt9Y" 171 | }, 172 | "source": [ 173 | "# remove whitespace from text \n", 174 | "def remove_whitespace(data):\n", 175 | " '''This function removes multiple whitespaces and replace them with a single whitespace'''\n", 176 | " whitespace_removed_data = []\n", 177 | " \n", 178 | " for line in data:\n", 179 | " whitespace_removed_data.append(\" \".join(line.split()))\n", 180 | " \n", 181 | " return whitespace_removed_data" 182 | ], 183 | "execution_count": null, 184 | "outputs": [] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "metadata": { 189 | "id": "e3UHR8hNUSSa" 190 | }, 191 | "source": [ 192 | "def 
pre_processing(documents):\n", 193 | " '''This function calls all the pre-processing submodules'''\n", 194 | "\n", 195 | " documents = remove_urls(documents)\n", 196 | " documents = remove_hashtag(documents)\n", 197 | " documents = remove_whitespace(documents)\n", 198 | "\n", 199 | " return documents" 200 | ], 201 | "execution_count": null, 202 | "outputs": [] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "metadata": { 207 | "colab": { 208 | "base_uri": "https://localhost:8080/" 209 | }, 210 | "id": "2f-LKvxSN0T2", 211 | "outputId": "712b193c-770e-4131-d2aa-7ed4955ea3d7" 212 | }, 213 | "source": [ 214 | "def main():\n", 215 | " print(\"Reading The Dataset ...\")\n", 216 | " dataset = read_files('/content/drive/MyDrive/Colab Notebooks/datasets/corona_data/train.tsv')\n", 217 | " documents, labels = separate_labels(dataset)\n", 218 | "\n", 219 | " # print(dataset)\n", 220 | " \n", 221 | " print('\\nBefore Preprocessing:')\n", 222 | " print(documents[0])\n", 223 | "\n", 224 | " documents = pre_processing(documents)\n", 225 | "\n", 226 | " print('\\nAfter Preprocessing:')\n", 227 | " print(documents[0])\n", 228 | "\n", 229 | "if __name__ == \"__main__\":\n", 230 | " main()" 231 | ], 232 | "execution_count": null, 233 | "outputs": [ 234 | { 235 | "output_type": "stream", 236 | "name": "stdout", 237 | "text": [ 238 | "Reading The Dataset ...\n", 239 | "\n", 240 | "Before Preprocessing:\n", 241 | "@MeNyrbie @Phil_Gahan @Chrisitv https://t.co/iFz9FAn2Pa and https://t.co/xX6ghGFzCC and https://t.co/I2NlzdxNo8\n", 242 | "\n", 243 | "After Preprocessing:\n", 244 | "@MeNyrbie @Phil_Gahan @Chrisitv and and\n" 245 | ] 246 | } 247 | ] 248 | } 249 | ] 250 | } -------------------------------------------------------------------------------- /Extraa/data_preprocessing_tools.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": 
"data_preprocessing_tools.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [], 9 | "include_colab_link": true 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "metadata": { 20 | "id": "view-in-github", 21 | "colab_type": "text" 22 | }, 23 | "source": [ 24 | "\"Open" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": { 30 | "id": "37puETfgRzzg" 31 | }, 32 | "source": [ 33 | "# Data Preprocessing Tools" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "source": [ 39 | "## Google Drive Mount" 40 | ], 41 | "metadata": { 42 | "id": "-3LK3IUeaxwE" 43 | } 44 | }, 45 | { 46 | "cell_type": "code", 47 | "metadata": { 48 | "colab": { 49 | "base_uri": "https://localhost:8080/" 50 | }, 51 | "id": "uoybHr_uELhL", 52 | "outputId": "b57fcc7a-d549-41b2-a5a5-d1de861ea496" 53 | }, 54 | "source": [ 55 | "from google.colab import drive\n", 56 | "drive.mount('/content/drive')" 57 | ], 58 | "execution_count": null, 59 | "outputs": [ 60 | { 61 | "output_type": "stream", 62 | "name": "stdout", 63 | "text": [ 64 | "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n" 65 | ] 66 | } 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": { 72 | "id": "EoRP98MpR-qj" 73 | }, 74 | "source": [ 75 | "## Importing the libraries" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "metadata": { 81 | "id": "N-qiINBQSK2g" 82 | }, 83 | "source": [ 84 | "import numpy as np\n", 85 | "import matplotlib.pyplot as plt\n", 86 | "import pandas as pd" 87 | ], 88 | "execution_count": null, 89 | "outputs": [] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": { 94 | "id": "RopL7tUZSQkT" 95 | }, 96 | "source": [ 97 | "## Importing the dataset" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "metadata": { 103 | "id": "WwEPNDWySTKm" 104 | }, 105 | "source": [ 106 
| "dataset = pd.read_csv('/content/drive/MyDrive/Datasets/Social_Network_Ads.csv')\n", 107 | "X = dataset.iloc[:, :-1].values\n", 108 | "y = dataset.iloc[:, -1].values" 109 | ], 110 | "execution_count": null, 111 | "outputs": [] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "metadata": { 116 | "id": "hCsz2yCebe1R", 117 | "colab": { 118 | "base_uri": "https://localhost:8080/" 119 | }, 120 | "outputId": "20d06c5d-39a7-439a-c893-b9b2f0dfb238" 121 | }, 122 | "source": [ 123 | "print(X)" 124 | ], 125 | "execution_count": null, 126 | "outputs": [ 127 | { 128 | "output_type": "stream", 129 | "name": "stdout", 130 | "text": [ 131 | "[[15624510 'Male' 19 19000]\n", 132 | " [15810944 'Male' 35 20000]\n", 133 | " [15668575 'Female' 26 43000]\n", 134 | " ...\n", 135 | " [15654296 'Female' 50 20000]\n", 136 | " [15755018 'Male' 36 33000]\n", 137 | " [15594041 'Female' 49 36000]]\n" 138 | ] 139 | } 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "metadata": { 145 | "id": "eYrOQ43XcJR3", 146 | "colab": { 147 | "base_uri": "https://localhost:8080/" 148 | }, 149 | "outputId": "957997f6-31ae-401d-8f4f-0162fd6438e5" 150 | }, 151 | "source": [ 152 | "print(y)" 153 | ], 154 | "execution_count": null, 155 | "outputs": [ 156 | { 157 | "output_type": "stream", 158 | "name": "stdout", 159 | "text": [ 160 | "[0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 1 0 0 0 0 0\n", 161 | " 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0\n", 162 | " 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0\n", 163 | " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0\n", 164 | " 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0\n", 165 | " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 1 0 1 1 0 0 0 1 0 0 0 1 0 1\n", 166 | " 1 1 0 0 1 1 0 1 1 0 1 1 0 1 0 0 0 1 1 0 1 1 0 1 0 1 0 1 0 0 1 1 0 1 0 0 1\n", 167 | " 1 0 1 1 0 1 1 0 0 1 0 0 1 1 1 1 1 0 1 1 1 1 0 1 1 0 1 0 1 0 1 1 1 1 0 0 0\n", 
168 | " 1 1 0 1 1 1 1 1 0 0 0 1 1 0 0 1 0 1 0 1 1 0 1 0 1 1 0 1 1 0 0 0 1 1 0 1 0\n", 169 | " 0 1 0 1 0 0 1 1 0 0 1 1 0 1 1 0 0 1 0 1 0 1 1 1 0 1 0 1 1 1 0 1 1 1 1 0 1\n", 170 | " 1 1 0 1 0 1 0 0 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 0 1]\n" 171 | ] 172 | } 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": { 178 | "id": "nhfKXNxlSabC" 179 | }, 180 | "source": [ 181 | "## Taking care of missing data" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "metadata": { 187 | "id": "c93k7ipkSexq" 188 | }, 189 | "source": [ 190 | "from sklearn.impute import SimpleImputer\n", 191 | "imputer = SimpleImputer(missing_values=np.nan, strategy='most_frequent')\n", 192 | "imputer.fit(X[:, 1:3])\n", 193 | "X[:, 1:3] = imputer.transform(X[:, 1:3])" 194 | ], 195 | "execution_count": null, 196 | "outputs": [] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "metadata": { 201 | "id": "3UgLdMS_bjq_", 202 | "colab": { 203 | "base_uri": "https://localhost:8080/" 204 | }, 205 | "outputId": "dd615b9c-99fe-4604-a8c4-82a8e23a1a77" 206 | }, 207 | "source": [ 208 | "print(X)" 209 | ], 210 | "execution_count": null, 211 | "outputs": [ 212 | { 213 | "output_type": "stream", 214 | "name": "stdout", 215 | "text": [ 216 | "[[15624510 'Male' 19 19000]\n", 217 | " [15810944 'Male' 35 20000]\n", 218 | " [15668575 'Female' 26 43000]\n", 219 | " ...\n", 220 | " [15654296 'Female' 50 20000]\n", 221 | " [15755018 'Male' 36 33000]\n", 222 | " [15594041 'Female' 49 36000]]\n" 223 | ] 224 | } 225 | ] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": { 230 | "id": "CriG6VzVSjcK" 231 | }, 232 | "source": [ 233 | "## Encoding categorical data" 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": { 239 | "id": "AhSpdQWeSsFh" 240 | }, 241 | "source": [ 242 | "### Encoding the Independent Variable" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "metadata": { 248 | "id": "5hwuVddlSwVi" 249 | }, 250 | "source": [ 251 | "from 
sklearn.compose import ColumnTransformer\n", 252 | "from sklearn.preprocessing import OneHotEncoder\n", 253 | "ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [1])], remainder='passthrough')\n", 254 | "X = np.array(ct.fit_transform(X))" 255 | ], 256 | "execution_count": null, 257 | "outputs": [] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "metadata": { 262 | "id": "f7QspewyeBfx", 263 | "colab": { 264 | "base_uri": "https://localhost:8080/" 265 | }, 266 | "outputId": "3213532c-7c86-4efc-8b72-209aabf87319" 267 | }, 268 | "source": [ 269 | "print(X)" 270 | ], 271 | "execution_count": null, 272 | "outputs": [ 273 | { 274 | "output_type": "stream", 275 | "name": "stdout", 276 | "text": [ 277 | "[[0.0 1.0 15624510 19 19000]\n", 278 | " [0.0 1.0 15810944 35 20000]\n", 279 | " [1.0 0.0 15668575 26 43000]\n", 280 | " ...\n", 281 | " [1.0 0.0 15654296 50 20000]\n", 282 | " [0.0 1.0 15755018 36 33000]\n", 283 | " [1.0 0.0 15594041 49 36000]]\n" 284 | ] 285 | } 286 | ] 287 | }, 288 | { 289 | "cell_type": "markdown", 290 | "metadata": { 291 | "id": "DXh8oVSITIc6" 292 | }, 293 | "source": [ 294 | "### Encoding the Dependent Variable" 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "metadata": { 300 | "id": "XgHCShVyTOYY" 301 | }, 302 | "source": [ 303 | "from sklearn.preprocessing import LabelEncoder\n", 304 | "le = LabelEncoder()\n", 305 | "y = le.fit_transform(y)" 306 | ], 307 | "execution_count": null, 308 | "outputs": [] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "metadata": { 313 | "id": "FyhY8-gPpFCa", 314 | "colab": { 315 | "base_uri": "https://localhost:8080/" 316 | }, 317 | "outputId": "523b1918-f089-417f-a8b0-d039d28c49b9" 318 | }, 319 | "source": [ 320 | "print(y)" 321 | ], 322 | "execution_count": null, 323 | "outputs": [ 324 | { 325 | "output_type": "stream", 326 | "name": "stdout", 327 | "text": [ 328 | "[0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 1 0 0 0 0 0\n", 329 | " 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0\n", 330 | " 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0\n", 331 | " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0\n", 332 | " 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0\n", 333 | " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 1 0 1 1 0 0 0 1 0 0 0 1 0 1\n", 334 | " 1 1 0 0 1 1 0 1 1 0 1 1 0 1 0 0 0 1 1 0 1 1 0 1 0 1 0 1 0 0 1 1 0 1 0 0 1\n", 335 | " 1 0 1 1 0 1 1 0 0 1 0 0 1 1 1 1 1 0 1 1 1 1 0 1 1 0 1 0 1 0 1 1 1 1 0 0 0\n", 336 | " 1 1 0 1 1 1 1 1 0 0 0 1 1 0 0 1 0 1 0 1 1 0 1 0 1 1 0 1 1 0 0 0 1 1 0 1 0\n", 337 | " 0 1 0 1 0 0 1 1 0 0 1 1 0 1 1 0 0 1 0 1 0 1 1 1 0 1 0 1 1 1 0 1 1 1 1 0 1\n", 338 | " 1 1 0 1 0 1 0 0 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 0 1]\n" 339 | ] 340 | } 341 | ] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "metadata": { 346 | "id": "qb_vcgm3qZKW" 347 | }, 348 | "source": [ 349 | "## Splitting the dataset into the Training set and Test set" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "metadata": { 355 | "id": "pXgA6CzlqbCl" 356 | }, 357 | "source": [ 358 | "from sklearn.model_selection import train_test_split\n", 359 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1)" 360 | ], 361 | "execution_count": null, 362 | "outputs": [] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "metadata": { 367 | "id": "GuwQhFdKrYTM", 368 | "colab": { 369 | "base_uri": "https://localhost:8080/" 370 | }, 371 | "outputId": "c63d27ce-7e75-4a14-ec3e-cac8f2690974" 372 | }, 373 | "source": [ 374 | "print(X_train)" 375 | ], 376 | "execution_count": null, 377 | "outputs": [ 378 | { 379 | "output_type": "stream", 380 | "name": "stdout", 381 | "text": [ 382 | "[[1.0 0.0 15699284 29 28000]\n", 383 | " [1.0 0.0 15599081 45 22000]\n", 384 | " [0.0 1.0 15747043 46 117000]\n", 385 | " ...\n", 386 | " [0.0 1.0 15706071 51 23000]\n", 387 | " [0.0 1.0 15646227 46 79000]\n", 388 | " 
[0.0 1.0 15689425 30 49000]]\n" 389 | ] 390 | } 391 | ] 392 | }, 393 | { 394 | "cell_type": "code", 395 | "metadata": { 396 | "id": "TUrX_Tvcrbi4", 397 | "colab": { 398 | "base_uri": "https://localhost:8080/" 399 | }, 400 | "outputId": "37341b3d-a400-4b87-e508-93a3cca81829" 401 | }, 402 | "source": [ 403 | "print(X_test)" 404 | ], 405 | "execution_count": null, 406 | "outputs": [ 407 | { 408 | "output_type": "stream", 409 | "name": "stdout", 410 | "text": [ 411 | "[[0.0 1.0 15755018 36 33000]\n", 412 | " [1.0 0.0 15697020 39 61000]\n", 413 | " [0.0 1.0 15796351 36 118000]\n", 414 | " [0.0 1.0 15665760 39 122000]\n", 415 | " [1.0 0.0 15794661 26 118000]\n", 416 | " [1.0 0.0 15717560 38 65000]\n", 417 | " [1.0 0.0 15680243 20 36000]\n", 418 | " [0.0 1.0 15596522 49 89000]\n", 419 | " [0.0 1.0 15669656 31 18000]\n", 420 | " [0.0 1.0 15638646 48 141000]\n", 421 | " [1.0 0.0 15644296 34 72000]\n", 422 | " [1.0 0.0 15629885 39 73000]\n", 423 | " [0.0 1.0 15674206 35 72000]\n", 424 | " [1.0 0.0 15575247 48 131000]\n", 425 | " [1.0 0.0 15611191 53 82000]\n", 426 | " [0.0 1.0 15685346 56 133000]\n", 427 | " [0.0 1.0 15774744 60 83000]\n", 428 | " [0.0 1.0 15728773 27 58000]\n", 429 | " [1.0 0.0 15667265 28 87000]\n", 430 | " [0.0 1.0 15593715 60 102000]\n", 431 | " [1.0 0.0 15724423 40 75000]\n", 432 | " [1.0 0.0 15780572 50 88000]\n", 433 | " [1.0 0.0 15715622 44 139000]\n", 434 | " [0.0 1.0 15622478 47 43000]\n", 435 | " [0.0 1.0 15617482 45 26000]\n", 436 | " [0.0 1.0 15809823 26 15000]\n", 437 | " [1.0 0.0 15574372 58 47000]\n", 438 | " [0.0 1.0 15708196 49 74000]\n", 439 | " [1.0 0.0 15778830 53 34000]\n", 440 | " [1.0 0.0 15794566 52 114000]\n", 441 | " [0.0 1.0 15668385 39 42000]\n", 442 | " [0.0 1.0 15804002 19 76000]\n", 443 | " [1.0 0.0 15578738 18 86000]\n", 444 | " [0.0 1.0 15727467 57 74000]\n", 445 | " [1.0 0.0 15598044 27 84000]\n", 446 | " [0.0 1.0 15595917 30 80000]\n", 447 | " [0.0 1.0 15642885 22 18000]\n", 448 | " [1.0 0.0 15584545 32 86000]\n", 449 | " 
[1.0 0.0 15654296 50 20000]\n", 450 | " [0.0 1.0 15741094 19 25000]\n", 451 | " [1.0 0.0 15746203 47 144000]\n", 452 | " [1.0 0.0 15660866 58 101000]\n", 453 | " [0.0 1.0 15570932 34 115000]\n", 454 | " [1.0 0.0 15595135 23 66000]\n", 455 | " [0.0 1.0 15775335 56 60000]\n", 456 | " [1.0 0.0 15663939 31 118000]\n", 457 | " [1.0 0.0 15668521 48 35000]\n", 458 | " [1.0 0.0 15733973 47 113000]\n", 459 | " [1.0 0.0 15747097 39 79000]\n", 460 | " [1.0 0.0 15613014 52 38000]\n", 461 | " [0.0 1.0 15718071 24 58000]\n", 462 | " [0.0 1.0 15622171 37 53000]\n", 463 | " [1.0 0.0 15721007 42 80000]\n", 464 | " [0.0 1.0 15704583 46 28000]\n", 465 | " [0.0 1.0 15673539 42 73000]\n", 466 | " [1.0 0.0 15631070 37 62000]\n", 467 | " [0.0 1.0 15791373 60 42000]\n", 468 | " [0.0 1.0 15591433 36 52000]\n", 469 | " [1.0 0.0 15569641 58 95000]\n", 470 | " [0.0 1.0 15577514 43 129000]\n", 471 | " [0.0 1.0 15789863 27 89000]\n", 472 | " [1.0 0.0 15654574 23 82000]\n", 473 | " [1.0 0.0 15619407 38 112000]\n", 474 | " [1.0 0.0 15638003 35 50000]\n", 475 | " [0.0 1.0 15699619 36 99000]\n", 476 | " [0.0 1.0 15694879 37 144000]\n", 477 | " [1.0 0.0 15706185 26 35000]\n", 478 | " [1.0 0.0 15717893 42 70000]\n", 479 | " [1.0 0.0 15680752 43 133000]\n", 480 | " [1.0 0.0 15733964 38 50000]\n", 481 | " [1.0 0.0 15666675 46 96000]\n", 482 | " [1.0 0.0 15709441 35 44000]\n", 483 | " [1.0 0.0 15800515 38 113000]\n", 484 | " [0.0 1.0 15627220 39 71000]\n", 485 | " [1.0 0.0 15606274 26 52000]\n", 486 | " [1.0 0.0 15768151 54 108000]\n", 487 | " [1.0 0.0 15591915 33 51000]\n", 488 | " [0.0 1.0 15685576 26 16000]\n", 489 | " [0.0 1.0 15725660 30 87000]\n", 490 | " [1.0 0.0 15575002 35 60000]]\n" 491 | ] 492 | } 493 | ] 494 | }, 495 | { 496 | "cell_type": "code", 497 | "metadata": { 498 | "id": "pSMHiIsWreQY", 499 | "colab": { 500 | "base_uri": "https://localhost:8080/" 501 | }, 502 | "outputId": "aa2bcb2e-89f0-435e-98ff-e998e94f372e" 503 | }, 504 | "source": [ 505 | "print(y_train)" 506 | ], 507 | 
"execution_count": null, 508 | "outputs": [ 509 | { 510 | "output_type": "stream", 511 | "name": "stdout", 512 | "text": [ 513 | "[0 1 1 0 0 0 1 0 1 0 1 1 1 0 0 1 1 1 0 0 0 1 0 1 0 1 0 1 0 0 0 0 0 0 1 0 0\n", 514 | " 0 0 1 0 0 1 1 0 0 0 0 0 1 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0\n", 515 | " 1 1 0 0 0 1 0 1 1 0 0 0 1 1 0 0 1 1 0 1 0 0 0 0 0 0 0 1 0 0 0 1 0 0 1 0 1\n", 516 | " 0 1 0 0 0 0 1 0 0 1 0 0 1 1 0 1 0 0 0 1 1 1 0 0 0 0 0 1 1 0 0 0 1 0 0 0 1\n", 517 | " 0 1 1 0 1 1 0 0 0 0 1 0 0 1 0 0 0 0 0 1 1 1 0 0 0 0 0 1 0 0 1 1 0 0 0 0 1\n", 518 | " 0 1 0 1 0 0 1 0 1 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 1 0 0 0 0 1\n", 519 | " 0 1 0 0 0 0 0 1 0 0 1 1 1 0 0 0 1 0 0 1 1 0 1 0 1 0 0 0 0 0 1 0 1 0 0 1 1\n", 520 | " 0 0 1 1 1 0 0 0 1 0 1 1 1 0 0 1 0 0 0 1 0 0 0 1 0 1 0 1 0 1 1 0 0 1 0 0 1\n", 521 | " 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 1 0]\n" 522 | ] 523 | } 524 | ] 525 | }, 526 | { 527 | "cell_type": "code", 528 | "metadata": { 529 | "id": "I_tW7H56rgtW", 530 | "colab": { 531 | "base_uri": "https://localhost:8080/" 532 | }, 533 | "outputId": "1563dcb5-ce73-48b6-dac6-cc133e08c4a9" 534 | }, 535 | "source": [ 536 | "print(y_test)" 537 | ], 538 | "execution_count": null, 539 | "outputs": [ 540 | { 541 | "output_type": "stream", 542 | "name": "stdout", 543 | "text": [ 544 | "[0 0 1 1 0 0 0 1 0 0 0 0 0 1 1 1 1 0 0 1 0 1 1 0 1 0 1 0 1 0 0 0 0 1 0 0 0\n", 545 | " 0 1 0 1 1 0 0 1 1 1 1 0 1 0 0 1 1 1 0 1 0 1 1 0 0 0 0 1 1 0 0 0 0 0 0 1 0\n", 546 | " 0 1 0 0 0 0]\n" 547 | ] 548 | } 549 | ] 550 | }, 551 | { 552 | "cell_type": "markdown", 553 | "metadata": { 554 | "id": "TpGqbS4TqkIR" 555 | }, 556 | "source": [ 557 | "## Feature Scaling" 558 | ] 559 | }, 560 | { 561 | "cell_type": "code", 562 | "metadata": { 563 | "id": "AxjSUXFQqo-3" 564 | }, 565 | "source": [ 566 | "from sklearn.preprocessing import StandardScaler\n", 567 | "sc = StandardScaler()\n", 568 | "X_train[:, 3:] = sc.fit_transform(X_train[:, 3:])\n", 569 | "X_test[:, 3:] = sc.transform(X_test[:, 
3:])" 570 | ], 571 | "execution_count": null, 572 | "outputs": [] 573 | }, 574 | { 575 | "cell_type": "code", 576 | "metadata": { 577 | "id": "DWPET8ZdlMnu", 578 | "colab": { 579 | "base_uri": "https://localhost:8080/" 580 | }, 581 | "outputId": "398f2a6d-484f-4d13-ca00-985f79447b1f" 582 | }, 583 | "source": [ 584 | "print(X_train)" 585 | ], 586 | "execution_count": null, 587 | "outputs": [ 588 | { 589 | "output_type": "stream", 590 | "name": "stdout", 591 | "text": [ 592 | "[[1.0 0.0 15699284 -0.8033008104771288 -1.1912179543545012]\n", 593 | " [1.0 0.0 15599081 0.7569799746681295 -1.368598012704171]\n", 594 | " [0.0 1.0 15747043 0.8544975237397081 1.4399195778322673]\n", 595 | " ...\n", 596 | " [0.0 1.0 15706071 1.3420852690976013 -1.3390346696458928]\n", 597 | " [0.0 1.0 15646227 0.8544975237397081 0.31651254161769204]\n", 598 | " [0.0 1.0 15689425 -0.7057832614055501 -0.5703877501306569]]\n" 599 | ] 600 | } 601 | ] 602 | }, 603 | { 604 | "cell_type": "code", 605 | "metadata": { 606 | "id": "sTXykB_QlRjE", 607 | "colab": { 608 | "base_uri": "https://localhost:8080/" 609 | }, 610 | "outputId": "ee300b2b-1510-4ac6-b966-91a389cedc2b" 611 | }, 612 | "source": [ 613 | "print(X_test)" 614 | ], 615 | "execution_count": null, 616 | "outputs": [ 617 | { 618 | "output_type": "stream", 619 | "name": "stdout", 620 | "text": [ 621 | "[[0.0 1.0 15755018 -0.12067796697607829 -1.0434012390631098]\n", 622 | " [1.0 0.0 15697020 0.17187468023865762 -0.21562763343131733]\n", 623 | " [0.0 1.0 15796351 -0.12067796697607829 1.4694829208905458]\n", 624 | " [0.0 1.0 15665760 0.17187468023865762 1.587736293123659]\n", 625 | " [1.0 0.0 15794661 -1.0958534576918646 1.4694829208905458]\n", 626 | " [1.0 0.0 15717560 0.074357131167079 -0.09737426119820415]\n", 627 | " [1.0 0.0 15680243 -1.6809587521213365 -0.9547112098882748]\n", 628 | " [0.0 1.0 15596522 1.147050170954444 0.6121459722004751]\n", 629 | " [0.0 1.0 15669656 -0.6082657123339715 -1.4868513849372842]\n", 630 | " [0.0 1.0 15638646 
1.0495326218828653 2.1494398112309465]\n", 631 | " [1.0 0.0 15644296 -0.3157130651192356 0.10956914020974394]\n", 632 | " [1.0 0.0 15629885 0.17187468023865762 0.13913248326802224]\n", 633 | " [0.0 1.0 15674206 -0.21819551604765694 0.10956914020974394]\n", 634 | " [1.0 0.0 15575247 1.0495326218828653 1.8538063806481635]\n", 635 | " [1.0 0.0 15611191 1.5371203672407585 0.4052025707925269]\n", 636 | " [0.0 1.0 15685346 1.8296730144554945 1.9129330667647202]\n", 637 | " [0.0 1.0 15774744 2.219743210741809 0.4347659138508052]\n", 638 | " [0.0 1.0 15728773 -0.9983359086202861 -0.30431766260615223]\n", 639 | " [1.0 0.0 15667265 -0.9008183595487074 0.5530192860839184]\n", 640 | " [0.0 1.0 15593715 2.219743210741809 0.9964694319580929]\n", 641 | " [1.0 0.0 15724423 0.26939222931023626 0.19825916938457885]\n", 642 | " [1.0 0.0 15780572 1.2445677200260228 0.5825826291421967]\n", 643 | " [1.0 0.0 15715622 0.6594624255965509 2.09031312511439]\n", 644 | " [0.0 1.0 15622478 0.9520150728112867 -0.7477678084803268]\n", 645 | " [0.0 1.0 15617482 0.7569799746681295 -1.2503446404710579]\n", 646 | " [0.0 1.0 15809823 -1.0958534576918646 -1.5755414141121191]\n", 647 | " [1.0 0.0 15574372 2.0247081125986517 -0.6295144362472135]\n", 648 | " [0.0 1.0 15708196 1.147050170954444 0.16869582632630054]\n", 649 | " [1.0 0.0 15778830 1.5371203672407585 -1.0138378960048313]\n", 650 | " [1.0 0.0 15794566 1.43960281816918 1.3512295486574324]\n", 651 | " [0.0 1.0 15668385 0.17187468023865762 -0.777331151538605]\n", 652 | " [0.0 1.0 15804002 -1.7784763011929152 0.22782251244285714]\n", 653 | " [1.0 0.0 15578738 -1.875993850264494 0.5234559430256401]\n", 654 | " [0.0 1.0 15727467 1.9271905635270732 0.16869582632630054]\n", 655 | " [1.0 0.0 15598044 -0.9983359086202861 0.46432925690908355]\n", 656 | " [0.0 1.0 15595917 -0.7057832614055501 0.34607588467597034]\n", 657 | " [0.0 1.0 15642885 -1.4859236539781793 -1.4868513849372842]\n", 658 | " [1.0 0.0 15584545 -0.5107481632623928 0.5234559430256401]\n", 
659 | " [1.0 0.0 15654296 1.2445677200260228 -1.4277246988207275]\n", 660 | " [0.0 1.0 15741094 -1.7784763011929152 -1.279907983529336]\n", 661 | " [1.0 0.0 15746203 0.9520150728112867 2.2381298404057817]\n", 662 | " [1.0 0.0 15660866 2.0247081125986517 0.9669060888998146]\n", 663 | " [0.0 1.0 15570932 -0.3157130651192356 1.3807928917157108]\n", 664 | " [1.0 0.0 15595135 -1.3884061049066005 -0.06781091813992585]\n", 665 | " [0.0 1.0 15775335 1.8296730144554945 -0.24519097648959565]\n", 666 | " [1.0 0.0 15663939 -0.6082657123339715 1.4694829208905458]\n", 667 | " [1.0 0.0 15668521 1.0495326218828653 -0.9842745529465531]\n", 668 | " [1.0 0.0 15733973 0.9520150728112867 1.3216662055991542]\n", 669 | " [1.0 0.0 15747097 0.17187468023865762 0.31651254161769204]\n", 670 | " [1.0 0.0 15613014 1.43960281816918 -0.8955845237717182]\n", 671 | " [0.0 1.0 15718071 -1.290888555835022 -0.30431766260615223]\n", 672 | " [0.0 1.0 15622171 -0.02316041790449965 -0.45213437789754374]\n", 673 | " [1.0 0.0 15721007 0.46442732745339355 0.34607588467597034]\n", 674 | " [0.0 1.0 15704583 0.8544975237397081 -1.1912179543545012]\n", 675 | " [0.0 1.0 15673539 0.46442732745339355 0.13913248326802224]\n", 676 | " [1.0 0.0 15631070 -0.02316041790449965 -0.18606429037303904]\n", 677 | " [0.0 1.0 15791373 2.219743210741809 -0.777331151538605]\n", 678 | " [0.0 1.0 15591433 -0.12067796697607829 -0.48169772095582203]\n", 679 | " [1.0 0.0 15569641 2.0247081125986517 0.7895260305501448]\n", 680 | " [0.0 1.0 15577514 0.5619448765249722 1.794679694531607]\n", 681 | " [0.0 1.0 15789863 -0.9983359086202861 0.6121459722004751]\n", 682 | " [1.0 0.0 15654574 -1.3884061049066005 0.4052025707925269]\n", 683 | " [1.0 0.0 15619407 0.074357131167079 1.292102862540876]\n", 684 | " [1.0 0.0 15638003 -0.21819551604765694 -0.5408244070723787]\n", 685 | " [0.0 1.0 15699619 -0.12067796697607829 0.907779402783258]\n", 686 | " [0.0 1.0 15694879 -0.02316041790449965 2.2381298404057817]\n", 687 | " [1.0 0.0 15706185 
-1.0958534576918646 -0.9842745529465531]\n", 688 | " [1.0 0.0 15717893 0.46442732745339355 0.050442454093187344]\n", 689 | " [1.0 0.0 15680752 0.5619448765249722 1.9129330667647202]\n", 690 | " [1.0 0.0 15733964 0.074357131167079 -0.5408244070723787]\n", 691 | " [1.0 0.0 15666675 0.8544975237397081 0.8190893736084232]\n", 692 | " [1.0 0.0 15709441 -0.21819551604765694 -0.7182044654220484]\n", 693 | " [1.0 0.0 15800515 0.074357131167079 1.3216662055991542]\n", 694 | " [0.0 1.0 15627220 0.17187468023865762 0.08000579715146565]\n", 695 | " [1.0 0.0 15606274 -1.0958534576918646 -0.48169772095582203]\n", 696 | " [1.0 0.0 15768151 1.6346379163123372 1.1738494903077628]\n", 697 | " [1.0 0.0 15591915 -0.4132306141908142 -0.5112610640141003]\n", 698 | " [0.0 1.0 15685576 -1.0958534576918646 -1.5459780710538409]\n", 699 | " [0.0 1.0 15725660 -0.7057832614055501 0.5530192860839184]\n", 700 | " [1.0 0.0 15575002 -0.21819551604765694 -0.24519097648959565]]\n" 701 | ] 702 | } 703 | ] 704 | } 705 | ] 706 | } -------------------------------------------------------------------------------- /Extraa/pd_np.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "pd/np.ipynb", 7 | "provenance": [], 8 | "mount_file_id": "https://github.com/ekramasif/Basic-Machine-Learning/blob/main/pd_np.ipynb", 9 | "authorship_tag": "ABX9TyOnWt7QGfohqyNDYCEntfh5", 10 | "include_colab_link": true 11 | }, 12 | "kernelspec": { 13 | "name": "python3", 14 | "display_name": "Python 3" 15 | }, 16 | "language_info": { 17 | "name": "python" 18 | } 19 | }, 20 | "cells": [ 21 | { 22 | "cell_type": "markdown", 23 | "metadata": { 24 | "id": "view-in-github", 25 | "colab_type": "text" 26 | }, 27 | "source": [ 28 | "\"Open" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "source": [ 34 | "#Import libraries" 35 | ], 36 | "metadata": { 37 | "id": "2aoW-Xn39Bbt" 38 | } 39 | }, 40 | { 41 
| "cell_type": "code", 42 | "source": [ 43 | "import pandas as pd\n", 44 | "import numpy as np\n", 45 | "import numpy as np\n", 46 | "from sklearn.model_selection import train_test_split\n", 47 | "import seaborn as sns\n", 48 | "import matplotlib.pyplot as plt\n", 49 | "import matplotlib\n", 50 | "from matplotlib import cm\n", 51 | "\n", 52 | "from theano import *\n", 53 | "import theano.tensor as T" 54 | ], 55 | "metadata": { 56 | "id": "Mg3u-Tvqzxjw" 57 | }, 58 | "execution_count": null, 59 | "outputs": [] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "source": [ 64 | "#Dataset Reads" 65 | ], 66 | "metadata": { 67 | "id": "i81YYb7E-Wi4" 68 | } 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "source": [ 73 | "## Google Drive Mount" 74 | ], 75 | "metadata": { 76 | "id": "C5VxJBvGHZv1" 77 | } 78 | }, 79 | { 80 | "cell_type": "code", 81 | "source": [ 82 | "from google.colab import drive\n", 83 | "drive.mount('/content/drive')" 84 | ], 85 | "metadata": { 86 | "colab": { 87 | "base_uri": "https://localhost:8080/" 88 | }, 89 | "id": "N61kT-pw-Si1", 90 | "outputId": "6643f4ee-1cf5-4703-9b1f-82c9bd0523e5" 91 | }, 92 | "execution_count": null, 93 | "outputs": [ 94 | { 95 | "output_type": "stream", 96 | "name": "stdout", 97 | "text": [ 98 | "Mounted at /content/drive\n" 99 | ] 100 | } 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "source": [ 106 | "df = pd.read_csv('/content/drive/MyDrive/Datasets/Data.csv')\n", 107 | "df.head()" 108 | ], 109 | "metadata": { 110 | "colab": { 111 | "base_uri": "https://localhost:8080/", 112 | "height": 206 113 | }, 114 | "id": "PxI4N8-X-zVI", 115 | "outputId": "5fd9d474-40cb-47b7-a9a8-1d2932fe890c" 116 | }, 117 | "execution_count": null, 118 | "outputs": [ 119 | { 120 | "output_type": "execute_result", 121 | "data": { 122 | "text/html": [ 123 | "\n", 124 | "
\n", 125 | "
\n", 126 | "
\n", 127 | "\n", 140 | "\n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | "
CountryAgeSalaryPurchased
0France44.072000.0No
1Spain27.048000.0Yes
2Germany30.054000.0No
3Spain38.061000.0No
4Germany40.0NaNYes
\n", 188 | "
\n", 189 | " \n", 199 | " \n", 200 | " \n", 237 | "\n", 238 | " \n", 262 | "
\n", 263 | "
\n", 264 | " " 265 | ], 266 | "text/plain": [ 267 | " Country Age Salary Purchased\n", 268 | "0 France 44.0 72000.0 No\n", 269 | "1 Spain 27.0 48000.0 Yes\n", 270 | "2 Germany 30.0 54000.0 No\n", 271 | "3 Spain 38.0 61000.0 No\n", 272 | "4 Germany 40.0 NaN Yes" 273 | ] 274 | }, 275 | "metadata": {}, 276 | "execution_count": 4 277 | } 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "source": [ 283 | "df.isnull().values.any()" 284 | ], 285 | "metadata": { 286 | "id": "6_x--lAm0Siv", 287 | "outputId": "57cab242-9dd0-4a02-cd63-f22ebb7b719a", 288 | "colab": { 289 | "base_uri": "https://localhost:8080/" 290 | } 291 | }, 292 | "execution_count": null, 293 | "outputs": [ 294 | { 295 | "output_type": "execute_result", 296 | "data": { 297 | "text/plain": [ 298 | "True" 299 | ] 300 | }, 301 | "metadata": {}, 302 | "execution_count": 5 303 | } 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "source": [ 309 | "count_NaN = df.isnull().sum()\n", 310 | "print('Count of NaN: ')\n", 311 | "print (str(count_NaN))" 312 | ], 313 | "metadata": { 314 | "id": "NsDx-AEa8vtY", 315 | "outputId": "58c82029-2362-49b8-e8d8-078a45fa853f", 316 | "colab": { 317 | "base_uri": "https://localhost:8080/" 318 | } 319 | }, 320 | "execution_count": null, 321 | "outputs": [ 322 | { 323 | "output_type": "stream", 324 | "name": "stdout", 325 | "text": [ 326 | "Count of NaN: \n", 327 | "Country 0\n", 328 | "Age 1\n", 329 | "Salary 1\n", 330 | "Purchased 0\n", 331 | "dtype: int64\n" 332 | ] 333 | } 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "source": [ 339 | "df = df.fillna(0)\n", 340 | "df.head()" 341 | ], 342 | "metadata": { 343 | "id": "y6FVwUwM7xsn" 344 | }, 345 | "execution_count": null, 346 | "outputs": [] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "source": [ 351 | "column = len(df.columns)\n", 352 | "print(column)" 353 | ], 354 | "metadata": { 355 | "id": "0HicBPL9DPDP" 356 | }, 357 | "execution_count": null, 358 | "outputs": [] 359 | }, 360 | { 361 | 
"cell_type": "code", 362 | "source": [ 363 | "df.shape" 364 | ], 365 | "metadata": { 366 | "id": "gTOr-SvBRB8L" 367 | }, 368 | "execution_count": null, 369 | "outputs": [] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "source": [ 374 | "train_Data = df.copy()\n", 375 | "train_Out = train_Data.pop('Purchased')" 376 | ], 377 | "metadata": { 378 | "id": "qjydk_9M-y9F" 379 | }, 380 | "execution_count": null, 381 | "outputs": [] 382 | }, 383 | { 384 | "cell_type": "code", 385 | "source": [ 386 | "print(np.array(train_Out))" 387 | ], 388 | "metadata": { 389 | "id": "U3NLiPuTERWt" 390 | }, 391 | "execution_count": null, 392 | "outputs": [] 393 | }, 394 | { 395 | "cell_type": "code", 396 | "source": [ 397 | "x_train, x_test, y_train, y_test = train_test_split(train_Data, train_Out, test_size=0.2, random_state=5)" 398 | ], 399 | "metadata": { 400 | "id": "hccLld5uL4e5" 401 | }, 402 | "execution_count": null, 403 | "outputs": [] 404 | }, 405 | { 406 | "cell_type": "code", 407 | "source": [ 408 | "x_test" 409 | ], 410 | "metadata": { 411 | "id": "JJz2SYIAMSNb" 412 | }, 413 | "execution_count": null, 414 | "outputs": [] 415 | }, 416 | { 417 | "cell_type": "code", 418 | "source": [ 419 | "x_train" 420 | ], 421 | "metadata": { 422 | "id": "ye6sB_zXNcyt" 423 | }, 424 | "execution_count": null, 425 | "outputs": [] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "source": [ 430 | "df.set_index('Country')[\"Age\"].plot.bar(\n", 431 | " xlabel='Country',\n", 432 | " ylabel='Age',\n", 433 | " rot=90,\n", 434 | " figsize=(10,5),\n", 435 | " fontsize=10,\n", 436 | " color=\"red\"\n", 437 | " )\n" 438 | ], 439 | "metadata": { 440 | "id": "Q1pAHolTz7e-" 441 | }, 442 | "execution_count": null, 443 | "outputs": [] 444 | }, 445 | { 446 | "cell_type": "code", 447 | "source": [ 448 | "df.set_index('Country')[\"Salary\"].plot.bar(\n", 449 | " xlabel='Country',\n", 450 | " ylabel='Salary',\n", 451 | " rot=90,\n", 452 | " figsize=(10,5),\n", 453 | " fontsize=10,\n", 454 | " 
color=\"blue\"\n", 455 | " )" 456 | ], 457 | "metadata": { 458 | "id": "39upbjae_SUM" 459 | }, 460 | "execution_count": null, 461 | "outputs": [] 462 | }, 463 | { 464 | "cell_type": "code", 465 | "source": [ 466 | "max(df['Salary'])" 467 | ], 468 | "metadata": { 469 | "id": "x3xWiHW5-CKz" 470 | }, 471 | "execution_count": null, 472 | "outputs": [] 473 | }, 474 | { 475 | "cell_type": "code", 476 | "source": [ 477 | "df1 = pd.read_csv('/content/drive/MyDrive/Datasets/Wine.csv')\n", 478 | "df1.head(120)" 479 | ], 480 | "metadata": { 481 | "id": "7SK6zHNp-m-c" 482 | }, 483 | "execution_count": null, 484 | "outputs": [] 485 | }, 486 | { 487 | "cell_type": "code", 488 | "source": [ 489 | "column = df1.shape\n", 490 | "print(column)" 491 | ], 492 | "metadata": { 493 | "id": "-kjZ1du-DVW7" 494 | }, 495 | "execution_count": null, 496 | "outputs": [] 497 | }, 498 | { 499 | "cell_type": "code", 500 | "source": [ 501 | "count_NaN1 = df1.isnull().sum()\n", 502 | "print ('Count NaN of Wine: ' + str(count_NaN1))" 503 | ], 504 | "metadata": { 505 | "id": "G0co7z6rDlkj" 506 | }, 507 | "execution_count": null, 508 | "outputs": [] 509 | }, 510 | { 511 | "cell_type": "code", 512 | "source": [ 513 | "X = df1.iloc[:,:-1]\n", 514 | "X" 515 | ], 516 | "metadata": { 517 | "id": "hLFJV5X5EnSz" 518 | }, 519 | "execution_count": null, 520 | "outputs": [] 521 | }, 522 | { 523 | "cell_type": "code", 524 | "source": [ 525 | "Y = df1.iloc[:,-1]\n", 526 | "Y" 527 | ], 528 | "metadata": { 529 | "id": "kqLDVBqdE3MT" 530 | }, 531 | "execution_count": null, 532 | "outputs": [] 533 | }, 534 | { 535 | "cell_type": "code", 536 | "source": [ 537 | "# Y = df1.iloc[:,13]\n", 538 | "# Y" 539 | ], 540 | "metadata": { 541 | "id": "SCwZiOf5KbSZ" 542 | }, 543 | "execution_count": null, 544 | "outputs": [] 545 | }, 546 | { 547 | "cell_type": "markdown", 548 | "source": [ 549 | "# Seaborn" 550 | ], 551 | "metadata": { 552 | "id": "uSnqrxlicDi8" 553 | } 554 | }, 555 | { 556 | "cell_type": "code", 557 | "source": [ 
558 | "\n", 559 | "\n", 560 | "# Apply the default theme\n", 561 | "sns.set_theme(style=\"ticks\")\n", 562 | "\n", 563 | "\n", 564 | "\n", 565 | "# Create a visualization\n", 566 | "# Show the results of a linear regression\n", 567 | "sns.lmplot(x=\"Age\", y=\"Salary\", data=df)\n", 568 | "\n", 569 | "# sns.lineplot(x=\"Age\", y=\"Salary\", hue=\"region\", style=\"event\", data=df)" 570 | ], 571 | "metadata": { 572 | "id": "iQNqOWK6cHp_" 573 | }, 574 | "execution_count": null, 575 | "outputs": [] 576 | }, 577 | { 578 | "cell_type": "code", 579 | "source": [ 580 | "# Show the results of a linear regression\n", 581 | "sns.lmplot(x=\"Alcohol\", y=\"Total_Phenols\", data=df1)" 582 | ], 583 | "metadata": { 584 | "id": "GbzWKqZIpklk" 585 | }, 586 | "execution_count": null, 587 | "outputs": [] 588 | }, 589 | { 590 | "cell_type": "code", 591 | "source": [ 592 | "# Plotting a swarm plot with seaborn\n", 593 | "\n", 594 | "sns.swarmplot(x='Country', y='Age', data=df)" 595 | ], 596 | "metadata": { 597 | "id": "40BHiB3ImaYS" 598 | }, 599 | "execution_count": null, 600 | "outputs": [] 601 | }, 602 | { 603 | "cell_type": "code", 604 | "source": [ 605 | "sns.swarmplot(x='Country', y='Salary', data=df)" 606 | ], 607 | "metadata": { 608 | "id": "GH_-KgK0m3UR" 609 | }, 610 | "execution_count": null, 611 | "outputs": [] 612 | }, 613 | { 614 | "cell_type": "code", 615 | "source": [ 616 | "sns.swarmplot(x=\"Alcohol\", y=\"Malic_Acid\", data=df1)" 617 | ], 618 | "metadata": { 619 | "id": "RnK372krqrcB" 620 | }, 621 | "execution_count": null, 622 | "outputs": [] 623 | }, 624 | { 625 | "cell_type": "code", 626 | "source": [ 627 | "sns.countplot(x ='Country', data = df)" 628 | ], 629 | "metadata": { 630 | "id": "MRIcGWYEs8MH" 631 | }, 632 | "execution_count": null, 633 | "outputs": [] 634 | }, 635 | { 636 | "cell_type": "code", 637 | "source": [ 638 | "sns.boxplot(x='Country', y='Age', data=df, hue='Purchased')" 639 | ], 640 | "metadata": { 641 | "id": "re9KMxmb1uRZ" 642 | }, 643 | 
"execution_count": null, 644 | "outputs": [] 645 | }, 646 | { 647 | "cell_type": "code", 648 | "source": [ 649 | "sns.violinplot(x='Salary', y='Country', data=df, hue='Purchased', split=True)" 650 | ], 651 | "metadata": { 652 | "id": "1rTJmKc62ZEp" 653 | }, 654 | "execution_count": null, 655 | "outputs": [] 656 | }, 657 | { 658 | "cell_type": "code", 659 | "source": [ 660 | "sns.stripplot(x='Salary', y='Country', data=df, hue='Purchased',jitter=True, dodge=True)" 661 | ], 662 | "metadata": { 663 | "id": "aYIbIvbR21kV" 664 | }, 665 | "execution_count": null, 666 | "outputs": [] 667 | }, 668 | { 669 | "cell_type": "code", 670 | "source": [ 671 | "# PairGrid object with hue\n", 672 | "graph = sns.PairGrid(df, hue ='Purchased')\n", 673 | "# type of graph for diagonal\n", 674 | "graph = graph.map_diag(plt.hist)\n", 675 | "# type of graph for non-diagonal\n", 676 | "graph = graph.map_offdiag(plt.scatter)\n", 677 | "# to add legends\n", 678 | "graph = graph.add_legend()\n", 679 | "# to show\n", 680 | "plt.show()" 681 | ], 682 | "metadata": { 683 | "id": "LeGSamKW3RWu" 684 | }, 685 | "execution_count": null, 686 | "outputs": [] 687 | }, 688 | { 689 | "cell_type": "code", 690 | "source": [ 691 | "# PairGrid object without hue\n", 692 | "graph = sns.PairGrid(df1)\n", 693 | "# type of graph for non-diagonal(upper part)\n", 694 | "graph = graph.map_upper(sns.scatterplot)\n", 695 | "# type of graph for non-diagonal(lower part)\n", 696 | "graph = graph.map_lower(sns.kdeplot)\n", 697 | "# type of graph for diagonal\n", 698 | "graph = graph.map_diag(sns.kdeplot, lw = 2)\n", 699 | "# to show\n", 700 | "plt.show()" 701 | ], 702 | "metadata": { 703 | "id": "-AJIANrj4Qej" 704 | }, 705 | "execution_count": null, 706 | "outputs": [] 707 | }, 708 | { 709 | "cell_type": "markdown", 710 | "source": [ 711 | "## Theano library" 712 | ], 713 | "metadata": { 714 | "id": "RWFGXktCmV_F" 715 | } 716 | }, 717 | { 718 | "cell_type": "code", 719 | "source": [ 720 | "# Declaring variable\n", 721 | "a = 
tensor.dmatrix('a')\n", 722 | " \n", 723 | "# Sigmoid function\n", 724 | "sigmoid = 1 / (1 + tensor.exp(-a))\n", 725 | " \n", 726 | "# Now it takes matrix as parameters\n", 727 | "log = theano.function([a], sigmoid)\n", 728 | " \n", 729 | "# Calling function\n", 730 | "print(log([[0, 1], [-1, -2]]))" 731 | ], 732 | "metadata": { 733 | "id": "KAgtzFAamVZB" 734 | }, 735 | "execution_count": null, 736 | "outputs": [] 737 | } 738 | ] 739 | } -------------------------------------------------------------------------------- /GenAI/Gemini/SocialMediaPostDescriptionApp/.env.example: -------------------------------------------------------------------------------- 1 | API_KEY="" -------------------------------------------------------------------------------- /GenAI/Gemini/SocialMediaPostDescriptionApp/.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | venv/ -------------------------------------------------------------------------------- /GenAI/Gemini/SocialMediaPostDescriptionApp/README.md: -------------------------------------------------------------------------------- 1 | # AI_SocialM 2 | -------------------------------------------------------------------------------- /GenAI/Gemini/SocialMediaPostDescriptionApp/app.py: -------------------------------------------------------------------------------- 1 | from dotenv import load_dotenv 2 | 3 | load_dotenv() # Load all the environment variables 4 | 5 | import streamlit as st 6 | import os 7 | import google.generativeai as genai 8 | from PIL import Image 9 | import io 10 | 11 | genai.configure(api_key=os.getenv("GOOGLE_API_KEY")) 12 | 13 | ## Function to load Google Gemini Pro Vision API And get response 14 | 15 | def get_gemini_response(input, image, prompt): 16 | model = genai.GenerativeModel('gemini-pro-vision') 17 | response = model.generate_content([input, image[0], prompt]) 18 | return response.text 19 | 20 | def input_image_setup(uploaded_file): 21 | # Check if a file 
has been uploaded 22 | if uploaded_file is not None: 23 | # Open the uploaded image 24 | image = Image.open(uploaded_file) 25 | 26 | # Resize the image to 250x150 pixels 27 | image_resized = image.resize((250, 150)) 28 | 29 | # Convert resized image to bytes 30 | img_byte_array = io.BytesIO() 31 | image_resized.save(img_byte_array, format='JPEG') 32 | bytes_data = img_byte_array.getvalue() 33 | 34 | image_parts = [ 35 | { 36 | "mime_type": 'image/jpeg', # Assuming JPEG format after resizing 37 | "data": bytes_data 38 | } 39 | ] 40 | return image_parts 41 | else: 42 | raise FileNotFoundError("No file uploaded") 43 | 44 | ## Initialize our Streamlit app 45 | 46 | st.set_page_config(page_title="Gemini Social Media post description App") 47 | 48 | st.header("Gemini Social Media post description App") 49 | input_prompt = st.text_input("Input Prompt:", key="input") 50 | uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"]) 51 | image = "" 52 | if uploaded_file is not None: 53 | image = Image.open(uploaded_file) 54 | st.image(image, caption="Uploaded Image.", use_column_width=True) 55 | 56 | submit = st.button("Write the Description") 57 | 58 | input_prompt = """Determine Image Content: 59 | 60 | Look closely at the uploaded image. 61 | Identify and describe what is prominently featured in the image. Is it a person, animal, or something else? Provide as much detail as possible about the content. 62 | Area Detection: 63 | 64 | Consider the background and setting of the image. 65 | Describe the location or scene where the image was likely taken. Is it indoors or outdoors? If outdoors, is it in a natural or urban environment? 
Specify if it's a house, street, restaurant, shop, office, or another relevant area.""" 66 | 67 | ## If submit button is clicked 68 | 69 | if submit: 70 | image_data = input_image_setup(uploaded_file) 71 | response = get_gemini_response(input_prompt, image_data, input_prompt) 72 | st.subheader("The Response is") 73 | st.write(response) 74 | -------------------------------------------------------------------------------- /GenAI/Gemini/SocialMediaPostDescriptionApp/requirements.txt: -------------------------------------------------------------------------------- 1 | streamlit 2 | google-generativeai 3 | python-dotenv 4 | langchain 5 | PyPDF2 6 | chromadb 7 | pdf2image 8 | faiss-cpu 9 | langchain_google_genai -------------------------------------------------------------------------------- /GenAI/OpenAI/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ekramasif/Basic-Machine-Learning/71f028a0e66dda2f7ef5eff8909f81d9db61ba8f/GenAI/OpenAI/README.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Ekram Asif 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /PDF/MACHINE LEARNING CHEATSHEET (w chen).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ekramasif/Basic-Machine-Learning/71f028a0e66dda2f7ef5eff8909f81d9db61ba8f/PDF/MACHINE LEARNING CHEATSHEET (w chen).pdf -------------------------------------------------------------------------------- /PDF/Machine Learning Interview Cheat sheets (Anwar V0.1.0.3).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ekramasif/Basic-Machine-Learning/71f028a0e66dda2f7ef5eff8909f81d9db61ba8f/PDF/Machine Learning Interview Cheat sheets (Anwar V0.1.0.3).pdf -------------------------------------------------------------------------------- /PDF/PythonCheatSheet.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ekramasif/Basic-Machine-Learning/71f028a0e66dda2f7ef5eff8909f81d9db61ba8f/PDF/PythonCheatSheet.pdf -------------------------------------------------------------------------------- /PDF/Rules of Machine Learning- Best Practices for ML Engineering.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ekramasif/Basic-Machine-Learning/71f028a0e66dda2f7ef5eff8909f81d9db61ba8f/PDF/Rules of Machine Learning- Best Practices for ML 
Engineering.pdf -------------------------------------------------------------------------------- /PDF/azure-machine-learning-algorithm-cheat-sheet-nov2019.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ekramasif/Basic-Machine-Learning/71f028a0e66dda2f7ef5eff8909f81d9db61ba8f/PDF/azure-machine-learning-algorithm-cheat-sheet-nov2019.pdf -------------------------------------------------------------------------------- /PDF/plotly_cheat_sheet.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ekramasif/Basic-Machine-Learning/71f028a0e66dda2f7ef5eff8909f81d9db61ba8f/PDF/plotly_cheat_sheet.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 |

Basic Machine Learning

3 |
4 | 5 | [![GitHub license](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/ekramasif/Basic-Machine-Learning/blob/main/LICENSE) 6 | 7 |

8 | 9 |

10 | 11 | > - This repository contains a topic-wise curated list of Machine Learning and Deep Learning tutorials, articles and other resources. 12 | 13 | 14 | 15 | 16 | ## Contents 17 | - [Introduction](#introduction) 18 | - [Interview Resources](#interview) 19 | - [Artificial Intelligence](#ai) 20 | - [Genetic Algorithms](#ga) 21 | - [Resources on Kaggle](#kaggle) 22 | - [Cheat Sheets](#cs) 23 | - [Classification](#classification) 24 | - [Linear Regression](#linear) 25 | - [Logistic Regression](#logistic) 26 | - [Model Validation using Resampling](#validation) 27 | - [Cross Validation](#cross) 28 | - [Deep Learning](#deep) 29 | - [Frameworks](#frame) 30 | - [Feed Forward Networks](#feed) 31 | - [Recurrent Neural Nets, LSTM, GRU](#rnn) 32 | - [Restricted Boltzmann Machine, DBNs](#rbm) 33 | - [Autoencoders](#auto) 34 | - [Convolutional Neural Nets](#cnn) 35 | - [Graph Representation Learning](#nrl) 36 | - [Natural Language Processing](#nlp) 37 | - [Word2Vec](#word2vec) 38 | - [Computer Vision](#vision) 39 | - [Support Vector Machine](#svm) 40 | - [Reinforcement Learning](#rl) 41 | - [Decision Trees](#dt) 42 | - [Random Forest / Bagging](#rf) 43 | - [Boosting](#gbm) 44 | - [Bayesian Machine Learning](#bayes) 45 | - [Semi Supervised Learning](#semi) 46 | - [Other Useful Tutorials](#other) 47 | 48 | 49 | 50 | ## Introduction 51 | 52 | - [Machine Learning Course by Andrew Ng (Stanford University)](https://www.coursera.org/learn/machine-learning) 53 | 54 | 55 | ----- 56 | Contributing 57 | ---- 58 | Have anything in mind that you think is awesome and would fit in this list? Feel free to send me a pull request! 59 | 60 | 61 | --------------------------------------------------------------------------------