├── .vscode └── settings.json ├── Basics └── Methods │ └── Methods.ipynb ├── Big O ├── .ipynb_checkpoints │ └── Big O-checkpoint.ipynb └── Big O.ipynb ├── ML algorithm ├── 1-Linear Regression │ ├── Linear Regression.ipynb │ ├── Project │ │ ├── .ipynb_checkpoints │ │ │ └── Linear Regression Project-checkpoint.ipynb │ │ ├── Ecommerce Customers │ │ └── Linear Regression Project.ipynb │ └── USA_Housing.csv ├── 10-NLP │ ├── .ipynb_checkpoints │ │ └── NLP-checkpoint.ipynb │ ├── NLP.ipynb │ └── smsspamcollection │ │ ├── SMSSpamCollection │ │ └── readme ├── 2-Logistic Regression │ ├── Logistic Regression.ipynb │ ├── Project │ │ ├── 02-Logistic Regression Project.ipynb │ │ └── advertising.csv │ ├── titanic_test.csv │ └── titanic_train.csv ├── 3-KNN │ ├── Classified Data │ ├── KNN.ipynb │ └── Project │ │ ├── 02-K Nearest Neighbors Project.ipynb │ │ └── KNN_Project_Data ├── 4-Decission & Random Tree │ ├── Decission & Random Tree.ipynb │ ├── Project │ │ ├── 02-Decision Trees and Random Forest Project.ipynb │ │ └── loan_data.csv │ └── kyphosis.csv ├── 5-SVM │ ├── .ipynb_checkpoints │ │ └── SVM-checkpoint.ipynb │ ├── Project │ │ ├── .ipynb_checkpoints │ │ │ └── 02-Support Vector Machines Project-checkpoint.ipynb │ │ └── 02-Support Vector Machines Project.ipynb │ └── SVM.ipynb ├── 6-K Means Clustering │ ├── K means Clustering.ipynb │ └── Project │ │ ├── 02-K Means Clustering Project-checkpoint.ipynb │ │ └── College_Data ├── 8-PrincipalComponentAnalysis │ ├── PCA.ipynb │ └── PCA.png └── 9-Recommender System │ ├── .ipynb_checkpoints │ └── Recommender System!!-checkpoint.ipynb │ ├── Movie_Id_Titles │ ├── Recommender System!!.ipynb │ ├── u.data │ └── u.item └── ML basics ├── 1_NUMPY.ipynb ├── 2_PANDAS.ipynb ├── 3_MatplotLib.ipynb ├── 4_Seaborn.ipynb ├── 5_Pandas_built_in_visualization_function.ipynb ├── Excel_Sample.xlsx ├── Name ├── SMALL PROJECTS ├── PANDAS │ ├── Ecommerce Purchases │ ├── Ecommerce Purchases Exercise -checkpoint.ipynb │ ├── SF Salaries Exercise.ipynb │ └── 
Salaries.csv └── capstone project │ ├── .ipynb_checkpoints │ └── 911 Calls Data Capstone Project-checkpoint.ipynb │ ├── 03-Finance Project.ipynb │ ├── 911 Calls Data Capstone Project.ipynb │ ├── 911.csv │ └── precipitation.html ├── df1 ├── df2 ├── df3 ├── example ├── group.png ├── multi_index_example └── my_picture.png /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.pythonPath": "/Users/ishikakesarwani/opt/anaconda3/bin/python" 3 | } -------------------------------------------------------------------------------- /Basics/Methods/Methods.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# METHODS" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | " Methods are just calls you can make on an object that will affect the object or return a result in some manner" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "s=\" hello my name is ishika\"" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "**s.tab** \n", 31 | "will list all the string methods that you can use!" 
32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 5, 37 | "metadata": {}, 38 | "outputs": [ 39 | { 40 | "data": { 41 | "text/plain": [ 42 | "' hello my name is ishika'" 43 | ] 44 | }, 45 | "execution_count": 5, 46 | "metadata": {}, 47 | "output_type": "execute_result" 48 | } 49 | ], 50 | "source": [ 51 | "s.lower()" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 7, 57 | "metadata": {}, 58 | "outputs": [ 59 | { 60 | "data": { 61 | "text/plain": [ 62 | "' HELLO MY NAME IS ISHIKA'" 63 | ] 64 | }, 65 | "execution_count": 7, 66 | "metadata": {}, 67 | "output_type": "execute_result" 68 | } 69 | ], 70 | "source": [ 71 | "s.upper()" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 12, 77 | "metadata": {}, 78 | "outputs": [ 79 | { 80 | "data": { 81 | "text/plain": [ 82 | "['hello', 'my', 'name', 'is', 'ishika']" 83 | ] 84 | }, 85 | "execution_count": 12, 86 | "metadata": {}, 87 | "output_type": "execute_result" 88 | } 89 | ], 90 | "source": [ 91 | "s.split()" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 15, 97 | "metadata": {}, 98 | "outputs": [ 99 | { 100 | "data": { 101 | "text/plain": [ 102 | "['hello what are ', 'you doing?']" 103 | ] 104 | }, 105 | "execution_count": 15, 106 | "metadata": {}, 107 | "output_type": "execute_result" 108 | } 109 | ], 110 | "source": [ 111 | "tweet=\"hello what are #you doing?\"\n", 112 | "tweet.split(\"#\")\n" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 16, 118 | "metadata": { 119 | "scrolled": true 120 | }, 121 | "outputs": [ 122 | { 123 | "data": { 124 | "text/plain": [ 125 | "'you doing?'" 126 | ] 127 | }, 128 | "execution_count": 16, 129 | "metadata": {}, 130 | "output_type": "execute_result" 131 | } 132 | ], 133 | "source": [ 134 | "tweet.split('#')[1]" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "# Some useful methods for a dictionary!" 
142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 19, 147 | "metadata": {}, 148 | "outputs": [ 149 | { 150 | "data": { 151 | "text/plain": [ 152 | "{'k1': 1, 'k2': 2}" 153 | ] 154 | }, 155 | "execution_count": 19, 156 | "metadata": {}, 157 | "output_type": "execute_result" 158 | } 159 | ], 160 | "source": [ 161 | "d={'k1':1,'k2':2}\n", 162 | "d" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 20, 168 | "metadata": {}, 169 | "outputs": [ 170 | { 171 | "data": { 172 | "text/plain": [ 173 | "dict_keys(['k1', 'k2'])" 174 | ] 175 | }, 176 | "execution_count": 20, 177 | "metadata": {}, 178 | "output_type": "execute_result" 179 | } 180 | ], 181 | "source": [ 182 | "d.keys()" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 21, 188 | "metadata": {}, 189 | "outputs": [ 190 | { 191 | "data": { 192 | "text/plain": [ 193 | "dict_items([('k1', 1), ('k2', 2)])" 194 | ] 195 | }, 196 | "execution_count": 21, 197 | "metadata": {}, 198 | "output_type": "execute_result" 199 | } 200 | ], 201 | "source": [ 202 | "d.items()" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 22, 208 | "metadata": {}, 209 | "outputs": [ 210 | { 211 | "data": { 212 | "text/plain": [ 213 | "dict_values([1, 2])" 214 | ] 215 | }, 216 | "execution_count": 22, 217 | "metadata": {}, 218 | "output_type": "execute_result" 219 | } 220 | ], 221 | "source": [ 222 | "d.values()" 223 | ] 224 | }, 225 | { 226 | "cell_type": "markdown", 227 | "metadata": {}, 228 | "source": [ 229 | "# # Some useful methods for a List" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": 23, 235 | "metadata": {}, 236 | "outputs": [], 237 | "source": [ 238 | "list=[1,2,3]" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": 24, 244 | "metadata": {}, 245 | "outputs": [ 246 | { 247 | "data": { 248 | "text/plain": [ 249 | "3" 250 | ] 251 | }, 252 | "execution_count": 24, 253 | 
"metadata": {}, 254 | "output_type": "execute_result" 255 | } 256 | ], 257 | "source": [ 258 | "list.pop()\n", 259 | "#pops the last item!" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": 26, 265 | "metadata": {}, 266 | "outputs": [ 267 | { 268 | "data": { 269 | "text/plain": [ 270 | "[1, 2]" 271 | ] 272 | }, 273 | "execution_count": 26, 274 | "metadata": {}, 275 | "output_type": "execute_result" 276 | } 277 | ], 278 | "source": [ 279 | "list" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": 32, 285 | "metadata": {}, 286 | "outputs": [], 287 | "source": [ 288 | "list=[1,2,3,4,5]\n", 289 | "item=list.pop()" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": 33, 295 | "metadata": {}, 296 | "outputs": [ 297 | { 298 | "data": { 299 | "text/plain": [ 300 | "5" 301 | ] 302 | }, 303 | "execution_count": 33, 304 | "metadata": {}, 305 | "output_type": "execute_result" 306 | } 307 | ], 308 | "source": [ 309 | "item\n", 310 | "#this will be re-assigned, with the number which was popped!" 
311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": 34, 316 | "metadata": {}, 317 | "outputs": [ 318 | { 319 | "data": { 320 | "text/plain": [ 321 | "[1, 2, 3, 4]" 322 | ] 323 | }, 324 | "execution_count": 34, 325 | "metadata": {}, 326 | "output_type": "execute_result" 327 | } 328 | ], 329 | "source": [ 330 | "list\n" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": 35, 336 | "metadata": {}, 337 | "outputs": [ 338 | { 339 | "name": "stdout", 340 | "output_type": "stream", 341 | "text": [ 342 | "(1, 2)\n", 343 | "(3, 4)\n", 344 | "(5, 6)\n" 345 | ] 346 | } 347 | ], 348 | "source": [ 349 | "x=[(1,2),(3,4),(5,6)]\n", 350 | "for item in x:\n", 351 | " print(item)" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": 36, 357 | "metadata": {}, 358 | "outputs": [ 359 | { 360 | "name": "stdout", 361 | "output_type": "stream", 362 | "text": [ 363 | "1\n", 364 | "3\n", 365 | "5\n" 366 | ] 367 | } 368 | ], 369 | "source": [ 370 | "for (a,b) in x:\n", 371 | " print(a)" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": null, 377 | "metadata": {}, 378 | "outputs": [], 379 | "source": [] 380 | } 381 | ], 382 | "metadata": { 383 | "kernelspec": { 384 | "display_name": "Python 3", 385 | "language": "python", 386 | "name": "python3" 387 | }, 388 | "language_info": { 389 | "codemirror_mode": { 390 | "name": "ipython", 391 | "version": 3 392 | }, 393 | "file_extension": ".py", 394 | "mimetype": "text/x-python", 395 | "name": "python", 396 | "nbconvert_exporter": "python", 397 | "pygments_lexer": "ipython3", 398 | "version": "3.8.5" 399 | } 400 | }, 401 | "nbformat": 4, 402 | "nbformat_minor": 4 403 | } 404 | -------------------------------------------------------------------------------- /ML algorithm/10-NLP/.ipynb_checkpoints/NLP-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | 
"nbformat": 4, 5 | "nbformat_minor": 5 6 | } 7 | -------------------------------------------------------------------------------- /ML algorithm/10-NLP/NLP.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "3f61273a", 6 | "metadata": {}, 7 | "source": [ 8 | "# NLP!!" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "91d18f16", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import nltk" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "id": "6ced05ee", 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "name": "stdout", 29 | "output_type": "stream", 30 | "text": [ 31 | "NLTK Downloader\n", 32 | "---------------------------------------------------------------------------\n", 33 | " d) Download l) List u) Update c) Config h) Help q) Quit\n", 34 | "---------------------------------------------------------------------------\n", 35 | "Downloader> d\n", 36 | "\n", 37 | "Download which package (l=list; x=cancel)?\n", 38 | " Identifier> stopwords\n" 39 | ] 40 | }, 41 | { 42 | "name": "stderr", 43 | "output_type": "stream", 44 | "text": [ 45 | " Downloading package stopwords to\n", 46 | " /Users/ishikakesarwani/nltk_data...\n", 47 | " Unzipping corpora/stopwords.zip.\n" 48 | ] 49 | }, 50 | { 51 | "name": "stdout", 52 | "output_type": "stream", 53 | "text": [ 54 | "\n", 55 | "---------------------------------------------------------------------------\n", 56 | " d) Download l) List u) Update c) Config h) Help q) Quit\n", 57 | "---------------------------------------------------------------------------\n", 58 | "Downloader> x\n" 59 | ] 60 | } 61 | ], 62 | "source": [ 63 | "nltk.download_shell()" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 3, 69 | "id": "c6221fef", 70 | "metadata": {}, 71 | "outputs": [ 72 | { 73 | "name": "stderr", 74 | "output_type": "stream", 75 | 
"text": [ 76 | "[nltk_data] Downloading package stopwords to\n", 77 | "[nltk_data] /Users/ishikakesarwani/nltk_data...\n", 78 | "[nltk_data] Package stopwords is already up-to-date!\n" 79 | ] 80 | }, 81 | { 82 | "data": { 83 | "text/plain": [ 84 | "True" 85 | ] 86 | }, 87 | "execution_count": 3, 88 | "metadata": {}, 89 | "output_type": "execute_result" 90 | } 91 | ], 92 | "source": [ 93 | "nltk.download('stopwords')" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "id": "8697dbd2", 99 | "metadata": {}, 100 | "source": [ 101 | "We'll be using a dataset from the [UCI datasets](https://archive.ics.uci.edu/ml/datasets/SMS+Spam+Collection)! " 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 4, 107 | "id": "56f36f92", 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "messages = [line.rstrip() for line in open('smsspamcollection/SMSSpamCollection')]" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 5, 117 | "id": "ae89d856", 118 | "metadata": {}, 119 | "outputs": [ 120 | { 121 | "name": "stdout", 122 | "output_type": "stream", 123 | "text": [ 124 | "5574\n" 125 | ] 126 | } 127 | ], 128 | "source": [ 129 | "print(len(messages))" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 14, 135 | "id": "9c0b17b3", 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "name": "stdout", 140 | "output_type": "stream", 141 | "text": [ 142 | "0 ham\tGo until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat...\n", 143 | "\n", 144 | "\n", 145 | "1 ham\tOk lar... Joking wif u oni...\n", 146 | "\n", 147 | "\n", 148 | "2 spam\tFree entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121 to receive entry question(std txt rate)T&C's apply 08452810075over18's\n", 149 | "\n", 150 | "\n", 151 | "3 ham\tU dun say so early hor... 
U c already then say...\n", 152 | "\n", 153 | "\n", 154 | "4 ham\tNah I don't think he goes to usf, he lives around here though\n", 155 | "\n", 156 | "\n", 157 | "5 spam\tFreeMsg Hey there darling it's been 3 week's now and no word back! I'd like some fun you up for it still? Tb ok! XxX std chgs to send, £1.50 to rcv\n", 158 | "\n", 159 | "\n", 160 | "6 ham\tEven my brother is not like to speak with me. They treat me like aids patent.\n", 161 | "\n", 162 | "\n", 163 | "7 ham\tAs per your request 'Melle Melle (Oru Minnaminunginte Nurungu Vettam)' has been set as your callertune for all Callers. Press *9 to copy your friends Callertune\n", 164 | "\n", 165 | "\n", 166 | "8 spam\tWINNER!! As a valued network customer you have been selected to receivea £900 prize reward! To claim call 09061701461. Claim code KL341. Valid 12 hours only.\n", 167 | "\n", 168 | "\n", 169 | "9 spam\tHad your mobile 11 months or more? U R entitled to Update to the latest colour mobiles with camera for Free! Call The Mobile Update Co FREE on 08002986030\n", 170 | "\n", 171 | "\n" 172 | ] 173 | } 174 | ], 175 | "source": [ 176 | "for mess_no,message in enumerate(messages[:10]):\n", 177 | " print(mess_no,message)\n", 178 | " print('\\n')" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "id": "4145cbd5", 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "id": "8472401f", 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [] 196 | } 197 | ], 198 | "metadata": { 199 | "kernelspec": { 200 | "display_name": "Python 3", 201 | "language": "python", 202 | "name": "python3" 203 | }, 204 | "language_info": { 205 | "codemirror_mode": { 206 | "name": "ipython", 207 | "version": 3 208 | }, 209 | "file_extension": ".py", 210 | "mimetype": "text/x-python", 211 | "name": "python", 212 | "nbconvert_exporter": "python", 213 | "pygments_lexer": "ipython3", 214 | "version": 
"3.8.10" 215 | } 216 | }, 217 | "nbformat": 4, 218 | "nbformat_minor": 5 219 | } 220 | -------------------------------------------------------------------------------- /ML algorithm/10-NLP/smsspamcollection/readme: -------------------------------------------------------------------------------- 1 | SMS Spam Collection v.1 2 | ------------------------- 3 | 4 | 1. DESCRIPTION 5 | -------------- 6 | 7 | The SMS Spam Collection v.1 (hereafter the corpus) is a set of SMS tagged messages that have been collected for SMS Spam research. It contains one set of SMS messages in English of 5,574 messages, tagged acording being ham (legitimate) or spam. 8 | 9 | 1.1. Compilation 10 | ---------------- 11 | 12 | This corpus has been collected from free or free for research sources at the Web: 13 | 14 | - A collection of between 425 SMS spam messages extracted manually from the Grumbletext Web site. This is a UK forum in which cell phone users make public claims about SMS spam messages, most of them without reporting the very spam message received. The identification of the text of spam messages in the claims is a very hard and time-consuming task, and it involved carefully scanning hundreds of web pages. The Grumbletext Web site is: http://www.grumbletext.co.uk/ 15 | - A list of 450 SMS ham messages collected from Caroline Tag's PhD Theses available at http://etheses.bham.ac.uk/253/1/Tagg09PhD.pdf 16 | - A subset of 3,375 SMS ham messages of the NUS SMS Corpus (NSC), which is a corpus of about 10,000 legitimate messages collected for research at the Department of Computer Science at the National University of Singapore. The messages largely originate from Singaporeans and mostly from students attending the University. These messages were collected from volunteers who were made aware that their contributions were going to be made publicly available. 
The NUS SMS Corpus is available at: http://www.comp.nus.edu.sg/~rpnlpir/downloads/corpora/smsCorpus/ 17 | - The amount of 1,002 SMS ham messages and 322 spam messages extracted from the SMS Spam Corpus v.0.1 Big created by José María Gómez Hidalgo and publicly available at: http://www.esp.uem.es/jmgomez/smsspamcorpus/ 18 | 19 | 20 | 1.2. Statistics 21 | --------------- 22 | 23 | There is one collection: 24 | 25 | - The SMS Spam Collection v.1 (text file: smsspamcollection) has a total of 4,827 SMS legitimate messages (86.6%) and a total of 747 (13.4%) spam messages. 26 | 27 | 28 | 1.3. Format 29 | ----------- 30 | 31 | The files contain one message per line. Each line is composed of two columns: one with the label (ham or spam) and the other with the raw text. Here are some examples: 32 | 33 | ham What you doing?how are you? 34 | ham Ok lar... Joking wif u oni... 35 | ham dun say so early hor... U c already then say... 36 | ham MY NO. IN LUTON 0125698789 RING ME IF UR AROUND! H* 37 | ham Siva is in hostel aha:-. 38 | ham Cos i was out shopping wif darren jus now n i called him 2 ask wat present he wan lor. Then he started guessing who i was wif n he finally guessed darren lor. 39 | spam FreeMsg: Txt: CALL to No: 86888 & claim your reward of 3 hours talk time to use from your phone now! ubscribe6GBP/ mnth inc 3hrs 16 stop?txtStop 40 | spam Sunshine Quiz! Win a super Sony DVD recorder if you canname the capital of Australia? Text MQUIZ to 82277. B 41 | spam URGENT! Your Mobile No 07808726822 was awarded a L2,000 Bonus Caller Prize on 02/09/03! This is our 2nd attempt to contact YOU! Call 0871-872-9758 BOX95QU 42 | 43 | Note: messages are not chronologically sorted. 44 | 45 | 46 | 2. USAGE 47 | -------- 48 | 49 | We offer a comprehensive study of this corpus in the following paper that is under review. This work presents a number of statistics, studies and baseline results for several machine learning methods. 50 | 51 | [1] Almeida, T.A., Gómez Hidalgo, J.M., Yamakami, A. 
Contributions to the study of SMS Spam Filtering: New Collection and Results. Proceedings of the 2011 ACM Symposium on Document Engineering (ACM DOCENG'11), Mountain View, CA, USA, 2011. (Under review) 52 | 53 | 54 | 3. ABOUT 55 | -------- 56 | 57 | The corpus has been collected by Tiago Agostinho de Almeida (http://www.dt.fee.unicamp.br/~tiago) and José María Gómez Hidalgo (http://www.esp.uem.es/jmgomez). 58 | 59 | We would like to thank Dr. Min-Yen Kan (http://www.comp.nus.edu.sg/~kanmy/) and his team for making the NUS SMS Corpus available. See: http://www.comp.nus.edu.sg/~rpnlpir/downloads/corpora/smsCorpus/. He is currently collecting a bigger SMS corpus at: http://wing.comp.nus.edu.sg:8080/SMSCorpus/ 60 | 61 | 4. LICENSE/DISCLAIMER 62 | --------------------- 63 | 64 | We would appreciate it if: 65 | 66 | - In case you find this corpus useful, please make a reference to the previous paper and the web page: http://www.dt.fee.unicamp.br/~tiago/smsspamcollection/ in your papers, research, etc. 67 | - Send us a message to tiago@dt.fee.unicamp.br in case you make use of the corpus. 68 | 69 | The SMS Spam Collection v.1 is provided for free and with no limitations excepting: 70 | 71 | 1. Tiago Agostinho de Almeida and José María Gómez Hidalgo hold the copyright (c) for the SMS Spam Collection v.1. 72 | 73 | 2. No Warranty/Use At Your Risk. THE CORPUS IS MADE AT NO CHARGE. ACCORDINGLY, THE CORPUS IS PROVIDED `AS IS,' WITHOUT WARRANTY OF ANY KIND, INCLUDING WITHOUT LIMITATION THE WARRANTIES THAT THEY ARE MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING. YOU ARE SOLELY RESPONSIBLE FOR YOUR USE, DISTRIBUTION, MODIFICATION, REPRODUCTION AND PUBLICATION OF THE CORPUS AND ANY DERIVATIVE WORKS THEREOF BY YOU AND ANY OF YOUR SUBLICENSEES (COLLECTIVELY, `YOUR CORPUS USE'). THE ENTIRE RISK AS TO YOUR CORPUS USE IS BORNE BY YOU. YOU AGREE TO INDEMNIFY AND HOLD THE COPYRIGHT HOLDERS, AND THEIR AFFILIATES HARMLESS FROM ANY CLAIMS ARISING FROM OR RELATING TO YOUR CORPUS USE. 
74 | 75 | 3. Limitation of Liability. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR THEIR AFFILIATES, OR THE CORPUS CONTRIBUTING EDITORS, BE LIABLE FOR ANY INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES, INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF GOODWILL OR ANY AND ALL OTHER COMMERCIAL DAMAGES OR LOSSES, EVEN IF ADVISED OF THE POSSIBILITY THEREOF, AND REGARDLESS OF WHETHER ANY CLAIM IS BASED UPON ANY CONTRACT, TORT OR OTHER LEGAL OR EQUITABLE THEORY, RELATING OR ARISING FROM THE CORPUS, YOUR CORPUS USE OR THIS LICENSE AGREEMENT. 76 | -------------------------------------------------------------------------------- /ML algorithm/2-Logistic Regression/titanic_test.csv: -------------------------------------------------------------------------------- 1 | PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked 2 | 892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q 3 | 893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47,1,0,363272,7,,S 4 | 894,2,"Myles, Mr. Thomas Francis",male,62,0,0,240276,9.6875,,Q 5 | 895,3,"Wirz, Mr. Albert",male,27,0,0,315154,8.6625,,S 6 | 896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22,1,1,3101298,12.2875,,S 7 | 897,3,"Svensson, Mr. Johan Cervin",male,14,0,0,7538,9.225,,S 8 | 898,3,"Connolly, Miss. Kate",female,30,0,0,330972,7.6292,,Q 9 | 899,2,"Caldwell, Mr. Albert Francis",male,26,1,1,248738,29,,S 10 | 900,3,"Abrahim, Mrs. Joseph (Sophie Halaut Easu)",female,18,0,0,2657,7.2292,,C 11 | 901,3,"Davies, Mr. John Samuel",male,21,2,0,A/4 48871,24.15,,S 12 | 902,3,"Ilieff, Mr. Ylio",male,,0,0,349220,7.8958,,S 13 | 903,1,"Jones, Mr. Charles Cresson",male,46,0,0,694,26,,S 14 | 904,1,"Snyder, Mrs. John Pillsbury (Nelle Stevenson)",female,23,1,0,21228,82.2667,B45,S 15 | 905,2,"Howard, Mr. Benjamin",male,63,1,0,24065,26,,S 16 | 906,1,"Chaffee, Mrs. Herbert Fuller (Carrie Constance Toogood)",female,47,1,0,W.E.P. 5734,61.175,E31,S 17 | 907,2,"del Carlo, Mrs. 
Sebastiano (Argenia Genovesi)",female,24,1,0,SC/PARIS 2167,27.7208,,C 18 | 908,2,"Keane, Mr. Daniel",male,35,0,0,233734,12.35,,Q 19 | 909,3,"Assaf, Mr. Gerios",male,21,0,0,2692,7.225,,C 20 | 910,3,"Ilmakangas, Miss. Ida Livija",female,27,1,0,STON/O2. 3101270,7.925,,S 21 | 911,3,"Assaf Khalil, Mrs. Mariana (Miriam"")""",female,45,0,0,2696,7.225,,C 22 | 912,1,"Rothschild, Mr. Martin",male,55,1,0,PC 17603,59.4,,C 23 | 913,3,"Olsen, Master. Artur Karl",male,9,0,1,C 17368,3.1708,,S 24 | 914,1,"Flegenheim, Mrs. Alfred (Antoinette)",female,,0,0,PC 17598,31.6833,,S 25 | 915,1,"Williams, Mr. Richard Norris II",male,21,0,1,PC 17597,61.3792,,C 26 | 916,1,"Ryerson, Mrs. Arthur Larned (Emily Maria Borie)",female,48,1,3,PC 17608,262.375,B57 B59 B63 B66,C 27 | 917,3,"Robins, Mr. Alexander A",male,50,1,0,A/5. 3337,14.5,,S 28 | 918,1,"Ostby, Miss. Helene Ragnhild",female,22,0,1,113509,61.9792,B36,C 29 | 919,3,"Daher, Mr. Shedid",male,22.5,0,0,2698,7.225,,C 30 | 920,1,"Brady, Mr. John Bertram",male,41,0,0,113054,30.5,A21,S 31 | 921,3,"Samaan, Mr. Elias",male,,2,0,2662,21.6792,,C 32 | 922,2,"Louch, Mr. Charles Alexander",male,50,1,0,SC/AH 3085,26,,S 33 | 923,2,"Jefferys, Mr. Clifford Thomas",male,24,2,0,C.A. 31029,31.5,,S 34 | 924,3,"Dean, Mrs. Bertram (Eva Georgetta Light)",female,33,1,2,C.A. 2315,20.575,,S 35 | 925,3,"Johnston, Mrs. Andrew G (Elizabeth Lily"" Watson)""",female,,1,2,W./C. 6607,23.45,,S 36 | 926,1,"Mock, Mr. Philipp Edmund",male,30,1,0,13236,57.75,C78,C 37 | 927,3,"Katavelas, Mr. Vassilios (Catavelas Vassilios"")""",male,18.5,0,0,2682,7.2292,,C 38 | 928,3,"Roth, Miss. Sarah A",female,,0,0,342712,8.05,,S 39 | 929,3,"Cacic, Miss. Manda",female,21,0,0,315087,8.6625,,S 40 | 930,3,"Sap, Mr. Julius",male,25,0,0,345768,9.5,,S 41 | 931,3,"Hee, Mr. Ling",male,,0,0,1601,56.4958,,S 42 | 932,3,"Karun, Mr. Franz",male,39,0,1,349256,13.4167,,C 43 | 933,1,"Franklin, Mr. Thomas Parham",male,,0,0,113778,26.55,D34,S 44 | 934,3,"Goldsmith, Mr. Nathan",male,41,0,0,SOTON/O.Q. 
3101263,7.85,,S 45 | 935,2,"Corbett, Mrs. Walter H (Irene Colvin)",female,30,0,0,237249,13,,S 46 | 936,1,"Kimball, Mrs. Edwin Nelson Jr (Gertrude Parsons)",female,45,1,0,11753,52.5542,D19,S 47 | 937,3,"Peltomaki, Mr. Nikolai Johannes",male,25,0,0,STON/O 2. 3101291,7.925,,S 48 | 938,1,"Chevre, Mr. Paul Romaine",male,45,0,0,PC 17594,29.7,A9,C 49 | 939,3,"Shaughnessy, Mr. Patrick",male,,0,0,370374,7.75,,Q 50 | 940,1,"Bucknell, Mrs. William Robert (Emma Eliza Ward)",female,60,0,0,11813,76.2917,D15,C 51 | 941,3,"Coutts, Mrs. William (Winnie Minnie"" Treanor)""",female,36,0,2,C.A. 37671,15.9,,S 52 | 942,1,"Smith, Mr. Lucien Philip",male,24,1,0,13695,60,C31,S 53 | 943,2,"Pulbaum, Mr. Franz",male,27,0,0,SC/PARIS 2168,15.0333,,C 54 | 944,2,"Hocking, Miss. Ellen Nellie""""",female,20,2,1,29105,23,,S 55 | 945,1,"Fortune, Miss. Ethel Flora",female,28,3,2,19950,263,C23 C25 C27,S 56 | 946,2,"Mangiavacchi, Mr. Serafino Emilio",male,,0,0,SC/A.3 2861,15.5792,,C 57 | 947,3,"Rice, Master. Albert",male,10,4,1,382652,29.125,,Q 58 | 948,3,"Cor, Mr. Bartol",male,35,0,0,349230,7.8958,,S 59 | 949,3,"Abelseth, Mr. Olaus Jorgensen",male,25,0,0,348122,7.65,F G63,S 60 | 950,3,"Davison, Mr. Thomas Henry",male,,1,0,386525,16.1,,S 61 | 951,1,"Chaudanson, Miss. Victorine",female,36,0,0,PC 17608,262.375,B61,C 62 | 952,3,"Dika, Mr. Mirko",male,17,0,0,349232,7.8958,,S 63 | 953,2,"McCrae, Mr. Arthur Gordon",male,32,0,0,237216,13.5,,S 64 | 954,3,"Bjorklund, Mr. Ernst Herbert",male,18,0,0,347090,7.75,,S 65 | 955,3,"Bradley, Miss. Bridget Delia",female,22,0,0,334914,7.725,,Q 66 | 956,1,"Ryerson, Master. John Borie",male,13,2,2,PC 17608,262.375,B57 B59 B63 B66,C 67 | 957,2,"Corey, Mrs. Percy C (Mary Phyllis Elizabeth Miller)",female,,0,0,F.C.C. 13534,21,,S 68 | 958,3,"Burns, Miss. Mary Delia",female,18,0,0,330963,7.8792,,Q 69 | 959,1,"Moore, Mr. Clarence Bloomfield",male,47,0,0,113796,42.4,,S 70 | 960,1,"Tucker, Mr. Gilbert Milligan Jr",male,31,0,0,2543,28.5375,C53,C 71 | 961,1,"Fortune, Mrs. 
Mark (Mary McDougald)",female,60,1,4,19950,263,C23 C25 C27,S 72 | 962,3,"Mulvihill, Miss. Bertha E",female,24,0,0,382653,7.75,,Q 73 | 963,3,"Minkoff, Mr. Lazar",male,21,0,0,349211,7.8958,,S 74 | 964,3,"Nieminen, Miss. Manta Josefina",female,29,0,0,3101297,7.925,,S 75 | 965,1,"Ovies y Rodriguez, Mr. Servando",male,28.5,0,0,PC 17562,27.7208,D43,C 76 | 966,1,"Geiger, Miss. Amalie",female,35,0,0,113503,211.5,C130,C 77 | 967,1,"Keeping, Mr. Edwin",male,32.5,0,0,113503,211.5,C132,C 78 | 968,3,"Miles, Mr. Frank",male,,0,0,359306,8.05,,S 79 | 969,1,"Cornell, Mrs. Robert Clifford (Malvina Helen Lamson)",female,55,2,0,11770,25.7,C101,S 80 | 970,2,"Aldworth, Mr. Charles Augustus",male,30,0,0,248744,13,,S 81 | 971,3,"Doyle, Miss. Elizabeth",female,24,0,0,368702,7.75,,Q 82 | 972,3,"Boulos, Master. Akar",male,6,1,1,2678,15.2458,,C 83 | 973,1,"Straus, Mr. Isidor",male,67,1,0,PC 17483,221.7792,C55 C57,S 84 | 974,1,"Case, Mr. Howard Brown",male,49,0,0,19924,26,,S 85 | 975,3,"Demetri, Mr. Marinko",male,,0,0,349238,7.8958,,S 86 | 976,2,"Lamb, Mr. John Joseph",male,,0,0,240261,10.7083,,Q 87 | 977,3,"Khalil, Mr. Betros",male,,1,0,2660,14.4542,,C 88 | 978,3,"Barry, Miss. Julia",female,27,0,0,330844,7.8792,,Q 89 | 979,3,"Badman, Miss. Emily Louisa",female,18,0,0,A/4 31416,8.05,,S 90 | 980,3,"O'Donoghue, Ms. Bridget",female,,0,0,364856,7.75,,Q 91 | 981,2,"Wells, Master. Ralph Lester",male,2,1,1,29103,23,,S 92 | 982,3,"Dyker, Mrs. Adolf Fredrik (Anna Elisabeth Judith Andersson)",female,22,1,0,347072,13.9,,S 93 | 983,3,"Pedersen, Mr. Olaf",male,,0,0,345498,7.775,,S 94 | 984,1,"Davidson, Mrs. Thornton (Orian Hays)",female,27,1,2,F.C. 12750,52,B71,S 95 | 985,3,"Guest, Mr. Robert",male,,0,0,376563,8.05,,S 96 | 986,1,"Birnbaum, Mr. Jakob",male,25,0,0,13905,26,,C 97 | 987,3,"Tenglin, Mr. Gunnar Isidor",male,25,0,0,350033,7.7958,,S 98 | 988,1,"Cavendish, Mrs. Tyrell William (Julia Florence Siegel)",female,76,1,0,19877,78.85,C46,S 99 | 989,3,"Makinen, Mr. Kalle Edvard",male,29,0,0,STON/O 2. 
3101268,7.925,,S 100 | 990,3,"Braf, Miss. Elin Ester Maria",female,20,0,0,347471,7.8542,,S 101 | 991,3,"Nancarrow, Mr. William Henry",male,33,0,0,A./5. 3338,8.05,,S 102 | 992,1,"Stengel, Mrs. Charles Emil Henry (Annie May Morris)",female,43,1,0,11778,55.4417,C116,C 103 | 993,2,"Weisz, Mr. Leopold",male,27,1,0,228414,26,,S 104 | 994,3,"Foley, Mr. William",male,,0,0,365235,7.75,,Q 105 | 995,3,"Johansson Palmquist, Mr. Oskar Leander",male,26,0,0,347070,7.775,,S 106 | 996,3,"Thomas, Mrs. Alexander (Thamine Thelma"")""",female,16,1,1,2625,8.5167,,C 107 | 997,3,"Holthen, Mr. Johan Martin",male,28,0,0,C 4001,22.525,,S 108 | 998,3,"Buckley, Mr. Daniel",male,21,0,0,330920,7.8208,,Q 109 | 999,3,"Ryan, Mr. Edward",male,,0,0,383162,7.75,,Q 110 | 1000,3,"Willer, Mr. Aaron (Abi Weller"")""",male,,0,0,3410,8.7125,,S 111 | 1001,2,"Swane, Mr. George",male,18.5,0,0,248734,13,F,S 112 | 1002,2,"Stanton, Mr. Samuel Ward",male,41,0,0,237734,15.0458,,C 113 | 1003,3,"Shine, Miss. Ellen Natalia",female,,0,0,330968,7.7792,,Q 114 | 1004,1,"Evans, Miss. Edith Corse",female,36,0,0,PC 17531,31.6792,A29,C 115 | 1005,3,"Buckley, Miss. Katherine",female,18.5,0,0,329944,7.2833,,Q 116 | 1006,1,"Straus, Mrs. Isidor (Rosalie Ida Blun)",female,63,1,0,PC 17483,221.7792,C55 C57,S 117 | 1007,3,"Chronopoulos, Mr. Demetrios",male,18,1,0,2680,14.4542,,C 118 | 1008,3,"Thomas, Mr. John",male,,0,0,2681,6.4375,,C 119 | 1009,3,"Sandstrom, Miss. Beatrice Irene",female,1,1,1,PP 9549,16.7,G6,S 120 | 1010,1,"Beattie, Mr. Thomson",male,36,0,0,13050,75.2417,C6,C 121 | 1011,2,"Chapman, Mrs. John Henry (Sara Elizabeth Lawry)",female,29,1,0,SC/AH 29037,26,,S 122 | 1012,2,"Watt, Miss. Bertha J",female,12,0,0,C.A. 33595,15.75,,S 123 | 1013,3,"Kiernan, Mr. John",male,,1,0,367227,7.75,,Q 124 | 1014,1,"Schabert, Mrs. Paul (Emma Mock)",female,35,1,0,13236,57.75,C28,C 125 | 1015,3,"Carver, Mr. Alfred John",male,28,0,0,392095,7.25,,S 126 | 1016,3,"Kennedy, Mr. John",male,,0,0,368783,7.75,,Q 127 | 1017,3,"Cribb, Miss. 
Laura Alice",female,17,0,1,371362,16.1,,S 128 | 1018,3,"Brobeck, Mr. Karl Rudolf",male,22,0,0,350045,7.7958,,S 129 | 1019,3,"McCoy, Miss. Alicia",female,,2,0,367226,23.25,,Q 130 | 1020,2,"Bowenur, Mr. Solomon",male,42,0,0,211535,13,,S 131 | 1021,3,"Petersen, Mr. Marius",male,24,0,0,342441,8.05,,S 132 | 1022,3,"Spinner, Mr. Henry John",male,32,0,0,STON/OQ. 369943,8.05,,S 133 | 1023,1,"Gracie, Col. Archibald IV",male,53,0,0,113780,28.5,C51,C 134 | 1024,3,"Lefebre, Mrs. Frank (Frances)",female,,0,4,4133,25.4667,,S 135 | 1025,3,"Thomas, Mr. Charles P",male,,1,0,2621,6.4375,,C 136 | 1026,3,"Dintcheff, Mr. Valtcho",male,43,0,0,349226,7.8958,,S 137 | 1027,3,"Carlsson, Mr. Carl Robert",male,24,0,0,350409,7.8542,,S 138 | 1028,3,"Zakarian, Mr. Mapriededer",male,26.5,0,0,2656,7.225,,C 139 | 1029,2,"Schmidt, Mr. August",male,26,0,0,248659,13,,S 140 | 1030,3,"Drapkin, Miss. Jennie",female,23,0,0,SOTON/OQ 392083,8.05,,S 141 | 1031,3,"Goodwin, Mr. Charles Frederick",male,40,1,6,CA 2144,46.9,,S 142 | 1032,3,"Goodwin, Miss. Jessie Allis",female,10,5,2,CA 2144,46.9,,S 143 | 1033,1,"Daniels, Miss. Sarah",female,33,0,0,113781,151.55,,S 144 | 1034,1,"Ryerson, Mr. Arthur Larned",male,61,1,3,PC 17608,262.375,B57 B59 B63 B66,C 145 | 1035,2,"Beauchamp, Mr. Henry James",male,28,0,0,244358,26,,S 146 | 1036,1,"Lindeberg-Lind, Mr. Erik Gustaf (Mr Edward Lingrey"")""",male,42,0,0,17475,26.55,,S 147 | 1037,3,"Vander Planke, Mr. Julius",male,31,3,0,345763,18,,S 148 | 1038,1,"Hilliard, Mr. Herbert Henry",male,,0,0,17463,51.8625,E46,S 149 | 1039,3,"Davies, Mr. Evan",male,22,0,0,SC/A4 23568,8.05,,S 150 | 1040,1,"Crafton, Mr. John Bertram",male,,0,0,113791,26.55,,S 151 | 1041,2,"Lahtinen, Rev. William",male,30,1,1,250651,26,,S 152 | 1042,1,"Earnshaw, Mrs. Boulton (Olive Potter)",female,23,0,1,11767,83.1583,C54,C 153 | 1043,3,"Matinoff, Mr. Nicola",male,,0,0,349255,7.8958,,C 154 | 1044,3,"Storey, Mr. Thomas",male,60.5,0,0,3701,,,S 155 | 1045,3,"Klasen, Mrs. 
(Hulda Kristina Eugenia Lofqvist)",female,36,0,2,350405,12.1833,,S 156 | 1046,3,"Asplund, Master. Filip Oscar",male,13,4,2,347077,31.3875,,S 157 | 1047,3,"Duquemin, Mr. Joseph",male,24,0,0,S.O./P.P. 752,7.55,,S 158 | 1048,1,"Bird, Miss. Ellen",female,29,0,0,PC 17483,221.7792,C97,S 159 | 1049,3,"Lundin, Miss. Olga Elida",female,23,0,0,347469,7.8542,,S 160 | 1050,1,"Borebank, Mr. John James",male,42,0,0,110489,26.55,D22,S 161 | 1051,3,"Peacock, Mrs. Benjamin (Edith Nile)",female,26,0,2,SOTON/O.Q. 3101315,13.775,,S 162 | 1052,3,"Smyth, Miss. Julia",female,,0,0,335432,7.7333,,Q 163 | 1053,3,"Touma, Master. Georges Youssef",male,7,1,1,2650,15.2458,,C 164 | 1054,2,"Wright, Miss. Marion",female,26,0,0,220844,13.5,,S 165 | 1055,3,"Pearce, Mr. Ernest",male,,0,0,343271,7,,S 166 | 1056,2,"Peruschitz, Rev. Joseph Maria",male,41,0,0,237393,13,,S 167 | 1057,3,"Kink-Heilmann, Mrs. Anton (Luise Heilmann)",female,26,1,1,315153,22.025,,S 168 | 1058,1,"Brandeis, Mr. Emil",male,48,0,0,PC 17591,50.4958,B10,C 169 | 1059,3,"Ford, Mr. Edward Watson",male,18,2,2,W./C. 6608,34.375,,S 170 | 1060,1,"Cassebeer, Mrs. Henry Arthur Jr (Eleanor Genevieve Fosdick)",female,,0,0,17770,27.7208,,C 171 | 1061,3,"Hellstrom, Miss. Hilda Maria",female,22,0,0,7548,8.9625,,S 172 | 1062,3,"Lithman, Mr. Simon",male,,0,0,S.O./P.P. 251,7.55,,S 173 | 1063,3,"Zakarian, Mr. Ortin",male,27,0,0,2670,7.225,,C 174 | 1064,3,"Dyker, Mr. Adolf Fredrik",male,23,1,0,347072,13.9,,S 175 | 1065,3,"Torfa, Mr. Assad",male,,0,0,2673,7.2292,,C 176 | 1066,3,"Asplund, Mr. Carl Oscar Vilhelm Gustafsson",male,40,1,5,347077,31.3875,,S 177 | 1067,2,"Brown, Miss. Edith Eileen",female,15,0,2,29750,39,,S 178 | 1068,2,"Sincock, Miss. Maude",female,20,0,0,C.A. 33112,36.75,,S 179 | 1069,1,"Stengel, Mr. Charles Emil Henry",male,54,1,0,11778,55.4417,C116,C 180 | 1070,2,"Becker, Mrs. Allen Oliver (Nellie E Baumgardner)",female,36,0,3,230136,39,F4,S 181 | 1071,1,"Compton, Mrs. 
Alexander Taylor (Mary Eliza Ingersoll)",female,64,0,2,PC 17756,83.1583,E45,C 182 | 1072,2,"McCrie, Mr. James Matthew",male,30,0,0,233478,13,,S 183 | 1073,1,"Compton, Mr. Alexander Taylor Jr",male,37,1,1,PC 17756,83.1583,E52,C 184 | 1074,1,"Marvin, Mrs. Daniel Warner (Mary Graham Carmichael Farquarson)",female,18,1,0,113773,53.1,D30,S 185 | 1075,3,"Lane, Mr. Patrick",male,,0,0,7935,7.75,,Q 186 | 1076,1,"Douglas, Mrs. Frederick Charles (Mary Helene Baxter)",female,27,1,1,PC 17558,247.5208,B58 B60,C 187 | 1077,2,"Maybery, Mr. Frank Hubert",male,40,0,0,239059,16,,S 188 | 1078,2,"Phillips, Miss. Alice Frances Louisa",female,21,0,1,S.O./P.P. 2,21,,S 189 | 1079,3,"Davies, Mr. Joseph",male,17,2,0,A/4 48873,8.05,,S 190 | 1080,3,"Sage, Miss. Ada",female,,8,2,CA. 2343,69.55,,S 191 | 1081,2,"Veal, Mr. James",male,40,0,0,28221,13,,S 192 | 1082,2,"Angle, Mr. William A",male,34,1,0,226875,26,,S 193 | 1083,1,"Salomon, Mr. Abraham L",male,,0,0,111163,26,,S 194 | 1084,3,"van Billiard, Master. Walter John",male,11.5,1,1,A/5. 851,14.5,,S 195 | 1085,2,"Lingane, Mr. John",male,61,0,0,235509,12.35,,Q 196 | 1086,2,"Drew, Master. Marshall Brines",male,8,0,2,28220,32.5,,S 197 | 1087,3,"Karlsson, Mr. Julius Konrad Eugen",male,33,0,0,347465,7.8542,,S 198 | 1088,1,"Spedden, Master. Robert Douglas",male,6,0,2,16966,134.5,E34,C 199 | 1089,3,"Nilsson, Miss. Berta Olivia",female,18,0,0,347066,7.775,,S 200 | 1090,2,"Baimbrigge, Mr. Charles Robert",male,23,0,0,C.A. 31030,10.5,,S 201 | 1091,3,"Rasmussen, Mrs. (Lena Jacobsen Solvang)",female,,0,0,65305,8.1125,,S 202 | 1092,3,"Murphy, Miss. Nora",female,,0,0,36568,15.5,,Q 203 | 1093,3,"Danbom, Master. Gilbert Sigvard Emanuel",male,0.33,0,2,347080,14.4,,S 204 | 1094,1,"Astor, Col. John Jacob",male,47,1,0,PC 17757,227.525,C62 C64,C 205 | 1095,2,"Quick, Miss. Winifred Vera",female,8,1,1,26360,26,,S 206 | 1096,2,"Andrew, Mr. Frank Thomas",male,25,0,0,C.A. 34050,10.5,,S 207 | 1097,1,"Omont, Mr. Alfred Fernand",male,,0,0,F.C. 
12998,25.7417,,C 208 | 1098,3,"McGowan, Miss. Katherine",female,35,0,0,9232,7.75,,Q 209 | 1099,2,"Collett, Mr. Sidney C Stuart",male,24,0,0,28034,10.5,,S 210 | 1100,1,"Rosenbaum, Miss. Edith Louise",female,33,0,0,PC 17613,27.7208,A11,C 211 | 1101,3,"Delalic, Mr. Redjo",male,25,0,0,349250,7.8958,,S 212 | 1102,3,"Andersen, Mr. Albert Karvin",male,32,0,0,C 4001,22.525,,S 213 | 1103,3,"Finoli, Mr. Luigi",male,,0,0,SOTON/O.Q. 3101308,7.05,,S 214 | 1104,2,"Deacon, Mr. Percy William",male,17,0,0,S.O.C. 14879,73.5,,S 215 | 1105,2,"Howard, Mrs. Benjamin (Ellen Truelove Arman)",female,60,1,0,24065,26,,S 216 | 1106,3,"Andersson, Miss. Ida Augusta Margareta",female,38,4,2,347091,7.775,,S 217 | 1107,1,"Head, Mr. Christopher",male,42,0,0,113038,42.5,B11,S 218 | 1108,3,"Mahon, Miss. Bridget Delia",female,,0,0,330924,7.8792,,Q 219 | 1109,1,"Wick, Mr. George Dennick",male,57,1,1,36928,164.8667,,S 220 | 1110,1,"Widener, Mrs. George Dunton (Eleanor Elkins)",female,50,1,1,113503,211.5,C80,C 221 | 1111,3,"Thomson, Mr. Alexander Morrison",male,,0,0,32302,8.05,,S 222 | 1112,2,"Duran y More, Miss. Florentina",female,30,1,0,SC/PARIS 2148,13.8583,,C 223 | 1113,3,"Reynolds, Mr. Harold J",male,21,0,0,342684,8.05,,S 224 | 1114,2,"Cook, Mrs. (Selena Rogers)",female,22,0,0,W./C. 14266,10.5,F33,S 225 | 1115,3,"Karlsson, Mr. Einar Gervasius",male,21,0,0,350053,7.7958,,S 226 | 1116,1,"Candee, Mrs. Edward (Helen Churchill Hungerford)",female,53,0,0,PC 17606,27.4458,,C 227 | 1117,3,"Moubarek, Mrs. George (Omine Amenia"" Alexander)""",female,,0,2,2661,15.2458,,C 228 | 1118,3,"Asplund, Mr. Johan Charles",male,23,0,0,350054,7.7958,,S 229 | 1119,3,"McNeill, Miss. Bridget",female,,0,0,370368,7.75,,Q 230 | 1120,3,"Everett, Mr. Thomas James",male,40.5,0,0,C.A. 6212,15.1,,S 231 | 1121,2,"Hocking, Mr. Samuel James Metcalfe",male,36,0,0,242963,13,,S 232 | 1122,2,"Sweet, Mr. George Frederick",male,14,0,0,220845,65,,S 233 | 1123,1,"Willard, Miss. 
Constance",female,21,0,0,113795,26.55,,S 234 | 1124,3,"Wiklund, Mr. Karl Johan",male,21,1,0,3101266,6.4958,,S 235 | 1125,3,"Linehan, Mr. Michael",male,,0,0,330971,7.8792,,Q 236 | 1126,1,"Cumings, Mr. John Bradley",male,39,1,0,PC 17599,71.2833,C85,C 237 | 1127,3,"Vendel, Mr. Olof Edvin",male,20,0,0,350416,7.8542,,S 238 | 1128,1,"Warren, Mr. Frank Manley",male,64,1,0,110813,75.25,D37,C 239 | 1129,3,"Baccos, Mr. Raffull",male,20,0,0,2679,7.225,,C 240 | 1130,2,"Hiltunen, Miss. Marta",female,18,1,1,250650,13,,S 241 | 1131,1,"Douglas, Mrs. Walter Donald (Mahala Dutton)",female,48,1,0,PC 17761,106.425,C86,C 242 | 1132,1,"Lindstrom, Mrs. Carl Johan (Sigrid Posse)",female,55,0,0,112377,27.7208,,C 243 | 1133,2,"Christy, Mrs. (Alice Frances)",female,45,0,2,237789,30,,S 244 | 1134,1,"Spedden, Mr. Frederic Oakley",male,45,1,1,16966,134.5,E34,C 245 | 1135,3,"Hyman, Mr. Abraham",male,,0,0,3470,7.8875,,S 246 | 1136,3,"Johnston, Master. William Arthur Willie""""",male,,1,2,W./C. 6607,23.45,,S 247 | 1137,1,"Kenyon, Mr. Frederick R",male,41,1,0,17464,51.8625,D21,S 248 | 1138,2,"Karnes, Mrs. J Frank (Claire Bennett)",female,22,0,0,F.C.C. 13534,21,,S 249 | 1139,2,"Drew, Mr. James Vivian",male,42,1,1,28220,32.5,,S 250 | 1140,2,"Hold, Mrs. Stephen (Annie Margaret Hill)",female,29,1,0,26707,26,,S 251 | 1141,3,"Khalil, Mrs. Betros (Zahie Maria"" Elias)""",female,,1,0,2660,14.4542,,C 252 | 1142,2,"West, Miss. Barbara J",female,0.92,1,2,C.A. 34651,27.75,,S 253 | 1143,3,"Abrahamsson, Mr. Abraham August Johannes",male,20,0,0,SOTON/O2 3101284,7.925,,S 254 | 1144,1,"Clark, Mr. Walter Miller",male,27,1,0,13508,136.7792,C89,C 255 | 1145,3,"Salander, Mr. Karl Johan",male,24,0,0,7266,9.325,,S 256 | 1146,3,"Wenzel, Mr. Linhart",male,32.5,0,0,345775,9.5,,S 257 | 1147,3,"MacKay, Mr. George William",male,,0,0,C.A. 42795,7.55,,S 258 | 1148,3,"Mahon, Mr. John",male,,0,0,AQ/4 3130,7.75,,Q 259 | 1149,3,"Niklasson, Mr. Samuel",male,28,0,0,363611,8.05,,S 260 | 1150,2,"Bentham, Miss. 
Lilian W",female,19,0,0,28404,13,,S 261 | 1151,3,"Midtsjo, Mr. Karl Albert",male,21,0,0,345501,7.775,,S 262 | 1152,3,"de Messemaeker, Mr. Guillaume Joseph",male,36.5,1,0,345572,17.4,,S 263 | 1153,3,"Nilsson, Mr. August Ferdinand",male,21,0,0,350410,7.8542,,S 264 | 1154,2,"Wells, Mrs. Arthur Henry (Addie"" Dart Trevaskis)""",female,29,0,2,29103,23,,S 265 | 1155,3,"Klasen, Miss. Gertrud Emilia",female,1,1,1,350405,12.1833,,S 266 | 1156,2,"Portaluppi, Mr. Emilio Ilario Giuseppe",male,30,0,0,C.A. 34644,12.7375,,C 267 | 1157,3,"Lyntakoff, Mr. Stanko",male,,0,0,349235,7.8958,,S 268 | 1158,1,"Chisholm, Mr. Roderick Robert Crispin",male,,0,0,112051,0,,S 269 | 1159,3,"Warren, Mr. Charles William",male,,0,0,C.A. 49867,7.55,,S 270 | 1160,3,"Howard, Miss. May Elizabeth",female,,0,0,A. 2. 39186,8.05,,S 271 | 1161,3,"Pokrnic, Mr. Mate",male,17,0,0,315095,8.6625,,S 272 | 1162,1,"McCaffry, Mr. Thomas Francis",male,46,0,0,13050,75.2417,C6,C 273 | 1163,3,"Fox, Mr. Patrick",male,,0,0,368573,7.75,,Q 274 | 1164,1,"Clark, Mrs. Walter Miller (Virginia McDowell)",female,26,1,0,13508,136.7792,C89,C 275 | 1165,3,"Lennon, Miss. Mary",female,,1,0,370371,15.5,,Q 276 | 1166,3,"Saade, Mr. Jean Nassr",male,,0,0,2676,7.225,,C 277 | 1167,2,"Bryhl, Miss. Dagmar Jenny Ingeborg ",female,20,1,0,236853,26,,S 278 | 1168,2,"Parker, Mr. Clifford Richard",male,28,0,0,SC 14888,10.5,,S 279 | 1169,2,"Faunthorpe, Mr. Harry",male,40,1,0,2926,26,,S 280 | 1170,2,"Ware, Mr. John James",male,30,1,0,CA 31352,21,,S 281 | 1171,2,"Oxenham, Mr. Percy Thomas",male,22,0,0,W./C. 14260,10.5,,S 282 | 1172,3,"Oreskovic, Miss. Jelka",female,23,0,0,315085,8.6625,,S 283 | 1173,3,"Peacock, Master. Alfred Edward",male,0.75,1,1,SOTON/O.Q. 3101315,13.775,,S 284 | 1174,3,"Fleming, Miss. Honora",female,,0,0,364859,7.75,,Q 285 | 1175,3,"Touma, Miss. Maria Youssef",female,9,1,1,2650,15.2458,,C 286 | 1176,3,"Rosblom, Miss. Salli Helena",female,2,1,1,370129,20.2125,,S 287 | 1177,3,"Dennis, Mr. 
William",male,36,0,0,A/5 21175,7.25,,S 288 | 1178,3,"Franklin, Mr. Charles (Charles Fardon)",male,,0,0,SOTON/O.Q. 3101314,7.25,,S 289 | 1179,1,"Snyder, Mr. John Pillsbury",male,24,1,0,21228,82.2667,B45,S 290 | 1180,3,"Mardirosian, Mr. Sarkis",male,,0,0,2655,7.2292,F E46,C 291 | 1181,3,"Ford, Mr. Arthur",male,,0,0,A/5 1478,8.05,,S 292 | 1182,1,"Rheims, Mr. George Alexander Lucien",male,,0,0,PC 17607,39.6,,S 293 | 1183,3,"Daly, Miss. Margaret Marcella Maggie""""",female,30,0,0,382650,6.95,,Q 294 | 1184,3,"Nasr, Mr. Mustafa",male,,0,0,2652,7.2292,,C 295 | 1185,1,"Dodge, Dr. Washington",male,53,1,1,33638,81.8583,A34,S 296 | 1186,3,"Wittevrongel, Mr. Camille",male,36,0,0,345771,9.5,,S 297 | 1187,3,"Angheloff, Mr. Minko",male,26,0,0,349202,7.8958,,S 298 | 1188,2,"Laroche, Miss. Louise",female,1,1,2,SC/Paris 2123,41.5792,,C 299 | 1189,3,"Samaan, Mr. Hanna",male,,2,0,2662,21.6792,,C 300 | 1190,1,"Loring, Mr. Joseph Holland",male,30,0,0,113801,45.5,,S 301 | 1191,3,"Johansson, Mr. Nils",male,29,0,0,347467,7.8542,,S 302 | 1192,3,"Olsson, Mr. Oscar Wilhelm",male,32,0,0,347079,7.775,,S 303 | 1193,2,"Malachard, Mr. Noel",male,,0,0,237735,15.0458,D,C 304 | 1194,2,"Phillips, Mr. Escott Robert",male,43,0,1,S.O./P.P. 2,21,,S 305 | 1195,3,"Pokrnic, Mr. Tome",male,24,0,0,315092,8.6625,,S 306 | 1196,3,"McCarthy, Miss. Catherine Katie""""",female,,0,0,383123,7.75,,Q 307 | 1197,1,"Crosby, Mrs. Edward Gifford (Catherine Elizabeth Halstead)",female,64,1,1,112901,26.55,B26,S 308 | 1198,1,"Allison, Mr. Hudson Joshua Creighton",male,30,1,2,113781,151.55,C22 C26,S 309 | 1199,3,"Aks, Master. Philip Frank",male,0.83,0,1,392091,9.35,,S 310 | 1200,1,"Hays, Mr. Charles Melville",male,55,1,1,12749,93.5,B69,S 311 | 1201,3,"Hansen, Mrs. Claus Peter (Jennie L Howard)",female,45,1,0,350026,14.1083,,S 312 | 1202,3,"Cacic, Mr. Jego Grga",male,18,0,0,315091,8.6625,,S 313 | 1203,3,"Vartanian, Mr. David",male,22,0,0,2658,7.225,,C 314 | 1204,3,"Sadowitz, Mr. 
Harry",male,,0,0,LP 1588,7.575,,S 315 | 1205,3,"Carr, Miss. Jeannie",female,37,0,0,368364,7.75,,Q 316 | 1206,1,"White, Mrs. John Stuart (Ella Holmes)",female,55,0,0,PC 17760,135.6333,C32,C 317 | 1207,3,"Hagardon, Miss. Kate",female,17,0,0,AQ/3. 30631,7.7333,,Q 318 | 1208,1,"Spencer, Mr. William Augustus",male,57,1,0,PC 17569,146.5208,B78,C 319 | 1209,2,"Rogers, Mr. Reginald Harry",male,19,0,0,28004,10.5,,S 320 | 1210,3,"Jonsson, Mr. Nils Hilding",male,27,0,0,350408,7.8542,,S 321 | 1211,2,"Jefferys, Mr. Ernest Wilfred",male,22,2,0,C.A. 31029,31.5,,S 322 | 1212,3,"Andersson, Mr. Johan Samuel",male,26,0,0,347075,7.775,,S 323 | 1213,3,"Krekorian, Mr. Neshan",male,25,0,0,2654,7.2292,F E57,C 324 | 1214,2,"Nesson, Mr. Israel",male,26,0,0,244368,13,F2,S 325 | 1215,1,"Rowe, Mr. Alfred G",male,33,0,0,113790,26.55,,S 326 | 1216,1,"Kreuchen, Miss. Emilie",female,39,0,0,24160,211.3375,,S 327 | 1217,3,"Assam, Mr. Ali",male,23,0,0,SOTON/O.Q. 3101309,7.05,,S 328 | 1218,2,"Becker, Miss. Ruth Elizabeth",female,12,2,1,230136,39,F4,S 329 | 1219,1,"Rosenshine, Mr. George (Mr George Thorne"")""",male,46,0,0,PC 17585,79.2,,C 330 | 1220,2,"Clarke, Mr. Charles Valentine",male,29,1,0,2003,26,,S 331 | 1221,2,"Enander, Mr. Ingvar",male,21,0,0,236854,13,,S 332 | 1222,2,"Davies, Mrs. John Morgan (Elizabeth Agnes Mary White) ",female,48,0,2,C.A. 33112,36.75,,S 333 | 1223,1,"Dulles, Mr. William Crothers",male,39,0,0,PC 17580,29.7,A18,C 334 | 1224,3,"Thomas, Mr. Tannous",male,,0,0,2684,7.225,,C 335 | 1225,3,"Nakid, Mrs. Said (Waika Mary"" Mowad)""",female,19,1,1,2653,15.7417,,C 336 | 1226,3,"Cor, Mr. Ivan",male,27,0,0,349229,7.8958,,S 337 | 1227,1,"Maguire, Mr. John Edward",male,30,0,0,110469,26,C106,S 338 | 1228,2,"de Brito, Mr. Jose Joaquim",male,32,0,0,244360,13,,S 339 | 1229,3,"Elias, Mr. Joseph",male,39,0,2,2675,7.2292,,C 340 | 1230,2,"Denbury, Mr. Herbert",male,25,0,0,C.A. 31029,31.5,,S 341 | 1231,3,"Betros, Master. Seman",male,,0,0,2622,7.2292,,C 342 | 1232,2,"Fillbrook, Mr. 
Joseph Charles",male,18,0,0,C.A. 15185,10.5,,S 343 | 1233,3,"Lundstrom, Mr. Thure Edvin",male,32,0,0,350403,7.5792,,S 344 | 1234,3,"Sage, Mr. John George",male,,1,9,CA. 2343,69.55,,S 345 | 1235,1,"Cardeza, Mrs. James Warburton Martinez (Charlotte Wardle Drake)",female,58,0,1,PC 17755,512.3292,B51 B53 B55,C 346 | 1236,3,"van Billiard, Master. James William",male,,1,1,A/5. 851,14.5,,S 347 | 1237,3,"Abelseth, Miss. Karen Marie",female,16,0,0,348125,7.65,,S 348 | 1238,2,"Botsford, Mr. William Hull",male,26,0,0,237670,13,,S 349 | 1239,3,"Whabee, Mrs. George Joseph (Shawneene Abi-Saab)",female,38,0,0,2688,7.2292,,C 350 | 1240,2,"Giles, Mr. Ralph",male,24,0,0,248726,13.5,,S 351 | 1241,2,"Walcroft, Miss. Nellie",female,31,0,0,F.C.C. 13528,21,,S 352 | 1242,1,"Greenfield, Mrs. Leo David (Blanche Strouse)",female,45,0,1,PC 17759,63.3583,D10 D12,C 353 | 1243,2,"Stokes, Mr. Philip Joseph",male,25,0,0,F.C.C. 13540,10.5,,S 354 | 1244,2,"Dibden, Mr. William",male,18,0,0,S.O.C. 14879,73.5,,S 355 | 1245,2,"Herman, Mr. Samuel",male,49,1,2,220845,65,,S 356 | 1246,3,"Dean, Miss. Elizabeth Gladys Millvina""""",female,0.17,1,2,C.A. 2315,20.575,,S 357 | 1247,1,"Julian, Mr. Henry Forbes",male,50,0,0,113044,26,E60,S 358 | 1248,1,"Brown, Mrs. John Murray (Caroline Lane Lamson)",female,59,2,0,11769,51.4792,C101,S 359 | 1249,3,"Lockyer, Mr. Edward",male,,0,0,1222,7.8792,,S 360 | 1250,3,"O'Keefe, Mr. Patrick",male,,0,0,368402,7.75,,Q 361 | 1251,3,"Lindell, Mrs. Edvard Bengtsson (Elin Gerda Persson)",female,30,1,0,349910,15.55,,S 362 | 1252,3,"Sage, Master. William Henry",male,14.5,8,2,CA. 2343,69.55,,S 363 | 1253,2,"Mallet, Mrs. Albert (Antoinette Magnin)",female,24,1,1,S.C./PARIS 2079,37.0042,,C 364 | 1254,2,"Ware, Mrs. John James (Florence Louise Long)",female,31,0,0,CA 31352,21,,S 365 | 1255,3,"Strilic, Mr. Ivan",male,27,0,0,315083,8.6625,,S 366 | 1256,1,"Harder, Mrs. George Achilles (Dorothy Annan)",female,25,1,0,11765,55.4417,E50,C 367 | 1257,3,"Sage, Mrs. 
John (Annie Bullen)",female,,1,9,CA. 2343,69.55,,S 368 | 1258,3,"Caram, Mr. Joseph",male,,1,0,2689,14.4583,,C 369 | 1259,3,"Riihivouri, Miss. Susanna Juhantytar Sanni""""",female,22,0,0,3101295,39.6875,,S 370 | 1260,1,"Gibson, Mrs. Leonard (Pauline C Boeson)",female,45,0,1,112378,59.4,,C 371 | 1261,2,"Pallas y Castello, Mr. Emilio",male,29,0,0,SC/PARIS 2147,13.8583,,C 372 | 1262,2,"Giles, Mr. Edgar",male,21,1,0,28133,11.5,,S 373 | 1263,1,"Wilson, Miss. Helen Alice",female,31,0,0,16966,134.5,E39 E41,C 374 | 1264,1,"Ismay, Mr. Joseph Bruce",male,49,0,0,112058,0,B52 B54 B56,S 375 | 1265,2,"Harbeck, Mr. William H",male,44,0,0,248746,13,,S 376 | 1266,1,"Dodge, Mrs. Washington (Ruth Vidaver)",female,54,1,1,33638,81.8583,A34,S 377 | 1267,1,"Bowen, Miss. Grace Scott",female,45,0,0,PC 17608,262.375,,C 378 | 1268,3,"Kink, Miss. Maria",female,22,2,0,315152,8.6625,,S 379 | 1269,2,"Cotterill, Mr. Henry Harry""""",male,21,0,0,29107,11.5,,S 380 | 1270,1,"Hipkins, Mr. William Edward",male,55,0,0,680,50,C39,S 381 | 1271,3,"Asplund, Master. Carl Edgar",male,5,4,2,347077,31.3875,,S 382 | 1272,3,"O'Connor, Mr. Patrick",male,,0,0,366713,7.75,,Q 383 | 1273,3,"Foley, Mr. Joseph",male,26,0,0,330910,7.8792,,Q 384 | 1274,3,"Risien, Mrs. Samuel (Emma)",female,,0,0,364498,14.5,,S 385 | 1275,3,"McNamee, Mrs. Neal (Eileen O'Leary)",female,19,1,0,376566,16.1,,S 386 | 1276,2,"Wheeler, Mr. Edwin Frederick""""",male,,0,0,SC/PARIS 2159,12.875,,S 387 | 1277,2,"Herman, Miss. Kate",female,24,1,2,220845,65,,S 388 | 1278,3,"Aronsson, Mr. Ernst Axel Algot",male,24,0,0,349911,7.775,,S 389 | 1279,2,"Ashby, Mr. John",male,57,0,0,244346,13,,S 390 | 1280,3,"Canavan, Mr. Patrick",male,21,0,0,364858,7.75,,Q 391 | 1281,3,"Palsson, Master. Paul Folke",male,6,3,1,349909,21.075,,S 392 | 1282,1,"Payne, Mr. Vivian Ponsonby",male,23,0,0,12749,93.5,B24,S 393 | 1283,1,"Lines, Mrs. Ernest H (Elizabeth Lindsey James)",female,51,0,1,PC 17592,39.4,D28,S 394 | 1284,3,"Abbott, Master. Eugene Joseph",male,13,0,2,C.A. 
2673,20.25,,S 395 | 1285,2,"Gilbert, Mr. William",male,47,0,0,C.A. 30769,10.5,,S 396 | 1286,3,"Kink-Heilmann, Mr. Anton",male,29,3,1,315153,22.025,,S 397 | 1287,1,"Smith, Mrs. Lucien Philip (Mary Eloise Hughes)",female,18,1,0,13695,60,C31,S 398 | 1288,3,"Colbert, Mr. Patrick",male,24,0,0,371109,7.25,,Q 399 | 1289,1,"Frolicher-Stehli, Mrs. Maxmillian (Margaretha Emerentia Stehli)",female,48,1,1,13567,79.2,B41,C 400 | 1290,3,"Larsson-Rondberg, Mr. Edvard A",male,22,0,0,347065,7.775,,S 401 | 1291,3,"Conlon, Mr. Thomas Henry",male,31,0,0,21332,7.7333,,Q 402 | 1292,1,"Bonnell, Miss. Caroline",female,30,0,0,36928,164.8667,C7,S 403 | 1293,2,"Gale, Mr. Harry",male,38,1,0,28664,21,,S 404 | 1294,1,"Gibson, Miss. Dorothy Winifred",female,22,0,1,112378,59.4,,C 405 | 1295,1,"Carrau, Mr. Jose Pedro",male,17,0,0,113059,47.1,,S 406 | 1296,1,"Frauenthal, Mr. Isaac Gerald",male,43,1,0,17765,27.7208,D40,C 407 | 1297,2,"Nourney, Mr. Alfred (Baron von Drachstedt"")""",male,20,0,0,SC/PARIS 2166,13.8625,D38,C 408 | 1298,2,"Ware, Mr. William Jeffery",male,23,1,0,28666,10.5,,S 409 | 1299,1,"Widener, Mr. George Dunton",male,50,1,1,113503,211.5,C80,C 410 | 1300,3,"Riordan, Miss. Johanna Hannah""""",female,,0,0,334915,7.7208,,Q 411 | 1301,3,"Peacock, Miss. Treasteall",female,3,1,1,SOTON/O.Q. 3101315,13.775,,S 412 | 1302,3,"Naughton, Miss. Hannah",female,,0,0,365237,7.75,,Q 413 | 1303,1,"Minahan, Mrs. William Edward (Lillian E Thorpe)",female,37,1,0,19928,90,C78,Q 414 | 1304,3,"Henriksson, Miss. Jenny Lovisa",female,28,0,0,347086,7.775,,S 415 | 1305,3,"Spector, Mr. Woolf",male,,0,0,A.5. 3236,8.05,,S 416 | 1306,1,"Oliva y Ocana, Dona. Fermina",female,39,0,0,PC 17758,108.9,C105,C 417 | 1307,3,"Saether, Mr. Simon Sivertsen",male,38.5,0,0,SOTON/O.Q. 3101262,7.25,,S 418 | 1308,3,"Ware, Mr. Frederick",male,,0,0,359309,8.05,,S 419 | 1309,3,"Peter, Master. 
Michael J",male,,1,1,2668,22.3583,,C 420 | -------------------------------------------------------------------------------- /ML algorithm/4-Decission & Random Tree/kyphosis.csv: -------------------------------------------------------------------------------- 1 | "Kyphosis","Age","Number","Start" 2 | "absent",71,3,5 3 | "absent",158,3,14 4 | "present",128,4,5 5 | "absent",2,5,1 6 | "absent",1,4,15 7 | "absent",1,2,16 8 | "absent",61,2,17 9 | "absent",37,3,16 10 | "absent",113,2,16 11 | "present",59,6,12 12 | "present",82,5,14 13 | "absent",148,3,16 14 | "absent",18,5,2 15 | "absent",1,4,12 16 | "absent",168,3,18 17 | "absent",1,3,16 18 | "absent",78,6,15 19 | "absent",175,5,13 20 | "absent",80,5,16 21 | "absent",27,4,9 22 | "absent",22,2,16 23 | "present",105,6,5 24 | "present",96,3,12 25 | "absent",131,2,3 26 | "present",15,7,2 27 | "absent",9,5,13 28 | "absent",8,3,6 29 | "absent",100,3,14 30 | "absent",4,3,16 31 | "absent",151,2,16 32 | "absent",31,3,16 33 | "absent",125,2,11 34 | "absent",130,5,13 35 | "absent",112,3,16 36 | "absent",140,5,11 37 | "absent",93,3,16 38 | "absent",1,3,9 39 | "present",52,5,6 40 | "absent",20,6,9 41 | "present",91,5,12 42 | "present",73,5,1 43 | "absent",35,3,13 44 | "absent",143,9,3 45 | "absent",61,4,1 46 | "absent",97,3,16 47 | "present",139,3,10 48 | "absent",136,4,15 49 | "absent",131,5,13 50 | "present",121,3,3 51 | "absent",177,2,14 52 | "absent",68,5,10 53 | "absent",9,2,17 54 | "present",139,10,6 55 | "absent",2,2,17 56 | "absent",140,4,15 57 | "absent",72,5,15 58 | "absent",2,3,13 59 | "present",120,5,8 60 | "absent",51,7,9 61 | "absent",102,3,13 62 | "present",130,4,1 63 | "present",114,7,8 64 | "absent",81,4,1 65 | "absent",118,3,16 66 | "absent",118,4,16 67 | "absent",17,4,10 68 | "absent",195,2,17 69 | "absent",159,4,13 70 | "absent",18,4,11 71 | "absent",15,5,16 72 | "absent",158,5,14 73 | "absent",127,4,12 74 | "absent",87,4,16 75 | "absent",206,4,10 76 | "absent",11,3,15 77 | "absent",178,4,15 78 | 
"present",157,3,13 79 | "absent",26,7,13 80 | "absent",120,2,13 81 | "present",42,7,6 82 | "absent",36,4,13 83 | -------------------------------------------------------------------------------- /ML algorithm/5-SVM/.ipynb_checkpoints/SVM-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "1f60e40b", 6 | "metadata": {}, 7 | "source": [ 8 | "# SVM" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "8a0e0607", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd\n", 19 | "import numpy as np\n", 20 | "import matplotlib.pyplot as plt\n", 21 | "import seaborn as sns\n", 22 | "%matplotlib inline" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "id": "e74c73e1", 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "from sklearn.datasets import load_breast_cancer" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 3, 38 | "id": "555f2622", 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "cancer = load_breast_cancer()" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 4, 48 | "id": "83bac0dc", 49 | "metadata": {}, 50 | "outputs": [ 51 | { 52 | "data": { 53 | "text/plain": [ 54 | "dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename'])" 55 | ] 56 | }, 57 | "execution_count": 4, 58 | "metadata": {}, 59 | "output_type": "execute_result" 60 | } 61 | ], 62 | "source": [ 63 | "cancer.keys()" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 7, 69 | "id": "6e407f3e", 70 | "metadata": {}, 71 | "outputs": [ 72 | { 73 | "name": "stdout", 74 | "output_type": "stream", 75 | "text": [ 76 | ".. 
_breast_cancer_dataset:\n", 77 | "\n", 78 | "Breast cancer wisconsin (diagnostic) dataset\n", 79 | "--------------------------------------------\n", 80 | "\n", 81 | "**Data Set Characteristics:**\n", 82 | "\n", 83 | " :Number of Instances: 569\n", 84 | "\n", 85 | " :Number of Attributes: 30 numeric, predictive attributes and the class\n", 86 | "\n", 87 | " :Attribute Information:\n", 88 | " - radius (mean of distances from center to points on the perimeter)\n", 89 | " - texture (standard deviation of gray-scale values)\n", 90 | " - perimeter\n", 91 | " - area\n", 92 | " - smoothness (local variation in radius lengths)\n", 93 | " - compactness (perimeter^2 / area - 1.0)\n", 94 | " - concavity (severity of concave portions of the contour)\n", 95 | " - concave points (number of concave portions of the contour)\n", 96 | " - symmetry\n", 97 | " - fractal dimension (\"coastline approximation\" - 1)\n", 98 | "\n", 99 | " The mean, standard error, and \"worst\" or largest (mean of the three\n", 100 | " worst/largest values) of these features were computed for each image,\n", 101 | " resulting in 30 features. 
For instance, field 0 is Mean Radius, field\n", 102 | " 10 is Radius SE, field 20 is Worst Radius.\n", 103 | "\n", 104 | " - class:\n", 105 | " - WDBC-Malignant\n", 106 | " - WDBC-Benign\n", 107 | "\n", 108 | " :Summary Statistics:\n", 109 | "\n", 110 | " ===================================== ====== ======\n", 111 | " Min Max\n", 112 | " ===================================== ====== ======\n", 113 | " radius (mean): 6.981 28.11\n", 114 | " texture (mean): 9.71 39.28\n", 115 | " perimeter (mean): 43.79 188.5\n", 116 | " area (mean): 143.5 2501.0\n", 117 | " smoothness (mean): 0.053 0.163\n", 118 | " compactness (mean): 0.019 0.345\n", 119 | " concavity (mean): 0.0 0.427\n", 120 | " concave points (mean): 0.0 0.201\n", 121 | " symmetry (mean): 0.106 0.304\n", 122 | " fractal dimension (mean): 0.05 0.097\n", 123 | " radius (standard error): 0.112 2.873\n", 124 | " texture (standard error): 0.36 4.885\n", 125 | " perimeter (standard error): 0.757 21.98\n", 126 | " area (standard error): 6.802 542.2\n", 127 | " smoothness (standard error): 0.002 0.031\n", 128 | " compactness (standard error): 0.002 0.135\n", 129 | " concavity (standard error): 0.0 0.396\n", 130 | " concave points (standard error): 0.0 0.053\n", 131 | " symmetry (standard error): 0.008 0.079\n", 132 | " fractal dimension (standard error): 0.001 0.03\n", 133 | " radius (worst): 7.93 36.04\n", 134 | " texture (worst): 12.02 49.54\n", 135 | " perimeter (worst): 50.41 251.2\n", 136 | " area (worst): 185.2 4254.0\n", 137 | " smoothness (worst): 0.071 0.223\n", 138 | " compactness (worst): 0.027 1.058\n", 139 | " concavity (worst): 0.0 1.252\n", 140 | " concave points (worst): 0.0 0.291\n", 141 | " symmetry (worst): 0.156 0.664\n", 142 | " fractal dimension (worst): 0.055 0.208\n", 143 | " ===================================== ====== ======\n", 144 | "\n", 145 | " :Missing Attribute Values: None\n", 146 | "\n", 147 | " :Class Distribution: 212 - Malignant, 357 - Benign\n", 148 | "\n", 149 | " :Creator: Dr. 
William H. Wolberg, W. Nick Street, Olvi L. Mangasarian\n", 150 | "\n", 151 | " :Donor: Nick Street\n", 152 | "\n", 153 | " :Date: November, 1995\n", 154 | "\n", 155 | "This is a copy of UCI ML Breast Cancer Wisconsin (Diagnostic) datasets.\n", 156 | "https://goo.gl/U2Uwz2\n", 157 | "\n", 158 | "Features are computed from a digitized image of a fine needle\n", 159 | "aspirate (FNA) of a breast mass. They describe\n", 160 | "characteristics of the cell nuclei present in the image.\n", 161 | "\n", 162 | "Separating plane described above was obtained using\n", 163 | "Multisurface Method-Tree (MSM-T) [K. P. Bennett, \"Decision Tree\n", 164 | "Construction Via Linear Programming.\" Proceedings of the 4th\n", 165 | "Midwest Artificial Intelligence and Cognitive Science Society,\n", 166 | "pp. 97-101, 1992], a classification method which uses linear\n", 167 | "programming to construct a decision tree. Relevant features\n", 168 | "were selected using an exhaustive search in the space of 1-4\n", 169 | "features and 1-3 separating planes.\n", 170 | "\n", 171 | "The actual linear program used to obtain the separating plane\n", 172 | "in the 3-dimensional space is that described in:\n", 173 | "[K. P. Bennett and O. L. Mangasarian: \"Robust Linear\n", 174 | "Programming Discrimination of Two Linearly Inseparable Sets\",\n", 175 | "Optimization Methods and Software 1, 1992, 23-34].\n", 176 | "\n", 177 | "This database is also available through the UW CS ftp server:\n", 178 | "\n", 179 | "ftp ftp.cs.wisc.edu\n", 180 | "cd math-prog/cpo-dataset/machine-learn/WDBC/\n", 181 | "\n", 182 | ".. topic:: References\n", 183 | "\n", 184 | " - W.N. Street, W.H. Wolberg and O.L. Mangasarian. Nuclear feature extraction \n", 185 | " for breast tumor diagnosis. IS&T/SPIE 1993 International Symposium on \n", 186 | " Electronic Imaging: Science and Technology, volume 1905, pages 861-870,\n", 187 | " San Jose, CA, 1993.\n", 188 | " - O.L. Mangasarian, W.N. Street and W.H. Wolberg. 
Breast cancer diagnosis and \n", 189 | " prognosis via linear programming. Operations Research, 43(4), pages 570-577, \n", 190 | " July-August 1995.\n", 191 | " - W.H. Wolberg, W.N. Street, and O.L. Mangasarian. Machine learning techniques\n", 192 | " to diagnose breast cancer from fine-needle aspirates. Cancer Letters 77 (1994) \n", 193 | " 163-171.\n" 194 | ] 195 | } 196 | ], 197 | "source": [ 198 | "print(cancer['DESCR'])" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 11, 204 | "id": "029871e7", 205 | "metadata": {}, 206 | "outputs": [ 207 | { 208 | "data": { 209 | "text/html": [ 210 | "
\n", 211 | "\n", 224 | "\n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | "
mean radiusmean texturemean perimetermean areamean smoothnessmean compactnessmean concavitymean concave pointsmean symmetrymean fractal dimension...worst radiusworst textureworst perimeterworst areaworst smoothnessworst compactnessworst concavityworst concave pointsworst symmetryworst fractal dimension
017.9910.38122.801001.00.118400.277600.30010.147100.24190.07871...25.3817.33184.602019.00.16220.66560.71190.26540.46010.11890
120.5717.77132.901326.00.084740.078640.08690.070170.18120.05667...24.9923.41158.801956.00.12380.18660.24160.18600.27500.08902
219.6921.25130.001203.00.109600.159900.19740.127900.20690.05999...23.5725.53152.501709.00.14440.42450.45040.24300.36130.08758
311.4220.3877.58386.10.142500.283900.24140.105200.25970.09744...14.9126.5098.87567.70.20980.86630.68690.25750.66380.17300
420.2914.34135.101297.00.100300.132800.19800.104300.18090.05883...22.5416.67152.201575.00.13740.20500.40000.16250.23640.07678
\n", 374 | "

5 rows × 30 columns

\n", 375 | "
" 376 | ], 377 | "text/plain": [ 378 | " mean radius mean texture mean perimeter mean area mean smoothness \\\n", 379 | "0 17.99 10.38 122.80 1001.0 0.11840 \n", 380 | "1 20.57 17.77 132.90 1326.0 0.08474 \n", 381 | "2 19.69 21.25 130.00 1203.0 0.10960 \n", 382 | "3 11.42 20.38 77.58 386.1 0.14250 \n", 383 | "4 20.29 14.34 135.10 1297.0 0.10030 \n", 384 | "\n", 385 | " mean compactness mean concavity mean concave points mean symmetry \\\n", 386 | "0 0.27760 0.3001 0.14710 0.2419 \n", 387 | "1 0.07864 0.0869 0.07017 0.1812 \n", 388 | "2 0.15990 0.1974 0.12790 0.2069 \n", 389 | "3 0.28390 0.2414 0.10520 0.2597 \n", 390 | "4 0.13280 0.1980 0.10430 0.1809 \n", 391 | "\n", 392 | " mean fractal dimension ... worst radius worst texture worst perimeter \\\n", 393 | "0 0.07871 ... 25.38 17.33 184.60 \n", 394 | "1 0.05667 ... 24.99 23.41 158.80 \n", 395 | "2 0.05999 ... 23.57 25.53 152.50 \n", 396 | "3 0.09744 ... 14.91 26.50 98.87 \n", 397 | "4 0.05883 ... 22.54 16.67 152.20 \n", 398 | "\n", 399 | " worst area worst smoothness worst compactness worst concavity \\\n", 400 | "0 2019.0 0.1622 0.6656 0.7119 \n", 401 | "1 1956.0 0.1238 0.1866 0.2416 \n", 402 | "2 1709.0 0.1444 0.4245 0.4504 \n", 403 | "3 567.7 0.2098 0.8663 0.6869 \n", 404 | "4 1575.0 0.1374 0.2050 0.4000 \n", 405 | "\n", 406 | " worst concave points worst symmetry worst fractal dimension \n", 407 | "0 0.2654 0.4601 0.11890 \n", 408 | "1 0.1860 0.2750 0.08902 \n", 409 | "2 0.2430 0.3613 0.08758 \n", 410 | "3 0.2575 0.6638 0.17300 \n", 411 | "4 0.1625 0.2364 0.07678 \n", 412 | "\n", 413 | "[5 rows x 30 columns]" 414 | ] 415 | }, 416 | "execution_count": 11, 417 | "metadata": {}, 418 | "output_type": "execute_result" 419 | } 420 | ], 421 | "source": [ 422 | "df_feat=pd.DataFrame(cancer['data'],columns=cancer['feature_names'])\n", 423 | "df_feat.head()" 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": 12, 429 | "id": "498e3a05", 430 | "metadata": {}, 431 | "outputs": [ 432 | { 433 | 
"name": "stdout", 434 | "output_type": "stream", 435 | "text": [ 436 | "\n", 437 | "RangeIndex: 569 entries, 0 to 568\n", 438 | "Data columns (total 30 columns):\n", 439 | " # Column Non-Null Count Dtype \n", 440 | "--- ------ -------------- ----- \n", 441 | " 0 mean radius 569 non-null float64\n", 442 | " 1 mean texture 569 non-null float64\n", 443 | " 2 mean perimeter 569 non-null float64\n", 444 | " 3 mean area 569 non-null float64\n", 445 | " 4 mean smoothness 569 non-null float64\n", 446 | " 5 mean compactness 569 non-null float64\n", 447 | " 6 mean concavity 569 non-null float64\n", 448 | " 7 mean concave points 569 non-null float64\n", 449 | " 8 mean symmetry 569 non-null float64\n", 450 | " 9 mean fractal dimension 569 non-null float64\n", 451 | " 10 radius error 569 non-null float64\n", 452 | " 11 texture error 569 non-null float64\n", 453 | " 12 perimeter error 569 non-null float64\n", 454 | " 13 area error 569 non-null float64\n", 455 | " 14 smoothness error 569 non-null float64\n", 456 | " 15 compactness error 569 non-null float64\n", 457 | " 16 concavity error 569 non-null float64\n", 458 | " 17 concave points error 569 non-null float64\n", 459 | " 18 symmetry error 569 non-null float64\n", 460 | " 19 fractal dimension error 569 non-null float64\n", 461 | " 20 worst radius 569 non-null float64\n", 462 | " 21 worst texture 569 non-null float64\n", 463 | " 22 worst perimeter 569 non-null float64\n", 464 | " 23 worst area 569 non-null float64\n", 465 | " 24 worst smoothness 569 non-null float64\n", 466 | " 25 worst compactness 569 non-null float64\n", 467 | " 26 worst concavity 569 non-null float64\n", 468 | " 27 worst concave points 569 non-null float64\n", 469 | " 28 worst symmetry 569 non-null float64\n", 470 | " 29 worst fractal dimension 569 non-null float64\n", 471 | "dtypes: float64(30)\n", 472 | "memory usage: 133.5 KB\n" 473 | ] 474 | } 475 | ], 476 | "source": [ 477 | "df_feat.info()" 478 | ] 479 | }, 480 | { 481 | "cell_type": "code", 482 | 
"execution_count": 14, 483 | "id": "a6bf042a", 484 | "metadata": {}, 485 | "outputs": [], 486 | "source": [ 487 | "from sklearn.model_selection import train_test_split" 488 | ] 489 | }, 490 | { 491 | "cell_type": "code", 492 | "execution_count": 16, 493 | "id": "c72ed091", 494 | "metadata": {}, 495 | "outputs": [], 496 | "source": [ 497 | "X=df_feat\n", 498 | "y=cancer['target']\n", 499 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=101)" 500 | ] 501 | }, 502 | { 503 | "cell_type": "code", 504 | "execution_count": 17, 505 | "id": "12b80c92", 506 | "metadata": {}, 507 | "outputs": [ 508 | { 509 | "data": { 510 | "text/plain": [ 511 | "array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,\n", 512 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,\n", 513 | " 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,\n", 514 | " 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,\n", 515 | " 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,\n", 516 | " 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,\n", 517 | " 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,\n", 518 | " 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,\n", 519 | " 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,\n", 520 | " 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,\n", 521 | " 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,\n", 522 | " 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 523 | " 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,\n", 524 | " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1,\n", 525 | " 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0,\n", 526 | " 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0,\n", 527 | " 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 
0, 0,\n", 528 | " 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1,\n", 529 | " 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0,\n", 530 | " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1,\n", 531 | " 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,\n", 532 | " 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,\n", 533 | " 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1,\n", 534 | " 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,\n", 535 | " 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", 536 | " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1])" 537 | ] 538 | }, 539 | "execution_count": 17, 540 | "metadata": {}, 541 | "output_type": "execute_result" 542 | } 543 | ], 544 | "source": [ 545 | "cancer['target']" 546 | ] 547 | }, 548 | { 549 | "cell_type": "code", 550 | "execution_count": 21, 551 | "id": "ea46ff2a", 552 | "metadata": {}, 553 | "outputs": [], 554 | "source": [ 555 | "from sklearn.svm import SVC" 556 | ] 557 | }, 558 | { 559 | "cell_type": "code", 560 | "execution_count": 25, 561 | "id": "8fe55202", 562 | "metadata": {}, 563 | "outputs": [], 564 | "source": [ 565 | "model = SVC()" 566 | ] 567 | }, 568 | { 569 | "cell_type": "code", 570 | "execution_count": 26, 571 | "id": "8dd2a400", 572 | "metadata": {}, 573 | "outputs": [ 574 | { 575 | "data": { 576 | "text/plain": [ 577 | "SVC()" 578 | ] 579 | }, 580 | "execution_count": 26, 581 | "metadata": {}, 582 | "output_type": "execute_result" 583 | } 584 | ], 585 | "source": [ 586 | "model.fit(X_train,y_train)" 587 | ] 588 | }, 589 | { 590 | "cell_type": "code", 591 | "execution_count": 27, 592 | "id": "fab8056d", 593 | "metadata": {}, 594 | "outputs": [], 595 | "source": [ 596 | "pred=model.predict(X_test)" 597 | ] 598 | }, 599 | { 600 | "cell_type": "code", 601 | "execution_count": 29, 602 | "id": "2690bbf2", 603 | "metadata": {}, 604 | "outputs": [], 605 | 
"source": [ 606 | "from sklearn.metrics import confusion_matrix" 607 | ] 608 | }, 609 | { 610 | "cell_type": "code", 611 | "execution_count": 30, 612 | "id": "7c7c1d44", 613 | "metadata": {}, 614 | "outputs": [], 615 | "source": [ 616 | "from sklearn.metrics import classification_report" 617 | ] 618 | }, 619 | { 620 | "cell_type": "code", 621 | "execution_count": 31, 622 | "id": "0414cbc7", 623 | "metadata": {}, 624 | "outputs": [ 625 | { 626 | "name": "stdout", 627 | "output_type": "stream", 628 | "text": [ 629 | "[[ 56 10]\n", 630 | " [ 3 102]]\n", 631 | "\n", 632 | "\n", 633 | " precision recall f1-score support\n", 634 | "\n", 635 | " 0 0.95 0.85 0.90 66\n", 636 | " 1 0.91 0.97 0.94 105\n", 637 | "\n", 638 | " accuracy 0.92 171\n", 639 | " macro avg 0.93 0.91 0.92 171\n", 640 | "weighted avg 0.93 0.92 0.92 171\n", 641 | "\n" 642 | ] 643 | } 644 | ], 645 | "source": [ 646 | "print(confusion_matrix(y_test,pred))\n", 647 | "print('\\n')\n", 648 | "print(classification_report(y_test,pred))" 649 | ] 650 | }, 651 | { 652 | "cell_type": "code", 653 | "execution_count": 33, 654 | "id": "95d1c4f0", 655 | "metadata": {}, 656 | "outputs": [], 657 | "source": [ 658 | "from sklearn.model_selection import GridSearchCV" 659 | ] 660 | }, 661 | { 662 | "cell_type": "code", 663 | "execution_count": null, 664 | "id": "73b79c38", 665 | "metadata": {}, 666 | "outputs": [], 667 | "source": [] 668 | } 669 | ], 670 | "metadata": { 671 | "kernelspec": { 672 | "display_name": "Python 3", 673 | "language": "python", 674 | "name": "python3" 675 | }, 676 | "language_info": { 677 | "codemirror_mode": { 678 | "name": "ipython", 679 | "version": 3 680 | }, 681 | "file_extension": ".py", 682 | "mimetype": "text/x-python", 683 | "name": "python", 684 | "nbconvert_exporter": "python", 685 | "pygments_lexer": "ipython3", 686 | "version": "3.8.10" 687 | } 688 | }, 689 | "nbformat": 4, 690 | "nbformat_minor": 5 691 | } 692 | 
-------------------------------------------------------------------------------- /ML algorithm/5-SVM/SVM.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "1f60e40b", 6 | "metadata": {}, 7 | "source": [ 8 | "# SVM" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "8a0e0607", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd\n", 19 | "import numpy as np\n", 20 | "import matplotlib.pyplot as plt\n", 21 | "import seaborn as sns\n", 22 | "%matplotlib inline" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "id": "e74c73e1", 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "from sklearn.datasets import load_breast_cancer" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 3, 38 | "id": "555f2622", 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "cancer = load_breast_cancer()" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 4, 48 | "id": "83bac0dc", 49 | "metadata": {}, 50 | "outputs": [ 51 | { 52 | "data": { 53 | "text/plain": [ 54 | "dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename'])" 55 | ] 56 | }, 57 | "execution_count": 4, 58 | "metadata": {}, 59 | "output_type": "execute_result" 60 | } 61 | ], 62 | "source": [ 63 | "cancer.keys()" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 7, 69 | "id": "6e407f3e", 70 | "metadata": {}, 71 | "outputs": [ 72 | { 73 | "name": "stdout", 74 | "output_type": "stream", 75 | "text": [ 76 | ".. 
_breast_cancer_dataset:\n", 77 | "\n", 78 | "Breast cancer wisconsin (diagnostic) dataset\n", 79 | "--------------------------------------------\n", 80 | "\n", 81 | "**Data Set Characteristics:**\n", 82 | "\n", 83 | " :Number of Instances: 569\n", 84 | "\n", 85 | " :Number of Attributes: 30 numeric, predictive attributes and the class\n", 86 | "\n", 87 | " :Attribute Information:\n", 88 | " - radius (mean of distances from center to points on the perimeter)\n", 89 | " - texture (standard deviation of gray-scale values)\n", 90 | " - perimeter\n", 91 | " - area\n", 92 | " - smoothness (local variation in radius lengths)\n", 93 | " - compactness (perimeter^2 / area - 1.0)\n", 94 | " - concavity (severity of concave portions of the contour)\n", 95 | " - concave points (number of concave portions of the contour)\n", 96 | " - symmetry\n", 97 | " - fractal dimension (\"coastline approximation\" - 1)\n", 98 | "\n", 99 | " The mean, standard error, and \"worst\" or largest (mean of the three\n", 100 | " worst/largest values) of these features were computed for each image,\n", 101 | " resulting in 30 features. 
For instance, field 0 is Mean Radius, field\n", 102 | " 10 is Radius SE, field 20 is Worst Radius.\n", 103 | "\n", 104 | " - class:\n", 105 | " - WDBC-Malignant\n", 106 | " - WDBC-Benign\n", 107 | "\n", 108 | " :Summary Statistics:\n", 109 | "\n", 110 | " ===================================== ====== ======\n", 111 | " Min Max\n", 112 | " ===================================== ====== ======\n", 113 | " radius (mean): 6.981 28.11\n", 114 | " texture (mean): 9.71 39.28\n", 115 | " perimeter (mean): 43.79 188.5\n", 116 | " area (mean): 143.5 2501.0\n", 117 | " smoothness (mean): 0.053 0.163\n", 118 | " compactness (mean): 0.019 0.345\n", 119 | " concavity (mean): 0.0 0.427\n", 120 | " concave points (mean): 0.0 0.201\n", 121 | " symmetry (mean): 0.106 0.304\n", 122 | " fractal dimension (mean): 0.05 0.097\n", 123 | " radius (standard error): 0.112 2.873\n", 124 | " texture (standard error): 0.36 4.885\n", 125 | " perimeter (standard error): 0.757 21.98\n", 126 | " area (standard error): 6.802 542.2\n", 127 | " smoothness (standard error): 0.002 0.031\n", 128 | " compactness (standard error): 0.002 0.135\n", 129 | " concavity (standard error): 0.0 0.396\n", 130 | " concave points (standard error): 0.0 0.053\n", 131 | " symmetry (standard error): 0.008 0.079\n", 132 | " fractal dimension (standard error): 0.001 0.03\n", 133 | " radius (worst): 7.93 36.04\n", 134 | " texture (worst): 12.02 49.54\n", 135 | " perimeter (worst): 50.41 251.2\n", 136 | " area (worst): 185.2 4254.0\n", 137 | " smoothness (worst): 0.071 0.223\n", 138 | " compactness (worst): 0.027 1.058\n", 139 | " concavity (worst): 0.0 1.252\n", 140 | " concave points (worst): 0.0 0.291\n", 141 | " symmetry (worst): 0.156 0.664\n", 142 | " fractal dimension (worst): 0.055 0.208\n", 143 | " ===================================== ====== ======\n", 144 | "\n", 145 | " :Missing Attribute Values: None\n", 146 | "\n", 147 | " :Class Distribution: 212 - Malignant, 357 - Benign\n", 148 | "\n", 149 | " :Creator: Dr. 
William H. Wolberg, W. Nick Street, Olvi L. Mangasarian\n", 150 | "\n", 151 | " :Donor: Nick Street\n", 152 | "\n", 153 | " :Date: November, 1995\n", 154 | "\n", 155 | "This is a copy of UCI ML Breast Cancer Wisconsin (Diagnostic) datasets.\n", 156 | "https://goo.gl/U2Uwz2\n", 157 | "\n", 158 | "Features are computed from a digitized image of a fine needle\n", 159 | "aspirate (FNA) of a breast mass. They describe\n", 160 | "characteristics of the cell nuclei present in the image.\n", 161 | "\n", 162 | "Separating plane described above was obtained using\n", 163 | "Multisurface Method-Tree (MSM-T) [K. P. Bennett, \"Decision Tree\n", 164 | "Construction Via Linear Programming.\" Proceedings of the 4th\n", 165 | "Midwest Artificial Intelligence and Cognitive Science Society,\n", 166 | "pp. 97-101, 1992], a classification method which uses linear\n", 167 | "programming to construct a decision tree. Relevant features\n", 168 | "were selected using an exhaustive search in the space of 1-4\n", 169 | "features and 1-3 separating planes.\n", 170 | "\n", 171 | "The actual linear program used to obtain the separating plane\n", 172 | "in the 3-dimensional space is that described in:\n", 173 | "[K. P. Bennett and O. L. Mangasarian: \"Robust Linear\n", 174 | "Programming Discrimination of Two Linearly Inseparable Sets\",\n", 175 | "Optimization Methods and Software 1, 1992, 23-34].\n", 176 | "\n", 177 | "This database is also available through the UW CS ftp server:\n", 178 | "\n", 179 | "ftp ftp.cs.wisc.edu\n", 180 | "cd math-prog/cpo-dataset/machine-learn/WDBC/\n", 181 | "\n", 182 | ".. topic:: References\n", 183 | "\n", 184 | " - W.N. Street, W.H. Wolberg and O.L. Mangasarian. Nuclear feature extraction \n", 185 | " for breast tumor diagnosis. IS&T/SPIE 1993 International Symposium on \n", 186 | " Electronic Imaging: Science and Technology, volume 1905, pages 861-870,\n", 187 | " San Jose, CA, 1993.\n", 188 | " - O.L. Mangasarian, W.N. Street and W.H. Wolberg. 
Breast cancer diagnosis and \n", 189 | " prognosis via linear programming. Operations Research, 43(4), pages 570-577, \n", 190 | " July-August 1995.\n", 191 | " - W.H. Wolberg, W.N. Street, and O.L. Mangasarian. Machine learning techniques\n", 192 | " to diagnose breast cancer from fine-needle aspirates. Cancer Letters 77 (1994) \n", 193 | " 163-171.\n" 194 | ] 195 | } 196 | ], 197 | "source": [ 198 | "print(cancer['DESCR'])" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 11, 204 | "id": "029871e7", 205 | "metadata": {}, 206 | "outputs": [ 207 | { 208 | "data": { 209 | "text/html": [ 210 | "
\n", 211 | "\n", 224 | "\n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | "
mean radiusmean texturemean perimetermean areamean smoothnessmean compactnessmean concavitymean concave pointsmean symmetrymean fractal dimension...worst radiusworst textureworst perimeterworst areaworst smoothnessworst compactnessworst concavityworst concave pointsworst symmetryworst fractal dimension
017.9910.38122.801001.00.118400.277600.30010.147100.24190.07871...25.3817.33184.602019.00.16220.66560.71190.26540.46010.11890
120.5717.77132.901326.00.084740.078640.08690.070170.18120.05667...24.9923.41158.801956.00.12380.18660.24160.18600.27500.08902
219.6921.25130.001203.00.109600.159900.19740.127900.20690.05999...23.5725.53152.501709.00.14440.42450.45040.24300.36130.08758
311.4220.3877.58386.10.142500.283900.24140.105200.25970.09744...14.9126.5098.87567.70.20980.86630.68690.25750.66380.17300
420.2914.34135.101297.00.100300.132800.19800.104300.18090.05883...22.5416.67152.201575.00.13740.20500.40000.16250.23640.07678
\n", 374 | "

5 rows × 30 columns

\n", 375 | "
" 376 | ], 377 | "text/plain": [ 378 | " mean radius mean texture mean perimeter mean area mean smoothness \\\n", 379 | "0 17.99 10.38 122.80 1001.0 0.11840 \n", 380 | "1 20.57 17.77 132.90 1326.0 0.08474 \n", 381 | "2 19.69 21.25 130.00 1203.0 0.10960 \n", 382 | "3 11.42 20.38 77.58 386.1 0.14250 \n", 383 | "4 20.29 14.34 135.10 1297.0 0.10030 \n", 384 | "\n", 385 | " mean compactness mean concavity mean concave points mean symmetry \\\n", 386 | "0 0.27760 0.3001 0.14710 0.2419 \n", 387 | "1 0.07864 0.0869 0.07017 0.1812 \n", 388 | "2 0.15990 0.1974 0.12790 0.2069 \n", 389 | "3 0.28390 0.2414 0.10520 0.2597 \n", 390 | "4 0.13280 0.1980 0.10430 0.1809 \n", 391 | "\n", 392 | " mean fractal dimension ... worst radius worst texture worst perimeter \\\n", 393 | "0 0.07871 ... 25.38 17.33 184.60 \n", 394 | "1 0.05667 ... 24.99 23.41 158.80 \n", 395 | "2 0.05999 ... 23.57 25.53 152.50 \n", 396 | "3 0.09744 ... 14.91 26.50 98.87 \n", 397 | "4 0.05883 ... 22.54 16.67 152.20 \n", 398 | "\n", 399 | " worst area worst smoothness worst compactness worst concavity \\\n", 400 | "0 2019.0 0.1622 0.6656 0.7119 \n", 401 | "1 1956.0 0.1238 0.1866 0.2416 \n", 402 | "2 1709.0 0.1444 0.4245 0.4504 \n", 403 | "3 567.7 0.2098 0.8663 0.6869 \n", 404 | "4 1575.0 0.1374 0.2050 0.4000 \n", 405 | "\n", 406 | " worst concave points worst symmetry worst fractal dimension \n", 407 | "0 0.2654 0.4601 0.11890 \n", 408 | "1 0.1860 0.2750 0.08902 \n", 409 | "2 0.2430 0.3613 0.08758 \n", 410 | "3 0.2575 0.6638 0.17300 \n", 411 | "4 0.1625 0.2364 0.07678 \n", 412 | "\n", 413 | "[5 rows x 30 columns]" 414 | ] 415 | }, 416 | "execution_count": 11, 417 | "metadata": {}, 418 | "output_type": "execute_result" 419 | } 420 | ], 421 | "source": [ 422 | "df_feat=pd.DataFrame(cancer['data'],columns=cancer['feature_names'])\n", 423 | "df_feat.head()" 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": 12, 429 | "id": "498e3a05", 430 | "metadata": {}, 431 | "outputs": [ 432 | { 433 | 
"name": "stdout", 434 | "output_type": "stream", 435 | "text": [ 436 | "\n", 437 | "RangeIndex: 569 entries, 0 to 568\n", 438 | "Data columns (total 30 columns):\n", 439 | " # Column Non-Null Count Dtype \n", 440 | "--- ------ -------------- ----- \n", 441 | " 0 mean radius 569 non-null float64\n", 442 | " 1 mean texture 569 non-null float64\n", 443 | " 2 mean perimeter 569 non-null float64\n", 444 | " 3 mean area 569 non-null float64\n", 445 | " 4 mean smoothness 569 non-null float64\n", 446 | " 5 mean compactness 569 non-null float64\n", 447 | " 6 mean concavity 569 non-null float64\n", 448 | " 7 mean concave points 569 non-null float64\n", 449 | " 8 mean symmetry 569 non-null float64\n", 450 | " 9 mean fractal dimension 569 non-null float64\n", 451 | " 10 radius error 569 non-null float64\n", 452 | " 11 texture error 569 non-null float64\n", 453 | " 12 perimeter error 569 non-null float64\n", 454 | " 13 area error 569 non-null float64\n", 455 | " 14 smoothness error 569 non-null float64\n", 456 | " 15 compactness error 569 non-null float64\n", 457 | " 16 concavity error 569 non-null float64\n", 458 | " 17 concave points error 569 non-null float64\n", 459 | " 18 symmetry error 569 non-null float64\n", 460 | " 19 fractal dimension error 569 non-null float64\n", 461 | " 20 worst radius 569 non-null float64\n", 462 | " 21 worst texture 569 non-null float64\n", 463 | " 22 worst perimeter 569 non-null float64\n", 464 | " 23 worst area 569 non-null float64\n", 465 | " 24 worst smoothness 569 non-null float64\n", 466 | " 25 worst compactness 569 non-null float64\n", 467 | " 26 worst concavity 569 non-null float64\n", 468 | " 27 worst concave points 569 non-null float64\n", 469 | " 28 worst symmetry 569 non-null float64\n", 470 | " 29 worst fractal dimension 569 non-null float64\n", 471 | "dtypes: float64(30)\n", 472 | "memory usage: 133.5 KB\n" 473 | ] 474 | } 475 | ], 476 | "source": [ 477 | "df_feat.info()" 478 | ] 479 | }, 480 | { 481 | "cell_type": "code", 482 | 
"execution_count": 14, 483 | "id": "a6bf042a", 484 | "metadata": {}, 485 | "outputs": [], 486 | "source": [ 487 | "from sklearn.model_selection import train_test_split" 488 | ] 489 | }, 490 | { 491 | "cell_type": "code", 492 | "execution_count": 16, 493 | "id": "c72ed091", 494 | "metadata": {}, 495 | "outputs": [], 496 | "source": [ 497 | "X=df_feat\n", 498 | "y=cancer['target']\n", 499 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=101)" 500 | ] 501 | }, 502 | { 503 | "cell_type": "code", 504 | "execution_count": 17, 505 | "id": "12b80c92", 506 | "metadata": {}, 507 | "outputs": [ 508 | { 509 | "data": { 510 | "text/plain": [ 511 | "array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,\n", 512 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,\n", 513 | " 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,\n", 514 | " 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,\n", 515 | " 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,\n", 516 | " 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,\n", 517 | " 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,\n", 518 | " 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,\n", 519 | " 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,\n", 520 | " 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,\n", 521 | " 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,\n", 522 | " 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 523 | " 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,\n", 524 | " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1,\n", 525 | " 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0,\n", 526 | " 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0,\n", 527 | " 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 
0, 0,\n", 528 | " 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1,\n", 529 | " 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0,\n", 530 | " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1,\n", 531 | " 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,\n", 532 | " 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,\n", 533 | " 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1,\n", 534 | " 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,\n", 535 | " 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", 536 | " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1])" 537 | ] 538 | }, 539 | "execution_count": 17, 540 | "metadata": {}, 541 | "output_type": "execute_result" 542 | } 543 | ], 544 | "source": [ 545 | "cancer['target']" 546 | ] 547 | }, 548 | { 549 | "cell_type": "code", 550 | "execution_count": 21, 551 | "id": "ea46ff2a", 552 | "metadata": {}, 553 | "outputs": [], 554 | "source": [ 555 | "from sklearn.svm import SVC" 556 | ] 557 | }, 558 | { 559 | "cell_type": "code", 560 | "execution_count": 25, 561 | "id": "8fe55202", 562 | "metadata": {}, 563 | "outputs": [], 564 | "source": [ 565 | "model = SVC()" 566 | ] 567 | }, 568 | { 569 | "cell_type": "code", 570 | "execution_count": 26, 571 | "id": "8dd2a400", 572 | "metadata": {}, 573 | "outputs": [ 574 | { 575 | "data": { 576 | "text/plain": [ 577 | "SVC()" 578 | ] 579 | }, 580 | "execution_count": 26, 581 | "metadata": {}, 582 | "output_type": "execute_result" 583 | } 584 | ], 585 | "source": [ 586 | "model.fit(X_train,y_train)" 587 | ] 588 | }, 589 | { 590 | "cell_type": "code", 591 | "execution_count": 27, 592 | "id": "fab8056d", 593 | "metadata": {}, 594 | "outputs": [], 595 | "source": [ 596 | "pred=model.predict(X_test)" 597 | ] 598 | }, 599 | { 600 | "cell_type": "code", 601 | "execution_count": 29, 602 | "id": "2690bbf2", 603 | "metadata": {}, 604 | "outputs": [], 605 | 
"source": [ 606 | "from sklearn.metrics import confusion_matrix" 607 | ] 608 | }, 609 | { 610 | "cell_type": "code", 611 | "execution_count": 30, 612 | "id": "7c7c1d44", 613 | "metadata": {}, 614 | "outputs": [], 615 | "source": [ 616 | "from sklearn.metrics import classification_report" 617 | ] 618 | }, 619 | { 620 | "cell_type": "code", 621 | "execution_count": 31, 622 | "id": "0414cbc7", 623 | "metadata": {}, 624 | "outputs": [ 625 | { 626 | "name": "stdout", 627 | "output_type": "stream", 628 | "text": [ 629 | "[[ 56 10]\n", 630 | " [ 3 102]]\n", 631 | "\n", 632 | "\n", 633 | " precision recall f1-score support\n", 634 | "\n", 635 | " 0 0.95 0.85 0.90 66\n", 636 | " 1 0.91 0.97 0.94 105\n", 637 | "\n", 638 | " accuracy 0.92 171\n", 639 | " macro avg 0.93 0.91 0.92 171\n", 640 | "weighted avg 0.93 0.92 0.92 171\n", 641 | "\n" 642 | ] 643 | } 644 | ], 645 | "source": [ 646 | "print(confusion_matrix(y_test,pred))\n", 647 | "print('\\n')\n", 648 | "print(classification_report(y_test,pred))" 649 | ] 650 | }, 651 | { 652 | "cell_type": "code", 653 | "execution_count": 33, 654 | "id": "95d1c4f0", 655 | "metadata": {}, 656 | "outputs": [], 657 | "source": [ 658 | "from sklearn.model_selection import GridSearchCV" 659 | ] 660 | }, 661 | { 662 | "cell_type": "code", 663 | "execution_count": null, 664 | "id": "73b79c38", 665 | "metadata": {}, 666 | "outputs": [], 667 | "source": [] 668 | } 669 | ], 670 | "metadata": { 671 | "kernelspec": { 672 | "display_name": "Python 3", 673 | "language": "python", 674 | "name": "python3" 675 | }, 676 | "language_info": { 677 | "codemirror_mode": { 678 | "name": "ipython", 679 | "version": 3 680 | }, 681 | "file_extension": ".py", 682 | "mimetype": "text/x-python", 683 | "name": "python", 684 | "nbconvert_exporter": "python", 685 | "pygments_lexer": "ipython3", 686 | "version": "3.8.10" 687 | } 688 | }, 689 | "nbformat": 4, 690 | "nbformat_minor": 5 691 | } 692 | 
-------------------------------------------------------------------------------- /ML algorithm/8-PrincipalComponentAnalysis/PCA.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishikkkkaaaa/Python-ML/8c64a5d82314a5603e2e892ba991fabdee8932f2/ML algorithm/8-PrincipalComponentAnalysis/PCA.png -------------------------------------------------------------------------------- /ML algorithm/9-Recommender System/u.item: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishikkkkaaaa/Python-ML/8c64a5d82314a5603e2e892ba991fabdee8932f2/ML algorithm/9-Recommender System/u.item -------------------------------------------------------------------------------- /ML basics/1_NUMPY.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Numpy" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "*Numpy arrays are the main way we will use numpy*\n", 15 | "\n", 16 | "*They come in two flavors: **Vectors & Matrices***\n", 17 | "\n", 18 | "*Vectors are 1D and matrices are 2D (a matrix can still have only one row or one column)*" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "data": { 28 | "text/plain": [ 29 | "[1, 2, 3]" 30 | ] 31 | }, 32 | "execution_count": 1, 33 | "metadata": {}, 34 | "output_type": "execute_result" 35 | } 36 | ], 37 | "source": [ 38 | "list=[1,2,3]\n", 39 | "list" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 2, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "import numpy as np" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 4, 54 | "metadata": {}, 55 | "outputs": [ 56 | { 57 | "data": { 58 | "text/plain": [ 59 | "array([1, 2, 3])" 60 | ] 61 | 
}, 62 | "execution_count": 4, 63 | "metadata": {}, 64 | "output_type": "execute_result" 65 | } 66 | ], 67 | "source": [ 68 | "arr=np.array(list)\n", 69 | "arr" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 5, 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "data": { 79 | "text/plain": [ 80 | "array([[1, 2, 3],\n", 81 | " [4, 5, 6],\n", 82 | " [7, 8, 9]])" 83 | ] 84 | }, 85 | "execution_count": 5, 86 | "metadata": {}, 87 | "output_type": "execute_result" 88 | } 89 | ], 90 | "source": [ 91 | "my_math=[[1,2,3],[4,5,6],[7,8,9]]\n", 92 | "np.array(my_math)" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "**Arange** is one of the most useful functions for *quickly generating an array*" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 6, 105 | "metadata": {}, 106 | "outputs": [ 107 | { 108 | "data": { 109 | "text/plain": [ 110 | "array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])" 111 | ] 112 | }, 113 | "execution_count": 6, 114 | "metadata": {}, 115 | "output_type": "execute_result" 116 | } 117 | ], 118 | "source": [ 119 | "np.arange(0,10)\n", 120 | "#indexing does upto 10, but does not include 10" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 8, 126 | "metadata": {}, 127 | "outputs": [ 128 | { 129 | "data": { 130 | "text/plain": [ 131 | "array([ 0, 2, 4, 6, 8, 10])" 132 | ] 133 | }, 134 | "execution_count": 8, 135 | "metadata": {}, 136 | "output_type": "execute_result" 137 | } 138 | ], 139 | "source": [ 140 | "np.arange(0,11,2)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 9, 146 | "metadata": {}, 147 | "outputs": [ 148 | { 149 | "data": { 150 | "text/plain": [ 151 | "array([0., 0., 0.])" 152 | ] 153 | }, 154 | "execution_count": 9, 155 | "metadata": {}, 156 | "output_type": "execute_result" 157 | } 158 | ], 159 | "source": [ 160 | "np.zeros(3)" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | 
"execution_count": 10, 166 | "metadata": {}, 167 | "outputs": [ 168 | { 169 | "data": { 170 | "text/plain": [ 171 | "array([[0., 0., 0.],\n", 172 | " [0., 0., 0.]])" 173 | ] 174 | }, 175 | "execution_count": 10, 176 | "metadata": {}, 177 | "output_type": "execute_result" 178 | } 179 | ], 180 | "source": [ 181 | "np.zeros((2,3))" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 11, 187 | "metadata": {}, 188 | "outputs": [ 189 | { 190 | "data": { 191 | "text/plain": [ 192 | "array([1., 1., 1., 1.])" 193 | ] 194 | }, 195 | "execution_count": 11, 196 | "metadata": {}, 197 | "output_type": "execute_result" 198 | } 199 | ], 200 | "source": [ 201 | "np.ones(4)" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | " Creating **Identity Matrix**" 209 | ] 210 | }, 211 | { 212 | "cell_type": "markdown", 213 | "metadata": {}, 214 | "source": [ 215 | "2D square matrix, having ones in diagonal" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 12, 221 | "metadata": {}, 222 | "outputs": [ 223 | { 224 | "data": { 225 | "text/plain": [ 226 | "array([[1., 0., 0., 0.],\n", 227 | " [0., 1., 0., 0.],\n", 228 | " [0., 0., 1., 0.],\n", 229 | " [0., 0., 0., 1.]])" 230 | ] 231 | }, 232 | "execution_count": 12, 233 | "metadata": {}, 234 | "output_type": "execute_result" 235 | } 236 | ], 237 | "source": [ 238 | "np.eye(4)" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": 13, 244 | "metadata": {}, 245 | "outputs": [ 246 | { 247 | "data": { 248 | "text/plain": [ 249 | "array([0.31860116, 0.15041774, 0.47305949, 0.91984763, 0.31191998])" 250 | ] 251 | }, 252 | "execution_count": 13, 253 | "metadata": {}, 254 | "output_type": "execute_result" 255 | } 256 | ], 257 | "source": [ 258 | "np.random.rand(5)" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": 14, 264 | "metadata": {}, 265 | "outputs": [ 266 | { 267 | "data": { 268 | "text/plain": 
[ 269 | "array([[0.30265439, 0.94887264, 0.42351647],\n", 270 | " [0.80734252, 0.38626543, 0.92528118],\n", 271 | " [0.39704804, 0.01872057, 0.19964596]])" 272 | ] 273 | }, 274 | "execution_count": 14, 275 | "metadata": {}, 276 | "output_type": "execute_result" 277 | } 278 | ], 279 | "source": [ 280 | "np.random.rand(3,3)" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": 15, 286 | "metadata": {}, 287 | "outputs": [ 288 | { 289 | "data": { 290 | "text/plain": [ 291 | "42" 292 | ] 293 | }, 294 | "execution_count": 15, 295 | "metadata": {}, 296 | "output_type": "execute_result" 297 | } 298 | ], 299 | "source": [ 300 | "np.random.randint(1,100)" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": 16, 306 | "metadata": {}, 307 | "outputs": [ 308 | { 309 | "data": { 310 | "text/plain": [ 311 | "array([23, 95, 15, 74, 54, 62, 5, 70, 80, 76])" 312 | ] 313 | }, 314 | "execution_count": 16, 315 | "metadata": {}, 316 | "output_type": "execute_result" 317 | } 318 | ], 319 | "source": [ 320 | "np.random.randint(1,100,10)" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": 17, 326 | "metadata": {}, 327 | "outputs": [ 328 | { 329 | "data": { 330 | "text/plain": [ 331 | "array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", 332 | " 17, 18, 19, 20, 21, 22, 23, 24])" 333 | ] 334 | }, 335 | "execution_count": 17, 336 | "metadata": {}, 337 | "output_type": "execute_result" 338 | } 339 | ], 340 | "source": [ 341 | "arr=np.arange(25)\n", 342 | "arr" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": 18, 348 | "metadata": {}, 349 | "outputs": [ 350 | { 351 | "data": { 352 | "text/plain": [ 353 | "array([38, 48, 32, 12, 44, 3, 3, 12, 16, 8])" 354 | ] 355 | }, 356 | "execution_count": 18, 357 | "metadata": {}, 358 | "output_type": "execute_result" 359 | } 360 | ], 361 | "source": [ 362 | "random_arr=np.random.randint(0,50,10)\n", 363 | "random_arr" 364 | ] 365 | }, 366 | 
{ 367 | "cell_type": "code", 368 | "execution_count": 19, 369 | "metadata": {}, 370 | "outputs": [ 371 | { 372 | "data": { 373 | "text/plain": [ 374 | "array([[ 0, 1, 2, 3, 4],\n", 375 | " [ 5, 6, 7, 8, 9],\n", 376 | " [10, 11, 12, 13, 14],\n", 377 | " [15, 16, 17, 18, 19],\n", 378 | " [20, 21, 22, 23, 24]])" 379 | ] 380 | }, 381 | "execution_count": 19, 382 | "metadata": {}, 383 | "output_type": "execute_result" 384 | } 385 | ], 386 | "source": [ 387 | "arr.reshape(5,5)" 388 | ] 389 | }, 390 | { 391 | "cell_type": "code", 392 | "execution_count": 20, 393 | "metadata": {}, 394 | "outputs": [ 395 | { 396 | "data": { 397 | "text/plain": [ 398 | "48" 399 | ] 400 | }, 401 | "execution_count": 20, 402 | "metadata": {}, 403 | "output_type": "execute_result" 404 | } 405 | ], 406 | "source": [ 407 | "random_arr.max()" 408 | ] 409 | }, 410 | { 411 | "cell_type": "code", 412 | "execution_count": 21, 413 | "metadata": {}, 414 | "outputs": [ 415 | { 416 | "data": { 417 | "text/plain": [ 418 | "3" 419 | ] 420 | }, 421 | "execution_count": 21, 422 | "metadata": {}, 423 | "output_type": "execute_result" 424 | } 425 | ], 426 | "source": [ 427 | "random_arr.min()" 428 | ] 429 | }, 430 | { 431 | "cell_type": "code", 432 | "execution_count": 22, 433 | "metadata": {}, 434 | "outputs": [ 435 | { 436 | "data": { 437 | "text/plain": [ 438 | "1" 439 | ] 440 | }, 441 | "execution_count": 22, 442 | "metadata": {}, 443 | "output_type": "execute_result" 444 | } 445 | ], 446 | "source": [ 447 | "random_arr.argmax()" 448 | ] 449 | }, 450 | { 451 | "cell_type": "code", 452 | "execution_count": 23, 453 | "metadata": {}, 454 | "outputs": [ 455 | { 456 | "data": { 457 | "text/plain": [ 458 | "(25,)" 459 | ] 460 | }, 461 | "execution_count": 23, 462 | "metadata": {}, 463 | "output_type": "execute_result" 464 | } 465 | ], 466 | "source": [ 467 | "#shape of a vector!\n", 468 | "arr.shape" 469 | ] 470 | }, 471 | { 472 | "cell_type": "code", 473 | "execution_count": 24, 474 | "metadata": {}, 475 | 
"outputs": [ 476 | { 477 | "data": { 478 | "text/plain": [ 479 | "array([[ 0, 1, 2, 3, 4],\n", 480 | " [ 5, 6, 7, 8, 9],\n", 481 | " [10, 11, 12, 13, 14],\n", 482 | " [15, 16, 17, 18, 19],\n", 483 | " [20, 21, 22, 23, 24]])" 484 | ] 485 | }, 486 | "execution_count": 24, 487 | "metadata": {}, 488 | "output_type": "execute_result" 489 | } 490 | ], 491 | "source": [ 492 | "#reshaping an array\n", 493 | "arr=arr.reshape(5,5)\n", 494 | "arr" 495 | ] 496 | }, 497 | { 498 | "cell_type": "code", 499 | "execution_count": 25, 500 | "metadata": {}, 501 | "outputs": [ 502 | { 503 | "data": { 504 | "text/plain": [ 505 | "(5, 5)" 506 | ] 507 | }, 508 | "execution_count": 25, 509 | "metadata": {}, 510 | "output_type": "execute_result" 511 | } 512 | ], 513 | "source": [ 514 | "arr.shape" 515 | ] 516 | }, 517 | { 518 | "cell_type": "code", 519 | "execution_count": 26, 520 | "metadata": {}, 521 | "outputs": [ 522 | { 523 | "data": { 524 | "text/plain": [ 525 | "dtype('int64')" 526 | ] 527 | }, 528 | "execution_count": 26, 529 | "metadata": {}, 530 | "output_type": "execute_result" 531 | } 532 | ], 533 | "source": [ 534 | "arr.dtype" 535 | ] 536 | }, 537 | { 538 | "cell_type": "markdown", 539 | "metadata": {}, 540 | "source": [ 541 | "Indexing and Selection!!" 
542 | ] 543 | }, 544 | { 545 | "cell_type": "code", 546 | "execution_count": 27, 547 | "metadata": {}, 548 | "outputs": [ 549 | { 550 | "data": { 551 | "text/plain": [ 552 | "array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])" 553 | ] 554 | }, 555 | "execution_count": 27, 556 | "metadata": {}, 557 | "output_type": "execute_result" 558 | } 559 | ], 560 | "source": [ 561 | "arr=np.arange(0,11)\n", 562 | "arr" 563 | ] 564 | }, 565 | { 566 | "cell_type": "code", 567 | "execution_count": 28, 568 | "metadata": {}, 569 | "outputs": [ 570 | { 571 | "data": { 572 | "text/plain": [ 573 | "4" 574 | ] 575 | }, 576 | "execution_count": 28, 577 | "metadata": {}, 578 | "output_type": "execute_result" 579 | } 580 | ], 581 | "source": [ 582 | "arr[4]" 583 | ] 584 | }, 585 | { 586 | "cell_type": "code", 587 | "execution_count": 29, 588 | "metadata": {}, 589 | "outputs": [ 590 | { 591 | "data": { 592 | "text/plain": [ 593 | "array([1, 2, 3, 4])" 594 | ] 595 | }, 596 | "execution_count": 29, 597 | "metadata": {}, 598 | "output_type": "execute_result" 599 | } 600 | ], 601 | "source": [ 602 | "arr[1:5]" 603 | ] 604 | }, 605 | { 606 | "cell_type": "code", 607 | "execution_count": 30, 608 | "metadata": {}, 609 | "outputs": [ 610 | { 611 | "data": { 612 | "text/plain": [ 613 | "array([0, 1, 2, 3, 4, 5])" 614 | ] 615 | }, 616 | "execution_count": 30, 617 | "metadata": {}, 618 | "output_type": "execute_result" 619 | } 620 | ], 621 | "source": [ 622 | "slice=arr[0:6]\n", 623 | "slice\n" 624 | ] 625 | }, 626 | { 627 | "cell_type": "code", 628 | "execution_count": 31, 629 | "metadata": {}, 630 | "outputs": [ 631 | { 632 | "data": { 633 | "text/plain": [ 634 | "array([0, 1, 2, 3, 4, 5])" 635 | ] 636 | }, 637 | "execution_count": 31, 638 | "metadata": {}, 639 | "output_type": "execute_result" 640 | } 641 | ], 642 | "source": [ 643 | "slice[:]" 644 | ] 645 | }, 646 | { 647 | "cell_type": "code", 648 | "execution_count": 32, 649 | "metadata": {}, 650 | "outputs": [ 651 | { 652 | "data": { 653 | 
"text/plain": [ 654 | "array([99, 99, 99, 99, 99, 99])" 655 | ] 656 | }, 657 | "execution_count": 32, 658 | "metadata": {}, 659 | "output_type": "execute_result" 660 | } 661 | ], 662 | "source": [ 663 | "slice[:]=99\n", 664 | "slice\n" 665 | ] 666 | }, 667 | { 668 | "cell_type": "code", 669 | "execution_count": 33, 670 | "metadata": {}, 671 | "outputs": [ 672 | { 673 | "data": { 674 | "text/plain": [ 675 | "array([99, 99, 99, 99, 99, 99, 6, 7, 8, 9, 10])" 676 | ] 677 | }, 678 | "execution_count": 33, 679 | "metadata": {}, 680 | "output_type": "execute_result" 681 | } 682 | ], 683 | "source": [ 684 | "arr\n" 685 | ] 686 | }, 687 | { 688 | "cell_type": "code", 689 | "execution_count": 34, 690 | "metadata": {}, 691 | "outputs": [], 692 | "source": [ 693 | "arr_copy=arr.copy()" 694 | ] 695 | }, 696 | { 697 | "cell_type": "code", 698 | "execution_count": 35, 699 | "metadata": {}, 700 | "outputs": [ 701 | { 702 | "data": { 703 | "text/plain": [ 704 | "array([99, 99, 99, 99, 99, 99, 6, 7, 8, 9, 10])" 705 | ] 706 | }, 707 | "execution_count": 35, 708 | "metadata": {}, 709 | "output_type": "execute_result" 710 | } 711 | ], 712 | "source": [ 713 | "arr" 714 | ] 715 | }, 716 | { 717 | "cell_type": "code", 718 | "execution_count": 36, 719 | "metadata": {}, 720 | "outputs": [ 721 | { 722 | "data": { 723 | "text/plain": [ 724 | "array([99, 99, 99, 99, 99, 99, 6, 7, 8, 9, 10])" 725 | ] 726 | }, 727 | "execution_count": 36, 728 | "metadata": {}, 729 | "output_type": "execute_result" 730 | } 731 | ], 732 | "source": [ 733 | "arr_copy" 734 | ] 735 | }, 736 | { 737 | "cell_type": "code", 738 | "execution_count": 37, 739 | "metadata": {}, 740 | "outputs": [], 741 | "source": [ 742 | "arr_copy[:]=100" 743 | ] 744 | }, 745 | { 746 | "cell_type": "code", 747 | "execution_count": 38, 748 | "metadata": {}, 749 | "outputs": [ 750 | { 751 | "data": { 752 | "text/plain": [ 753 | "array([99, 99, 99, 99, 99, 99, 6, 7, 8, 9, 10])" 754 | ] 755 | }, 756 | "execution_count": 38, 757 | "metadata": 
{}, 758 | "output_type": "execute_result" 759 | } 760 | ], 761 | "source": [ 762 | "arr\n" 763 | ] 764 | }, 765 | { 766 | "cell_type": "code", 767 | "execution_count": 39, 768 | "metadata": {}, 769 | "outputs": [ 770 | { 771 | "data": { 772 | "text/plain": [ 773 | "array([100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100])" 774 | ] 775 | }, 776 | "execution_count": 39, 777 | "metadata": {}, 778 | "output_type": "execute_result" 779 | } 780 | ], 781 | "source": [ 782 | "arr_copy" 783 | ] 784 | }, 785 | { 786 | "cell_type": "markdown", 787 | "metadata": {}, 788 | "source": [ 789 | "**Indexing for 2D array**" 790 | ] 791 | }, 792 | { 793 | "cell_type": "code", 794 | "execution_count": 40, 795 | "metadata": {}, 796 | "outputs": [ 797 | { 798 | "data": { 799 | "text/plain": [ 800 | "array([[ 5, 10, 15],\n", 801 | " [35, 40, 45]])" 802 | ] 803 | }, 804 | "execution_count": 40, 805 | "metadata": {}, 806 | "output_type": "execute_result" 807 | } 808 | ], 809 | "source": [ 810 | "arr_2d=np.array([[5,10,15],[35,40,45]])\n", 811 | "arr_2d" 812 | ] 813 | }, 814 | { 815 | "cell_type": "code", 816 | "execution_count": 42, 817 | "metadata": {}, 818 | "outputs": [ 819 | { 820 | "data": { 821 | "text/plain": [ 822 | "10" 823 | ] 824 | }, 825 | "execution_count": 42, 826 | "metadata": {}, 827 | "output_type": "execute_result" 828 | } 829 | ], 830 | "source": [ 831 | "arr_2d[0][1]" 832 | ] 833 | }, 834 | { 835 | "cell_type": "code", 836 | "execution_count": 43, 837 | "metadata": {}, 838 | "outputs": [ 839 | { 840 | "data": { 841 | "text/plain": [ 842 | "array([ 5, 10, 15])" 843 | ] 844 | }, 845 | "execution_count": 43, 846 | "metadata": {}, 847 | "output_type": "execute_result" 848 | } 849 | ], 850 | "source": [ 851 | "arr_2d[0]" 852 | ] 853 | }, 854 | { 855 | "cell_type": "code", 856 | "execution_count": 44, 857 | "metadata": {}, 858 | "outputs": [ 859 | { 860 | "data": { 861 | "text/plain": [ 862 | "array([[10, 15]])" 863 | ] 864 | }, 865 | "execution_count": 44, 866 | 
"metadata": {}, 867 | "output_type": "execute_result" 868 | } 869 | ], 870 | "source": [ 871 | "arr_2d[:1,1:]" 872 | ] 873 | }, 874 | { 875 | "cell_type": "code", 876 | "execution_count": 45, 877 | "metadata": {}, 878 | "outputs": [], 879 | "source": [ 880 | "arr=np.arange(1,11)" 881 | ] 882 | }, 883 | { 884 | "cell_type": "code", 885 | "execution_count": 46, 886 | "metadata": {}, 887 | "outputs": [ 888 | { 889 | "data": { 890 | "text/plain": [ 891 | "array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])" 892 | ] 893 | }, 894 | "execution_count": 46, 895 | "metadata": {}, 896 | "output_type": "execute_result" 897 | } 898 | ], 899 | "source": [ 900 | "arr" 901 | ] 902 | }, 903 | { 904 | "cell_type": "code", 905 | "execution_count": 48, 906 | "metadata": {}, 907 | "outputs": [ 908 | { 909 | "data": { 910 | "text/plain": [ 911 | "array([False, False, False, False, False, True, True, True, True,\n", 912 | " True])" 913 | ] 914 | }, 915 | "execution_count": 48, 916 | "metadata": {}, 917 | "output_type": "execute_result" 918 | } 919 | ], 920 | "source": [ 921 | "bool_arr=arr>5\n", 922 | "bool_arr" 923 | ] 924 | }, 925 | { 926 | "cell_type": "markdown", 927 | "metadata": {}, 928 | "source": [ 929 | "We can use the above code, for **Conditional selection!**" 930 | ] 931 | }, 932 | { 933 | "cell_type": "code", 934 | "execution_count": 49, 935 | "metadata": {}, 936 | "outputs": [ 937 | { 938 | "data": { 939 | "text/plain": [ 940 | "array([ 6, 7, 8, 9, 10])" 941 | ] 942 | }, 943 | "execution_count": 49, 944 | "metadata": {}, 945 | "output_type": "execute_result" 946 | } 947 | ], 948 | "source": [ 949 | "arr[bool_arr]\n", 950 | "#prints to true values" 951 | ] 952 | }, 953 | { 954 | "cell_type": "code", 955 | "execution_count": 51, 956 | "metadata": {}, 957 | "outputs": [ 958 | { 959 | "data": { 960 | "text/plain": [ 961 | "array([[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9],\n", 962 | " [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],\n", 963 | " [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],\n", 964 | " [30, 31, 
32, 33, 34, 35, 36, 37, 38, 39],\n", 965 | " [40, 41, 42, 43, 44, 45, 46, 47, 48, 49]])" 966 | ] 967 | }, 968 | "execution_count": 51, 969 | "metadata": {}, 970 | "output_type": "execute_result" 971 | } 972 | ], 973 | "source": [ 974 | "arr_2d=np.arange(50).reshape(5,10)\n", 975 | "arr_2d" 976 | ] 977 | }, 978 | { 979 | "cell_type": "markdown", 980 | "metadata": {}, 981 | "source": [ 982 | "# Numpy Operations!" 983 | ] 984 | }, 985 | { 986 | "cell_type": "markdown", 987 | "metadata": {}, 988 | "source": [ 989 | "Array with Array" 990 | ] 991 | }, 992 | { 993 | "cell_type": "code", 994 | "execution_count": 52, 995 | "metadata": {}, 996 | "outputs": [ 997 | { 998 | "data": { 999 | "text/plain": [ 1000 | "array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])" 1001 | ] 1002 | }, 1003 | "execution_count": 52, 1004 | "metadata": {}, 1005 | "output_type": "execute_result" 1006 | } 1007 | ], 1008 | "source": [ 1009 | "arr=np.arange(0,11)\n", 1010 | "arr" 1011 | ] 1012 | }, 1013 | { 1014 | "cell_type": "code", 1015 | "execution_count": 53, 1016 | "metadata": {}, 1017 | "outputs": [ 1018 | { 1019 | "data": { 1020 | "text/plain": [ 1021 | "array([ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20])" 1022 | ] 1023 | }, 1024 | "execution_count": 53, 1025 | "metadata": {}, 1026 | "output_type": "execute_result" 1027 | } 1028 | ], 1029 | "source": [ 1030 | "arr+arr" 1031 | ] 1032 | }, 1033 | { 1034 | "cell_type": "code", 1035 | "execution_count": 54, 1036 | "metadata": {}, 1037 | "outputs": [ 1038 | { 1039 | "data": { 1040 | "text/plain": [ 1041 | "array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])" 1042 | ] 1043 | }, 1044 | "execution_count": 54, 1045 | "metadata": {}, 1046 | "output_type": "execute_result" 1047 | } 1048 | ], 1049 | "source": [ 1050 | "arr-arr" 1051 | ] 1052 | }, 1053 | { 1054 | "cell_type": "code", 1055 | "execution_count": 55, 1056 | "metadata": {}, 1057 | "outputs": [ 1058 | { 1059 | "data": { 1060 | "text/plain": [ 1061 | "array([ 0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100])" 1062 | ] 1063 | 
}, 1064 | "execution_count": 55, 1065 | "metadata": {}, 1066 | "output_type": "execute_result" 1067 | } 1068 | ], 1069 | "source": [ 1070 | "arr*arr" 1071 | ] 1072 | }, 1073 | { 1074 | "cell_type": "code", 1075 | "execution_count": 56, 1076 | "metadata": {}, 1077 | "outputs": [ 1078 | { 1079 | "data": { 1080 | "text/plain": [ 1081 | "array([100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110])" 1082 | ] 1083 | }, 1084 | "execution_count": 56, 1085 | "metadata": {}, 1086 | "output_type": "execute_result" 1087 | } 1088 | ], 1089 | "source": [ 1090 | "arr+100" 1091 | ] 1092 | }, 1093 | { 1094 | "cell_type": "code", 1095 | "execution_count": 57, 1096 | "metadata": {}, 1097 | "outputs": [ 1098 | { 1099 | "data": { 1100 | "text/plain": [ 1101 | "array([-100, -99, -98, -97, -96, -95, -94, -93, -92, -91, -90])" 1102 | ] 1103 | }, 1104 | "execution_count": 57, 1105 | "metadata": {}, 1106 | "output_type": "execute_result" 1107 | } 1108 | ], 1109 | "source": [ 1110 | "arr-100" 1111 | ] 1112 | }, 1113 | { 1114 | "cell_type": "code", 1115 | "execution_count": 59, 1116 | "metadata": {}, 1117 | "outputs": [ 1118 | { 1119 | "ename": "ZeroDivisionError", 1120 | "evalue": "division by zero", 1121 | "output_type": "error", 1122 | "traceback": [ 1123 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 1124 | "\u001b[0;31mZeroDivisionError\u001b[0m Traceback (most recent call last)", 1125 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;36m1\u001b[0m\u001b[0;34m/\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 1126 | "\u001b[0;31mZeroDivisionError\u001b[0m: division by zero" 1127 | ] 1128 | } 1129 | ], 1130 | "source": [ 1131 | "1/0" 1132 | ] 1133 | }, 1134 | { 1135 | "cell_type": "code", 1136 | "execution_count": 60, 1137 | "metadata": {}, 1138 | "outputs": [ 1139 | { 1140 | "name": "stderr", 1141 | "output_type": "stream", 1142 | 
"text": [ 1143 | ":2: RuntimeWarning: invalid value encountered in true_divide\n", 1144 | " arr/arr\n" 1145 | ] 1146 | }, 1147 | { 1148 | "data": { 1149 | "text/plain": [ 1150 | "array([nan, 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])" 1151 | ] 1152 | }, 1153 | "execution_count": 60, 1154 | "metadata": {}, 1155 | "output_type": "execute_result" 1156 | } 1157 | ], 1158 | "source": [ 1159 | "#numpy wont give you the above error\n", 1160 | "arr/arr" 1161 | ] 1162 | }, 1163 | { 1164 | "cell_type": "code", 1165 | "execution_count": 61, 1166 | "metadata": {}, 1167 | "outputs": [ 1168 | { 1169 | "data": { 1170 | "text/plain": [ 1171 | "array([0. , 1. , 1.41421356, 1.73205081, 2. ,\n", 1172 | " 2.23606798, 2.44948974, 2.64575131, 2.82842712, 3. ,\n", 1173 | " 3.16227766])" 1174 | ] 1175 | }, 1176 | "execution_count": 61, 1177 | "metadata": {}, 1178 | "output_type": "execute_result" 1179 | } 1180 | ], 1181 | "source": [ 1182 | "np.sqrt(arr)" 1183 | ] 1184 | }, 1185 | { 1186 | "cell_type": "code", 1187 | "execution_count": 63, 1188 | "metadata": {}, 1189 | "outputs": [ 1190 | { 1191 | "data": { 1192 | "text/plain": [ 1193 | "array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,\n", 1194 | " 5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,\n", 1195 | " 2.98095799e+03, 8.10308393e+03, 2.20264658e+04])" 1196 | ] 1197 | }, 1198 | "execution_count": 63, 1199 | "metadata": {}, 1200 | "output_type": "execute_result" 1201 | } 1202 | ], 1203 | "source": [ 1204 | "np.exp(arr)" 1205 | ] 1206 | }, 1207 | { 1208 | "cell_type": "code", 1209 | "execution_count": 65, 1210 | "metadata": {}, 1211 | "outputs": [ 1212 | { 1213 | "data": { 1214 | "text/plain": [ 1215 | "10" 1216 | ] 1217 | }, 1218 | "execution_count": 65, 1219 | "metadata": {}, 1220 | "output_type": "execute_result" 1221 | } 1222 | ], 1223 | "source": [ 1224 | "arr.max()" 1225 | ] 1226 | }, 1227 | { 1228 | "cell_type": "code", 1229 | "execution_count": 66, 1230 | "metadata": {}, 1231 | "outputs": [ 
1232 | { 1233 | "data": { 1234 | "text/plain": [ 1235 | "10" 1236 | ] 1237 | }, 1238 | "execution_count": 66, 1239 | "metadata": {}, 1240 | "output_type": "execute_result" 1241 | } 1242 | ], 1243 | "source": [ 1244 | "np.max(arr)" 1245 | ] 1246 | }, 1247 | { 1248 | "cell_type": "code", 1249 | "execution_count": null, 1250 | "metadata": {}, 1251 | "outputs": [], 1252 | "source": [] 1253 | } 1254 | ], 1255 | "metadata": { 1256 | "kernelspec": { 1257 | "display_name": "Python 3", 1258 | "language": "python", 1259 | "name": "python3" 1260 | }, 1261 | "language_info": { 1262 | "codemirror_mode": { 1263 | "name": "ipython", 1264 | "version": 3 1265 | }, 1266 | "file_extension": ".py", 1267 | "mimetype": "text/x-python", 1268 | "name": "python", 1269 | "nbconvert_exporter": "python", 1270 | "pygments_lexer": "ipython3", 1271 | "version": "3.8.5" 1272 | } 1273 | }, 1274 | "nbformat": 4, 1275 | "nbformat_minor": 4 1276 | } 1277 | -------------------------------------------------------------------------------- /ML basics/Excel_Sample.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishikkkkaaaa/Python-ML/8c64a5d82314a5603e2e892ba991fabdee8932f2/ML basics/Excel_Sample.xlsx -------------------------------------------------------------------------------- /ML basics/Name: -------------------------------------------------------------------------------- 1 | a,b,c,d 2 | 0,1,2,3 3 | 4,5,6,7 4 | 8,9,10,11 5 | 12,13,14,15 6 | -------------------------------------------------------------------------------- /ML basics/SMALL PROJECTS/PANDAS/Ecommerce Purchases Exercise -checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "# Ecommerce Purchases Exercise" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": {}, 15 | "outputs": [], 
16 | "source": [ 17 | "import pandas as pd" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 3, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "ecom=pd.read_csv('Ecommerce Purchases')" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 4, 32 | "metadata": {}, 33 | "outputs": [ 34 | { 35 | "data": { 36 | "text/html": [ 37 | "
\n", 38 | "\n", 51 | "\n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | "
AddressLotAM or PMBrowser InfoCompanyCredit CardCC Exp DateCC Security CodeCC ProviderEmailJobIP AddressLanguagePurchase Price
016629 Pace Camp Apt. 448\\nAlexisborough, NE 77...46 inPMOpera/9.56.(X11; Linux x86_64; sl-SI) Presto/2...Martinez-Herman601192906112340602/20900JCB 16 digitpdunlap@yahoo.comScientist, product/process development149.146.147.205el98.14
19374 Jasmine Spurs Suite 508\\nSouth John, TN 8...28 rnPMOpera/8.93.(Windows 98; Win 9x 4.90; en-US) Pr...Fletcher, Richards and Whitaker333775816964535611/18561Mastercardanthony41@reed.comDrilling engineer15.160.41.51fr70.73
2Unit 0065 Box 5052\\nDPO AP 2745094 vEPMMozilla/5.0 (compatible; MSIE 9.0; Windows NT ...Simpson, Williams and Pham67595766612508/19699JCB 16 digitamymiller@morales-harrison.comCustomer service manager132.207.160.22de0.95
37780 Julia Fords\\nNew Stacy, WA 4579836 vmPMMozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0 ...Williams, Marshall and Buchanan601157850443071002/24384Discoverbrent16@olson-robinson.infoDrilling engineer30.250.74.19es78.04
423012 Munoz Drive Suite 337\\nNew Cynthia, TX 5...20 IEAMOpera/9.58.(X11; Linux x86_64; it-IT) Presto/2...Brown, Watson and Andrews601145662320799810/25678Diners Club / Carte Blanchechristopherwright@gmail.comFine artist24.140.33.94es77.82
\n", 159 | "
" 160 | ], 161 | "text/plain": [ 162 | " Address Lot AM or PM \\\n", 163 | "0 16629 Pace Camp Apt. 448\\nAlexisborough, NE 77... 46 in PM \n", 164 | "1 9374 Jasmine Spurs Suite 508\\nSouth John, TN 8... 28 rn PM \n", 165 | "2 Unit 0065 Box 5052\\nDPO AP 27450 94 vE PM \n", 166 | "3 7780 Julia Fords\\nNew Stacy, WA 45798 36 vm PM \n", 167 | "4 23012 Munoz Drive Suite 337\\nNew Cynthia, TX 5... 20 IE AM \n", 168 | "\n", 169 | " Browser Info \\\n", 170 | "0 Opera/9.56.(X11; Linux x86_64; sl-SI) Presto/2... \n", 171 | "1 Opera/8.93.(Windows 98; Win 9x 4.90; en-US) Pr... \n", 172 | "2 Mozilla/5.0 (compatible; MSIE 9.0; Windows NT ... \n", 173 | "3 Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0 ... \n", 174 | "4 Opera/9.58.(X11; Linux x86_64; it-IT) Presto/2... \n", 175 | "\n", 176 | " Company Credit Card CC Exp Date \\\n", 177 | "0 Martinez-Herman 6011929061123406 02/20 \n", 178 | "1 Fletcher, Richards and Whitaker 3337758169645356 11/18 \n", 179 | "2 Simpson, Williams and Pham 675957666125 08/19 \n", 180 | "3 Williams, Marshall and Buchanan 6011578504430710 02/24 \n", 181 | "4 Brown, Watson and Andrews 6011456623207998 10/25 \n", 182 | "\n", 183 | " CC Security Code CC Provider \\\n", 184 | "0 900 JCB 16 digit \n", 185 | "1 561 Mastercard \n", 186 | "2 699 JCB 16 digit \n", 187 | "3 384 Discover \n", 188 | "4 678 Diners Club / Carte Blanche \n", 189 | "\n", 190 | " Email Job \\\n", 191 | "0 pdunlap@yahoo.com Scientist, product/process development \n", 192 | "1 anthony41@reed.com Drilling engineer \n", 193 | "2 amymiller@morales-harrison.com Customer service manager \n", 194 | "3 brent16@olson-robinson.info Drilling engineer \n", 195 | "4 christopherwright@gmail.com Fine artist \n", 196 | "\n", 197 | " IP Address Language Purchase Price \n", 198 | "0 149.146.147.205 el 98.14 \n", 199 | "1 15.160.41.51 fr 70.73 \n", 200 | "2 132.207.160.22 de 0.95 \n", 201 | "3 30.250.74.19 es 78.04 \n", 202 | "4 24.140.33.94 es 77.82 " 203 | ] 204 | }, 205 | "execution_count": 4, 206 | 
"metadata": {}, 207 | "output_type": "execute_result" 208 | } 209 | ], 210 | "source": [ 211 | "ecom.head()" 212 | ] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "metadata": {}, 217 | "source": [ 218 | "** How many rows and columns are there? **" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": 6, 224 | "metadata": {}, 225 | "outputs": [ 226 | { 227 | "name": "stdout", 228 | "output_type": "stream", 229 | "text": [ 230 | "\n", 231 | "RangeIndex: 10000 entries, 0 to 9999\n", 232 | "Data columns (total 14 columns):\n", 233 | " # Column Non-Null Count Dtype \n", 234 | "--- ------ -------------- ----- \n", 235 | " 0 Address 10000 non-null object \n", 236 | " 1 Lot 10000 non-null object \n", 237 | " 2 AM or PM 10000 non-null object \n", 238 | " 3 Browser Info 10000 non-null object \n", 239 | " 4 Company 10000 non-null object \n", 240 | " 5 Credit Card 10000 non-null int64 \n", 241 | " 6 CC Exp Date 10000 non-null object \n", 242 | " 7 CC Security Code 10000 non-null int64 \n", 243 | " 8 CC Provider 10000 non-null object \n", 244 | " 9 Email 10000 non-null object \n", 245 | " 10 Job 10000 non-null object \n", 246 | " 11 IP Address 10000 non-null object \n", 247 | " 12 Language 10000 non-null object \n", 248 | " 13 Purchase Price 10000 non-null float64\n", 249 | "dtypes: float64(1), int64(2), object(11)\n", 250 | "memory usage: 1.1+ MB\n" 251 | ] 252 | } 253 | ], 254 | "source": [ 255 | "ecom.info()" 256 | ] 257 | }, 258 | { 259 | "cell_type": "markdown", 260 | "metadata": {}, 261 | "source": [ 262 | "** What is the average Purchase Price? 
**" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 7, 268 | "metadata": {}, 269 | "outputs": [ 270 | { 271 | "data": { 272 | "text/plain": [ 273 | "50.34730200000025" 274 | ] 275 | }, 276 | "execution_count": 7, 277 | "metadata": {}, 278 | "output_type": "execute_result" 279 | } 280 | ], 281 | "source": [ 282 | "ecom['Purchase Price'].mean()" 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "metadata": {}, 288 | "source": [ 289 | "** What were the highest and lowest purchase prices? **" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": 8, 295 | "metadata": {}, 296 | "outputs": [ 297 | { 298 | "data": { 299 | "text/plain": [ 300 | "99.99" 301 | ] 302 | }, 303 | "execution_count": 8, 304 | "metadata": {}, 305 | "output_type": "execute_result" 306 | } 307 | ], 308 | "source": [ 309 | "ecom['Purchase Price'].max()" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": 9, 315 | "metadata": {}, 316 | "outputs": [ 317 | { 318 | "data": { 319 | "text/plain": [ 320 | "0.0" 321 | ] 322 | }, 323 | "execution_count": 9, 324 | "metadata": {}, 325 | "output_type": "execute_result" 326 | } 327 | ], 328 | "source": [ 329 | "ecom['Purchase Price'].min()" 330 | ] 331 | }, 332 | { 333 | "cell_type": "markdown", 334 | "metadata": {}, 335 | "source": [ 336 | "** How many people have English 'en' as their Language of choice on the website? **" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": 11, 342 | "metadata": {}, 343 | "outputs": [ 344 | { 345 | "data": { 346 | "text/plain": [ 347 | "0 False\n", 348 | "1 False\n", 349 | "2 False\n", 350 | "3 False\n", 351 | "4 False\n", 352 | " ... 
\n", 353 | "9995 False\n", 354 | "9996 False\n", 355 | "9997 False\n", 356 | "9998 False\n", 357 | "9999 False\n", 358 | "Name: Language, Length: 10000, dtype: bool" 359 | ] 360 | }, 361 | "execution_count": 11, 362 | "metadata": {}, 363 | "output_type": "execute_result" 364 | } 365 | ], 366 | "source": [ 367 | "ecom['Language']=='en'" 368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": 13, 373 | "metadata": {}, 374 | "outputs": [ 375 | { 376 | "data": { 377 | "text/plain": [ 378 | "Address 1098\n", 379 | "Lot 1098\n", 380 | "AM or PM 1098\n", 381 | "Browser Info 1098\n", 382 | "Company 1098\n", 383 | "Credit Card 1098\n", 384 | "CC Exp Date 1098\n", 385 | "CC Security Code 1098\n", 386 | "CC Provider 1098\n", 387 | "Email 1098\n", 388 | "Job 1098\n", 389 | "IP Address 1098\n", 390 | "Language 1098\n", 391 | "Purchase Price 1098\n", 392 | "dtype: int64" 393 | ] 394 | }, 395 | "execution_count": 13, 396 | "metadata": {}, 397 | "output_type": "execute_result" 398 | } 399 | ], 400 | "source": [ 401 | "ecom[ecom['Language']=='en'].count()" 402 | ] 403 | }, 404 | { 405 | "cell_type": "markdown", 406 | "metadata": {}, 407 | "source": [ 408 | "** How many people have the job title of \"Lawyer\" ? 
**\n" 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": 15, 414 | "metadata": {}, 415 | "outputs": [ 416 | { 417 | "name": "stdout", 418 | "output_type": "stream", 419 | "text": [ 420 | "\n", 421 | "Int64Index: 30 entries, 470 to 9979\n", 422 | "Data columns (total 14 columns):\n", 423 | " # Column Non-Null Count Dtype \n", 424 | "--- ------ -------------- ----- \n", 425 | " 0 Address 30 non-null object \n", 426 | " 1 Lot 30 non-null object \n", 427 | " 2 AM or PM 30 non-null object \n", 428 | " 3 Browser Info 30 non-null object \n", 429 | " 4 Company 30 non-null object \n", 430 | " 5 Credit Card 30 non-null int64 \n", 431 | " 6 CC Exp Date 30 non-null object \n", 432 | " 7 CC Security Code 30 non-null int64 \n", 433 | " 8 CC Provider 30 non-null object \n", 434 | " 9 Email 30 non-null object \n", 435 | " 10 Job 30 non-null object \n", 436 | " 11 IP Address 30 non-null object \n", 437 | " 12 Language 30 non-null object \n", 438 | " 13 Purchase Price 30 non-null float64\n", 439 | "dtypes: float64(1), int64(2), object(11)\n", 440 | "memory usage: 3.5+ KB\n" 441 | ] 442 | } 443 | ], 444 | "source": [ 445 | "ecom[ecom['Job']=='Lawyer'].info()" 446 | ] 447 | }, 448 | { 449 | "cell_type": "markdown", 450 | "metadata": {}, 451 | "source": [ 452 | "** How many people made the purchase during the AM and how many people made the purchase during PM ? **\n", 453 | "\n" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": 24, 459 | "metadata": {}, 460 | "outputs": [ 461 | { 462 | "data": { 463 | "text/plain": [ 464 | "PM 5068\n", 465 | "AM 4932\n", 466 | "Name: AM or PM, dtype: int64" 467 | ] 468 | }, 469 | "execution_count": 24, 470 | "metadata": {}, 471 | "output_type": "execute_result" 472 | } 473 | ], 474 | "source": [ 475 | "ecom['AM or PM'].value_counts()" 476 | ] 477 | }, 478 | { 479 | "cell_type": "markdown", 480 | "metadata": {}, 481 | "source": [ 482 | "** What are the 5 most common Job Titles? 
**" 483 | ] 484 | }, 485 | { 486 | "cell_type": "code", 487 | "execution_count": 26, 488 | "metadata": {}, 489 | "outputs": [ 490 | { 491 | "data": { 492 | "text/plain": [ 493 | "Interior and spatial designer 31\n", 494 | "Lawyer 30\n", 495 | "Social researcher 28\n", 496 | "Research officer, political party 27\n", 497 | "Designer, jewellery 27\n", 498 | "Name: Job, dtype: int64" 499 | ] 500 | }, 501 | "execution_count": 26, 502 | "metadata": {}, 503 | "output_type": "execute_result" 504 | } 505 | ], 506 | "source": [ 507 | "ecom['Job'].value_counts().head(5)" 508 | ] 509 | }, 510 | { 511 | "cell_type": "markdown", 512 | "metadata": {}, 513 | "source": [ 514 | "** Someone made a purchase that came from Lot: \"90 WT\" , what was the Purchase Price for this transaction? **" 515 | ] 516 | }, 517 | { 518 | "cell_type": "code", 519 | "execution_count": 27, 520 | "metadata": {}, 521 | "outputs": [ 522 | { 523 | "data": { 524 | "text/plain": [ 525 | "513 75.1\n", 526 | "Name: Purchase Price, dtype: float64" 527 | ] 528 | }, 529 | "execution_count": 27, 530 | "metadata": {}, 531 | "output_type": "execute_result" 532 | } 533 | ], 534 | "source": [ 535 | "ecom[ecom['Lot']=='90 WT']['Purchase Price']" 536 | ] 537 | }, 538 | { 539 | "cell_type": "markdown", 540 | "metadata": {}, 541 | "source": [ 542 | "** What is the email of the person with the following Credit Card Number: 4926535242672853 **" 543 | ] 544 | }, 545 | { 546 | "cell_type": "code", 547 | "execution_count": 31, 548 | "metadata": {}, 549 | "outputs": [ 550 | { 551 | "data": { 552 | "text/plain": [ 553 | "1234 bondellen@williams-garza.com\n", 554 | "Name: Email, dtype: object" 555 | ] 556 | }, 557 | "execution_count": 31, 558 | "metadata": {}, 559 | "output_type": "execute_result" 560 | } 561 | ], 562 | "source": [ 563 | "ecom[ecom['Credit Card']==4926535242672853]['Email']" 564 | ] 565 | }, 566 | { 567 | "cell_type": "markdown", 568 | "metadata": {}, 569 | "source": [ 570 | "** How many people have American 
Express as their Credit Card Provider *and* made a purchase above $95 ?**" 571 | ] 572 | }, 573 | { 574 | "cell_type": "code", 575 | "execution_count": 32, 576 | "metadata": {}, 577 | "outputs": [ 578 | { 579 | "data": { 580 | "text/plain": [ 581 | "Address 39\n", 582 | "Lot 39\n", 583 | "AM or PM 39\n", 584 | "Browser Info 39\n", 585 | "Company 39\n", 586 | "Credit Card 39\n", 587 | "CC Exp Date 39\n", 588 | "CC Security Code 39\n", 589 | "CC Provider 39\n", 590 | "Email 39\n", 591 | "Job 39\n", 592 | "IP Address 39\n", 593 | "Language 39\n", 594 | "Purchase Price 39\n", 595 | "dtype: int64" 596 | ] 597 | }, 598 | "execution_count": 32, 599 | "metadata": {}, 600 | "output_type": "execute_result" 601 | } 602 | ], 603 | "source": [ 604 | "ecom[(ecom['CC Provider']=='American Express') & (ecom['Purchase Price']>95)].count()" 605 | ] 606 | }, 607 | { 608 | "cell_type": "markdown", 609 | "metadata": {}, 610 | "source": [ 611 | "** How many people have a credit card that expires in 2025? **" 612 | ] 613 | }, 614 | { 615 | "cell_type": "code", 616 | "execution_count": 37, 617 | "metadata": {}, 618 | "outputs": [ 619 | { 620 | "data": { 621 | "text/plain": [ 622 | "1033" 623 | ] 624 | }, 625 | "execution_count": 37, 626 | "metadata": {}, 627 | "output_type": "execute_result" 628 | } 629 | ], 630 | "source": [ 631 | "sum(ecom['CC Exp Date'].apply(lambda x: x[3:]) == '25')" 632 | ] 633 | }, 634 | { 635 | "cell_type": "markdown", 636 | "metadata": {}, 637 | "source": [ 638 | "** What are the top 5 most popular email providers/hosts (e.g. gmail.com, yahoo.com, etc...) 
**" 639 | ] 640 | }, 641 | { 642 | "cell_type": "code", 643 | "execution_count": 39, 644 | "metadata": {}, 645 | "outputs": [ 646 | { 647 | "data": { 648 | "text/plain": [ 649 | "hotmail.com 1638\n", 650 | "yahoo.com 1616\n", 651 | "gmail.com 1605\n", 652 | "smith.com 42\n", 653 | "williams.com 37\n", 654 | "Name: Email, dtype: int64" 655 | ] 656 | }, 657 | "execution_count": 39, 658 | "metadata": {}, 659 | "output_type": "execute_result" 660 | } 661 | ], 662 | "source": [ 663 | "ecom['Email'].apply(lambda x:x.split('@')[1]).value_counts().head(5)" 664 | ] 665 | } 666 | ], 667 | "metadata": { 668 | "kernelspec": { 669 | "display_name": "Python 3", 670 | "language": "python", 671 | "name": "python3" 672 | }, 673 | "language_info": { 674 | "codemirror_mode": { 675 | "name": "ipython", 676 | "version": 3 677 | }, 678 | "file_extension": ".py", 679 | "mimetype": "text/x-python", 680 | "name": "python", 681 | "nbconvert_exporter": "python", 682 | "pygments_lexer": "ipython3", 683 | "version": "3.8.5" 684 | } 685 | }, 686 | "nbformat": 4, 687 | "nbformat_minor": 1 688 | } 689 | -------------------------------------------------------------------------------- /ML basics/SMALL PROJECTS/PANDAS/SF Salaries Exercise.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# SF Salaries Exercise \n", 8 | "\n", 9 | "[SF Salaries Dataset](https://www.kaggle.com/kaggle/sf-salaries) " 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 10, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "** Read Salaries.csv as a dataframe called sal.**" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 15, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "sal=pd.read_csv('Salaries.csv')  # exact on-disk name; case matters on case-sensitive filesystems"
35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "** Check the head of the DataFrame. **" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 16, 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "data": { 51 | "text/html": [ 52 | "
\n", 53 | "\n", 66 | "\n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | "
IdEmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesAgencyStatus
01NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.00400184.25NaN567595.43567595.432011NaNSan FranciscoNaN
12GARY JIMENEZCAPTAIN III (POLICE DEPARTMENT)155966.02245131.88137811.38NaN538909.28538909.282011NaNSan FranciscoNaN
23ALBERT PARDINICAPTAIN III (POLICE DEPARTMENT)212739.13106088.1816452.60NaN335279.91335279.912011NaNSan FranciscoNaN
34CHRISTOPHER CHONGWIRE ROPE CABLE MAINTENANCE MECHANIC77916.0056120.71198306.90NaN332343.61332343.612011NaNSan FranciscoNaN
45PATRICK GARDNERDEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT)134401.609737.00182234.59NaN326373.19326373.192011NaNSan FranciscoNaN
\n", 168 | "
" 169 | ], 170 | "text/plain": [ 171 | " Id EmployeeName JobTitle \\\n", 172 | "0 1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n", 173 | "1 2 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n", 174 | "2 3 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n", 175 | "3 4 CHRISTOPHER CHONG WIRE ROPE CABLE MAINTENANCE MECHANIC \n", 176 | "4 5 PATRICK GARDNER DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) \n", 177 | "\n", 178 | " BasePay OvertimePay OtherPay Benefits TotalPay TotalPayBenefits \\\n", 179 | "0 167411.18 0.00 400184.25 NaN 567595.43 567595.43 \n", 180 | "1 155966.02 245131.88 137811.38 NaN 538909.28 538909.28 \n", 181 | "2 212739.13 106088.18 16452.60 NaN 335279.91 335279.91 \n", 182 | "3 77916.00 56120.71 198306.90 NaN 332343.61 332343.61 \n", 183 | "4 134401.60 9737.00 182234.59 NaN 326373.19 326373.19 \n", 184 | "\n", 185 | " Year Notes Agency Status \n", 186 | "0 2011 NaN San Francisco NaN \n", 187 | "1 2011 NaN San Francisco NaN \n", 188 | "2 2011 NaN San Francisco NaN \n", 189 | "3 2011 NaN San Francisco NaN \n", 190 | "4 2011 NaN San Francisco NaN " 191 | ] 192 | }, 193 | "execution_count": 16, 194 | "metadata": {}, 195 | "output_type": "execute_result" 196 | } 197 | ], 198 | "source": [ 199 | "sal.head()" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": {}, 205 | "source": [ 206 | "** Use the .info() method to find out how many entries there are.**" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 17, 212 | "metadata": {}, 213 | "outputs": [ 214 | { 215 | "name": "stdout", 216 | "output_type": "stream", 217 | "text": [ 218 | "\n", 219 | "RangeIndex: 148654 entries, 0 to 148653\n", 220 | "Data columns (total 13 columns):\n", 221 | " # Column Non-Null Count Dtype \n", 222 | "--- ------ -------------- ----- \n", 223 | " 0 Id 148654 non-null int64 \n", 224 | " 1 EmployeeName 148654 non-null object \n", 225 | " 2 JobTitle 148654 non-null object \n", 226 | " 3 BasePay 148045 non-null 
float64\n", 227 | " 4 OvertimePay 148650 non-null float64\n", 228 | " 5 OtherPay 148650 non-null float64\n", 229 | " 6 Benefits 112491 non-null float64\n", 230 | " 7 TotalPay 148654 non-null float64\n", 231 | " 8 TotalPayBenefits 148654 non-null float64\n", 232 | " 9 Year 148654 non-null int64 \n", 233 | " 10 Notes 0 non-null float64\n", 234 | " 11 Agency 148654 non-null object \n", 235 | " 12 Status 0 non-null float64\n", 236 | "dtypes: float64(8), int64(2), object(3)\n", 237 | "memory usage: 14.7+ MB\n" 238 | ] 239 | } 240 | ], 241 | "source": [ 242 | "sal.info()" 243 | ] 244 | }, 245 | { 246 | "cell_type": "markdown", 247 | "metadata": {}, 248 | "source": [ 249 | "**What is the average BasePay ?**" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": 18, 255 | "metadata": {}, 256 | "outputs": [ 257 | { 258 | "data": { 259 | "text/plain": [ 260 | "66325.44884050643" 261 | ] 262 | }, 263 | "execution_count": 18, 264 | "metadata": {}, 265 | "output_type": "execute_result" 266 | } 267 | ], 268 | "source": [ 269 | "sal['BasePay'].mean()" 270 | ] 271 | }, 272 | { 273 | "cell_type": "markdown", 274 | "metadata": {}, 275 | "source": [ 276 | "** What is the highest amount of OvertimePay in the dataset ? **" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": 19, 282 | "metadata": {}, 283 | "outputs": [ 284 | { 285 | "data": { 286 | "text/plain": [ 287 | "245131.88" 288 | ] 289 | }, 290 | "execution_count": 19, 291 | "metadata": {}, 292 | "output_type": "execute_result" 293 | } 294 | ], 295 | "source": [ 296 | "sal['OvertimePay'].max()" 297 | ] 298 | }, 299 | { 300 | "cell_type": "markdown", 301 | "metadata": {}, 302 | "source": [ 303 | "** What is the job title of JOSEPH DRISCOLL ? Note: Use all caps, otherwise you may get an answer that doesn't match up (there is also a lowercase Joseph Driscoll). 
**" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": 31, 309 | "metadata": {}, 310 | "outputs": [ 311 | { 312 | "data": { 313 | "text/plain": [ 314 | "24 CAPTAIN, FIRE SUPPRESSION\n", 315 | "Name: JobTitle, dtype: object" 316 | ] 317 | }, 318 | "execution_count": 31, 319 | "metadata": {}, 320 | "output_type": "execute_result" 321 | } 322 | ], 323 | "source": [ 324 | "sal[sal['EmployeeName']=='JOSEPH DRISCOLL']['JobTitle']" 325 | ] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | "metadata": {}, 330 | "source": [ 331 | "** How much does JOSEPH DRISCOLL make (including benefits)? **" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": 32, 337 | "metadata": {}, 338 | "outputs": [ 339 | { 340 | "data": { 341 | "text/plain": [ 342 | "24 270324.91\n", 343 | "Name: TotalPayBenefits, dtype: float64" 344 | ] 345 | }, 346 | "execution_count": 32, 347 | "metadata": {}, 348 | "output_type": "execute_result" 349 | } 350 | ], 351 | "source": [ 352 | "sal[sal['EmployeeName']=='JOSEPH DRISCOLL']['TotalPayBenefits']" 353 | ] 354 | }, 355 | { 356 | "cell_type": "markdown", 357 | "metadata": {}, 358 | "source": [ 359 | "** What is the name of highest paid person (including benefits)?**" 360 | ] 361 | }, 362 | { 363 | "cell_type": "code", 364 | "execution_count": 41, 365 | "metadata": {}, 366 | "outputs": [ 367 | { 368 | "data": { 369 | "text/plain": [ 370 | "0 True\n", 371 | "1 False\n", 372 | "2 False\n", 373 | "3 False\n", 374 | "4 False\n", 375 | " ... 
\n", 376 | "148649 False\n", 377 | "148650 False\n", 378 | "148651 False\n", 379 | "148652 False\n", 380 | "148653 False\n", 381 | "Name: TotalPayBenefits, Length: 148654, dtype: bool" 382 | ] 383 | }, 384 | "execution_count": 41, 385 | "metadata": {}, 386 | "output_type": "execute_result" 387 | } 388 | ], 389 | "source": [ 390 | "sal['TotalPayBenefits']==sal['TotalPayBenefits'].max()" 391 | ] 392 | }, 393 | { 394 | "cell_type": "code", 395 | "execution_count": 42, 396 | "metadata": {}, 397 | "outputs": [ 398 | { 399 | "data": { 400 | "text/html": [ 401 | "
\n", 402 | "\n", 415 | "\n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | "
IdEmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesAgencyStatus
01NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.0400184.25NaN567595.43567595.432011NaNSan FranciscoNaN
\n", 453 | "
" 454 | ], 455 | "text/plain": [ 456 | " Id EmployeeName JobTitle \\\n", 457 | "0 1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n", 458 | "\n", 459 | " BasePay OvertimePay OtherPay Benefits TotalPay TotalPayBenefits \\\n", 460 | "0 167411.18 0.0 400184.25 NaN 567595.43 567595.43 \n", 461 | "\n", 462 | " Year Notes Agency Status \n", 463 | "0 2011 NaN San Francisco NaN " 464 | ] 465 | }, 466 | "execution_count": 42, 467 | "metadata": {}, 468 | "output_type": "execute_result" 469 | } 470 | ], 471 | "source": [ 472 | "sal[sal['TotalPayBenefits']==sal['TotalPayBenefits'].max()]" 473 | ] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": 44, 478 | "metadata": {}, 479 | "outputs": [ 480 | { 481 | "data": { 482 | "text/plain": [ 483 | "0 NATHANIEL FORD\n", 484 | "Name: EmployeeName, dtype: object" 485 | ] 486 | }, 487 | "execution_count": 44, 488 | "metadata": {}, 489 | "output_type": "execute_result" 490 | } 491 | ], 492 | "source": [ 493 | "sal[sal['TotalPayBenefits']==sal['TotalPayBenefits'].max()]['EmployeeName']" 494 | ] 495 | }, 496 | { 497 | "cell_type": "markdown", 498 | "metadata": {}, 499 | "source": [ 500 | "** What is the name of lowest paid person (including benefits)?**" 501 | ] 502 | }, 503 | { 504 | "cell_type": "code", 505 | "execution_count": 47, 506 | "metadata": {}, 507 | "outputs": [ 508 | { 509 | "data": { 510 | "text/plain": [ 511 | "148653 Joe Lopez\n", 512 | "Name: EmployeeName, dtype: object" 513 | ] 514 | }, 515 | "execution_count": 47, 516 | "metadata": {}, 517 | "output_type": "execute_result" 518 | } 519 | ], 520 | "source": [ 521 | "sal[sal['TotalPayBenefits']==sal['TotalPayBenefits'].min()]['EmployeeName']" 522 | ] 523 | }, 524 | { 525 | "cell_type": "markdown", 526 | "metadata": {}, 527 | "source": [ 528 | "** What was the average (mean) BasePay of all employees per year? (2011-2014) ? 
**" 529 | ] 530 | }, 531 | { 532 | "cell_type": "code", 533 | "execution_count": 48, 534 | "metadata": {}, 535 | "outputs": [ 536 | { 537 | "data": { 538 | "text/plain": [ 539 | "Year\n", 540 | "2011 63595.956517\n", 541 | "2012 65436.406857\n", 542 | "2013 69630.030216\n", 543 | "2014 66564.421924\n", 544 | "Name: BasePay, dtype: float64" 545 | ] 546 | }, 547 | "execution_count": 48, 548 | "metadata": {}, 549 | "output_type": "execute_result" 550 | } 551 | ], 552 | "source": [ 553 | "sal.groupby('Year')['BasePay'].mean()  # select the column first so text columns are never averaged" 554 | ] 555 | }, 556 | { 557 | "cell_type": "code", 558 | "execution_count": 51, 559 | "metadata": {}, 560 | "outputs": [ 561 | { 562 | "data": { 563 | "text/plain": [ 564 | "array([2011, 2012, 2013, 2014])" 565 | ] 566 | }, 567 | "execution_count": 51, 568 | "metadata": {}, 569 | "output_type": "execute_result" 570 | } 571 | ], 572 | "source": [ 573 | "sal['Year'].unique()" 574 | ] 575 | }, 576 | { 577 | "cell_type": "markdown", 578 | "metadata": {}, 579 | "source": [ 580 | "** How many unique job titles are there? **" 581 | ] 582 | }, 583 | { 584 | "cell_type": "code", 585 | "execution_count": 52, 586 | "metadata": {}, 587 | "outputs": [ 588 | { 589 | "data": { 590 | "text/plain": [ 591 | "2159" 592 | ] 593 | }, 594 | "execution_count": 52, 595 | "metadata": {}, 596 | "output_type": "execute_result" 597 | } 598 | ], 599 | "source": [ 600 | "sal['JobTitle'].nunique()" 601 | ] 602 | }, 603 | { 604 | "cell_type": "markdown", 605 | "metadata": {}, 606 | "source": [ 607 | "** What are the top 5 most common jobs? 
**" 608 | ] 609 | }, 610 | { 611 | "cell_type": "code", 612 | "execution_count": 54, 613 | "metadata": {}, 614 | "outputs": [ 615 | { 616 | "data": { 617 | "text/plain": [ 618 | "Transit Operator 7036\n", 619 | "Special Nurse 4389\n", 620 | "Registered Nurse 3736\n", 621 | "Public Svc Aide-Public Works 2518\n", 622 | "Police Officer 3 2421\n", 623 | "Name: JobTitle, dtype: int64" 624 | ] 625 | }, 626 | "execution_count": 54, 627 | "metadata": {}, 628 | "output_type": "execute_result" 629 | } 630 | ], 631 | "source": [ 632 | "sal['JobTitle'].value_counts().head(5)" 633 | ] 634 | }, 635 | { 636 | "cell_type": "markdown", 637 | "metadata": {}, 638 | "source": [ 639 | "** How many Job Titles were represented by only one person in 2013? (e.g. Job Titles with only one occurrence in 2013?) **" 640 | ] 641 | }, 642 | { 643 | "cell_type": "code", 644 | "execution_count": 55, 645 | "metadata": {}, 646 | "outputs": [ 647 | { 648 | "data": { 649 | "text/plain": [ 650 | "0 False\n", 651 | "1 False\n", 652 | "2 False\n", 653 | "3 False\n", 654 | "4 False\n", 655 | " ... \n", 656 | "148649 False\n", 657 | "148650 False\n", 658 | "148651 False\n", 659 | "148652 False\n", 660 | "148653 False\n", 661 | "Name: Year, Length: 148654, dtype: bool" 662 | ] 663 | }, 664 | "execution_count": 55, 665 | "metadata": {}, 666 | "output_type": "execute_result" 667 | } 668 | ], 669 | "source": [ 670 | "sal['Year']==2013" 671 | ] 672 | }, 673 | { 674 | "cell_type": "code", 675 | "execution_count": 60, 676 | "metadata": {}, 677 | "outputs": [ 678 | { 679 | "data": { 680 | "text/plain": [ 681 | "202" 682 | ] 683 | }, 684 | "execution_count": 60, 685 | "metadata": {}, 686 | "output_type": "execute_result" 687 | } 688 | ], 689 | "source": [ 690 | "sum(sal[sal['Year']==2013]['JobTitle'].value_counts() == 1)" 691 | ] 692 | }, 693 | { 694 | "cell_type": "markdown", 695 | "metadata": {}, 696 | "source": [ 697 | "** How many people have the word Chief in their job title? 
(This is pretty tricky) **" 698 | ] 699 | }, 700 | { 701 | "cell_type": "code", 702 | "execution_count": 64, 703 | "metadata": {}, 704 | "outputs": [], 705 | "source": [ 706 | "#custom function\n", 707 | "def Chief_S(title):\n", 708 | "    # True when 'chief' is one of the whitespace-separated words of the title;\n", 709 | "    # lower-casing first makes 'CHIEF' and 'Chief' match as well.\n", 710 | "    words = title.lower().split()\n", 711 | "    return 'chief' in words" 712 | ] 713 | }, 714 | { 715 | "cell_type": "code", 716 | "execution_count": 65, 717 | "metadata": {}, 718 | "outputs": [ 719 | { 720 | "data": { 721 | "text/plain": [ 722 | "477" 723 | ] 724 | }, 725 | "execution_count": 65, 726 | "metadata": {}, 727 | "output_type": "execute_result" 728 | } 729 | ], 730 | "source": [ 731 | "sum(sal['JobTitle'].apply(Chief_S))\n", 732 | "#alt-> sum(sal['JobTitle'].apply(lambda x:Chief_S(x)))" 733 | ] 734 | }, 735 | { 736 | "cell_type": "markdown", 737 | "metadata": {}, 738 | "source": [ 739 | "** Is there a correlation between length of the Job Title string and Salary? **" 740 | ] 741 | }, 742 | { 743 | "cell_type": "code", 744 | "execution_count": 66, 745 | "metadata": {}, 746 | "outputs": [], 747 | "source": [ 748 | "sal['title_len']=sal['JobTitle'].apply(len)" 749 | ] 750 | }, 751 | { 752 | "cell_type": "code", 753 | "execution_count": 68, 754 | "metadata": {}, 755 | "outputs": [ 756 | { 757 | "data": { 758 | "text/html": [ 759 | "
\n", 760 | "\n", 773 | "\n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | "
title_len
title_len1.0
\n", 787 | "
" 788 | ], 789 | "text/plain": [ 790 | " title_len\n", 791 | "title_len 1.0" 792 | ] 793 | }, 794 | "execution_count": 68, 795 | "metadata": {}, 796 | "output_type": "execute_result" 797 | } 798 | ], 799 | "source": [ 800 | "sal[['title_len','JobTitle']].corr()" 801 | ] 802 | } 803 | ], 804 | "metadata": { 805 | "kernelspec": { 806 | "display_name": "Python 3", 807 | "language": "python", 808 | "name": "python3" 809 | }, 810 | "language_info": { 811 | "codemirror_mode": { 812 | "name": "ipython", 813 | "version": 3 814 | }, 815 | "file_extension": ".py", 816 | "mimetype": "text/x-python", 817 | "name": "python", 818 | "nbconvert_exporter": "python", 819 | "pygments_lexer": "ipython3", 820 | "version": "3.8.5" 821 | } 822 | }, 823 | "nbformat": 4, 824 | "nbformat_minor": 1 825 | } 826 | -------------------------------------------------------------------------------- /ML basics/df2: -------------------------------------------------------------------------------- 1 | a,b,c,d 2 | 0.039761986133905136,0.2185172274750622,0.10342298051665423,0.9579042338107532 3 | 0.9372879037285884,0.04156728027953449,0.8991254222382951,0.9776795571253272 4 | 0.7805044779316328,0.008947537857148302,0.5578084027546968,0.7975104497549266 5 | 0.6727174963492204,0.24786984946279625,0.2640713103088026,0.44435791644122935 6 | 0.05382860859967886,0.5201244020579979,0.5522642392797277,0.19000759632053632 7 | 0.2860433671280178,0.5934650440000543,0.9073072637456548,0.6378977150631427 8 | 0.4304355863327313,0.16623013749421356,0.4693825447762464,0.4977008828313123 9 | 0.3122955538295512,0.5028232900921878,0.8066087010958843,0.8505190941429479 10 | 0.1877648514121828,0.9970746427719338,0.8959552961495315,0.530390137569463 11 | 0.9081621790575398,0.23272641071536715,0.4141382611943452,0.4320069001558664 12 | -------------------------------------------------------------------------------- /ML basics/df3: -------------------------------------------------------------------------------- 1 | a,b,c,d 2 | 
0.33627233637218457,0.3250110687231613,0.0010196408377848298,0.40140189720154196 3 | 0.9802649683525543,0.8318353550307083,0.7722883679048234,0.0764853766737329 4 | 0.4803872425787493,0.6868393727588189,0.0005746529724915961,0.7467584765703297 5 | 0.5021060966555528,0.305141589099338,0.7686084672112038,0.6546851999553737 6 | 0.856602037495124,0.17144842884142553,0.1579712921580272,0.321231483243839 7 | 0.705973481075289,0.6326888750520957,0.5040165014387529,0.313622137930175 8 | 0.16672441593320897,0.26973375516469766,0.6085693621704945,0.8744271309803985 9 | 0.054040593105187384,0.9664396927503365,0.06559570391295622,0.10944814243870982 10 | 0.7578390928377232,0.5238852910755655,0.5272102788067565,0.22693647922759852 11 | 0.4253833221643635,0.21370125617072533,0.04902015655465275,0.2875979408000324 12 | 0.17683680950823633,0.16653356599751545,0.7793735548048444,0.9364861878339583 13 | 0.22154585464980914,0.6608213750146527,0.8117815897698992,0.39256501309009495 14 | 0.0013503699623585996,0.45018589348741,0.07646946429641399,0.7533679412337956 15 | 0.5373231464576197,0.6237788989227707,0.07185135746180549,0.7512000510131361 16 | 0.03228400416591837,0.050181912070182855,0.8975624866722012,0.6141292789106866 17 | 0.48880423301062903,0.091374385826271,0.20380306481467603,0.17970794983894278 18 | 0.8211155924257206,0.19140670507340207,0.7569623666417657,0.5923832772664127 19 | 0.202698689394757,0.10397814468661737,0.3422364309240138,0.7876262313490752 20 | 0.18521752139529557,0.7515544279051898,0.6127372143065575,0.08842911001592957 21 | 0.8428103260014432,0.8423568831968558,0.20658856394692127,0.3899396257210873 22 | 0.8440331226076548,0.4499813871558678,0.13219758424637973,0.10393829250389419 23 | 0.16387799113366353,0.11235392778846287,0.2190873453625617,0.059326947648009165 24 | 0.33190219209756044,0.8080268099068179,0.5101723811744628,0.3875393176456404 25 | 0.5016420034152848,0.9413004247947276,0.3237529405161287,0.6909106696226429 26 | 
0.37151955223345456,0.8763309746266572,0.14435751886650183,0.594585432452638 27 | 0.994328979706033,0.4147718333263406,0.4402798249423783,0.547651126220843 28 | 0.00679971308197036,0.4603723951885834,0.09186946680903141,0.5517256896624096 29 | 0.2038574471303466,0.30806922777314416,0.24075597273301397,0.9809566340486252 30 | 0.6057790651048847,0.380763712827924,0.7929227833553262,0.8916493533755211 31 | 0.4773044457751652,0.18348921227398018,0.8665347875764602,0.20115355252830047 32 | 0.5562623859121136,0.12741251095832018,0.5668972900386939,0.37582831435355124 33 | 0.6094551685345265,0.4238303848935505,0.06312888353038026,0.9296411624795633 34 | 0.2830144384308311,0.34994725136301763,0.7866534502340554,0.5685738387259147 35 | 0.6083096069069768,0.8547908027081947,0.1394453009020623,0.4875846563151508 36 | 0.7950019425815656,0.921310174399855,0.7131410421635475,0.6854997471830577 37 | 0.7029167242052079,0.9942208895868356,0.1578293248971958,0.7526027061070005 38 | 0.009322285637569871,0.8523851062431688,0.9869823482550834,0.34136544005312386 39 | 0.12852080402532928,0.596241240318469,0.3968413103480981,0.7848552860308265 40 | 0.002236448759129761,0.5637356470041152,0.8886645729272742,0.37386485230236577 41 | 0.7842827419476831,0.4992192667366049,0.45170232654581244,0.3770787550311161 42 | 0.9158907842654865,0.6409957558601469,0.49568773910025465,0.20171709368134993 43 | 0.19497218789426773,0.07799977284048609,0.45164626525047646,0.41569643117464683 44 | 0.18132400560860884,0.5661707175707383,0.6056319811909233,0.4165675770381041 45 | 0.38488773142764265,0.9821137122971909,0.5027171764378315,0.813578153903196 46 | 0.293214680847236,0.13178425655004344,0.653358739460729,0.9163419033861513 47 | 0.24049964437381832,0.25836815014457226,0.8576721350402634,0.6353489072771664 48 | 0.7845072207604514,0.5549383922577203,0.9430359879805484,0.020917179653205498 49 | 0.4890808014256227,0.6437666545983554,0.04664450518739238,0.9578885394454532 50 | 
0.4228457628977761,0.8689120616410727,0.7972507965819937,0.019856966453678848 51 | 0.9241867489910823,0.41850080577561566,0.4394385762716323,0.8024008068527704 52 | 0.8305625919210394,0.3108978272568762,0.6129356703058603,0.8716646405910411 53 | 0.09594336918253688,0.019040152853489145,0.8979912879493811,0.84308872060182 54 | 0.009327986936045307,0.8866217728884342,0.4970262702960242,0.5585917930674684 55 | 0.7659452276041034,0.021081125490953756,0.05379451175469152,0.13182088392280134 56 | 0.5137324258205391,0.03507429771742998,0.6188835497462247,0.47412112348613267 57 | 0.11358486552277647,0.5406403944158276,0.0729449248495877,0.5527552831548942 58 | 0.08956748867615882,0.11654922503912224,0.46266652129506614,0.32642602885962135 59 | 0.4211171586740562,0.6632165454855101,0.6309662913224898,0.8286446436953286 60 | 0.5637355341677186,0.9629205938326909,0.48836102148628124,0.179301852727608 61 | 0.5665145754302383,0.47465892280959876,0.7069399194676959,0.6843231102049017 62 | 0.7919669843727317,0.41286717347824753,0.2199075673075842,0.7190515874041887 63 | 0.4540253976165326,0.8591098044317296,0.05641593355299912,0.052499941434040465 64 | 0.8676454047696701,0.8060309968112127,0.7976554814027406,0.3095319522235279 65 | 0.4034305635707125,0.03613418459915807,0.3325763107210705,0.9512794915551748 66 | 0.20653241823964685,0.6811942258264632,0.1388068478997746,0.907449529319709 67 | 0.9099121743964399,0.6749347933657847,0.7193643552406551,0.5879654995595851 68 | 0.9521397233570815,0.3402404046288864,0.7324666461948954,0.8387632211906879 69 | 0.8421351523246728,0.961220138564085,0.4778471184712483,0.5306040734200057 70 | 0.7258327483128492,0.12740590844845978,0.5227115974518366,0.9165137051199156 71 | 0.39726230939877916,0.9205270920019915,0.37890315076336945,0.8293212927684726 72 | 0.5096842992720936,0.34492156831348975,0.21972392428167264,0.3239206526071199 73 | 0.41923610692712787,0.20316889038447328,0.6723821379431006,0.8138336994323886 74 | 
0.5401022043036029,0.9921912472869507,0.07155264815748197,0.22235205692332194 75 | 0.6326231970067655,0.6711395741968569,0.5362098362111336,0.1269112826214921 76 | 0.0767867395670696,0.3565016862470717,0.25388441759642566,0.4242950025417688 77 | 0.4874105739319312,0.25271393595678326,0.7120803949787771,0.6989636412956753 78 | 0.865351123549325,0.47276683251320106,0.349365609886252,0.970488584408135 79 | 0.17522331971527527,0.858030698062132,0.014161641133221714,0.9590157903088603 80 | 0.5219950450807564,0.564292482855931,0.991807593090961,0.8793987668720515 81 | 0.8541851689759502,0.4312804455508483,0.9057447902854223,0.8400308135512039 82 | 0.24962152163352236,0.29976093447523944,0.016659801635567795,0.6113981828515243 83 | 0.5256246254031416,0.7025797018077921,0.45370147271924066,0.21847018115707095 84 | 0.7773168862825681,0.5807690145037084,0.05917681562835819,0.12207149221630365 85 | 0.2969398693899853,0.7101837014089161,0.10264566154018484,0.03802663652391913 86 | 0.13174726635317047,0.4049640420658098,0.10265639488345568,0.6313356859556932 87 | 0.5774537613732307,0.7950091343310087,0.6254072054481303,0.782880581402316 88 | 0.4569088828401272,0.3368057405210615,0.7644526823895249,0.7169545580826315 89 | 0.04405209275176336,0.2643936018285221,0.9125333461775873,0.14914103967117964 90 | 0.9507443774090862,0.027600609976992874,0.8764198054026865,0.879444004820502 91 | 0.5842083305136452,0.6274659945637009,0.5367451676231368,0.392889215931417 92 | 0.8955122098248628,0.7537782106513566,0.1604911709350293,0.5859829839384009 93 | 0.5952961818963642,0.7468739882549703,0.24275681153732187,0.5290670495598715 94 | 0.8312535242920208,0.1917211521709924,0.9220747852006578,0.1970401675822976 95 | 0.46147635658104025,0.2036694441526835,0.9732520838928433,0.09224453800897414 96 | 0.09164896639336784,0.5623431916696897,0.03226422662199835,0.09150761256404727 97 | 0.0027739897107698086,0.4861197117786701,0.2969232876902048,0.14349432537135687 98 | 
0.5654476076298837,0.02937985987233005,0.9763755947396335,0.8867719187512502 99 | 0.2970657038748554,0.6517747699408414,0.8441592978236552,0.9189007059914814 100 | 0.18759092285878176,0.5612541486109694,0.24432183733750223,0.8822158061277604 101 | 0.48605710009116976,0.2611925264780821,0.6175926575199631,0.8091933051351528 102 | 0.06034437375304058,0.5013711741578023,0.8991711485489406,0.2201862984008226 103 | 0.25535590055309276,0.3516679243024956,0.5364893477060845,0.03701102753929619 104 | 0.31328688355417955,0.4763862834554521,0.9202864658634526,0.7631840375981793 105 | 0.10431853303024763,0.2851353342832237,0.41775437331758203,0.516107005989879 106 | 0.42920953935242556,0.6462710684488812,0.5332567085391797,0.28323321173177096 107 | 0.6125472392222696,0.9333035762795235,0.3152303713785083,0.5077422637945377 108 | 0.4626467454001375,0.7057207724407146,0.947799261099005,0.41181935062478436 109 | 0.7069326445918329,0.6083604272727989,0.6411963088604483,0.8910570722746401 110 | 0.35738719547306785,0.1749638427392035,0.7126113738291732,0.3418569615350563 111 | 0.8996328040943408,0.07215553808738318,0.950727768044252,0.014300048486940553 112 | 0.04724141430866824,0.244668538365997,0.07762066561538628,0.7853375044908923 113 | 0.16085880428253185,0.5143846890922119,0.12297488332527828,0.8819848358310923 114 | 0.42142592069833285,0.12287317618823113,0.30114178431109306,0.7939797237336467 115 | 0.8815288737565792,0.39151559779951806,0.6958394710958453,0.7814010010773164 116 | 0.44394332894315436,0.07553068723802092,0.37275528784516443,0.7615758767045683 117 | 0.374409464028312,0.5818786972823738,0.08377247884294892,0.3683448657746946 118 | 0.9899294933222142,0.1187671466861393,0.4014778191591516,0.5685096489097043 119 | 0.06915770697644774,0.6090114308381936,0.6220271462009369,0.559585715262603 120 | 0.15955405478675377,0.7359109918618787,0.5084386068305188,0.7045890693080764 121 | 0.23296787478109893,0.03259270193505115,0.33995869443762594,0.056452241818247595 122 | 
0.9886431070907975,0.7427660704163951,0.031265351045515644,0.7292813741972919 123 | 0.25761140232278557,0.9690281519031503,0.2205030584495753,0.1340893432229462 124 | 0.48712877508762054,0.4065577726204984,0.5439985356525793,0.6179781655968418 125 | 0.02453108907718138,0.7449006851219485,0.016648998371553758,0.7632656212788141 126 | 0.03249802874858365,0.029654017485838513,0.14598435219688843,0.6408019322014568 127 | 0.3532835865948927,0.4939428604587679,0.3473581714765762,0.9615931874740967 128 | 0.2918184026405345,0.8537398124259274,0.6155473635818071,0.3552247355799426 129 | 0.12703792681969273,0.12599178150105628,0.4336252645614862,0.3654410053985607 130 | 0.6810643435533861,0.3148121150987331,0.399335464667698,0.030530634936642875 131 | 0.9528797882146893,0.12908033520074214,0.8098651406333642,0.7013259143682808 132 | 0.23536709423598234,0.37478763941226545,0.19842775634944565,0.7562456005854551 133 | 0.4318457382401344,0.9967400188922231,0.6626892965429728,0.5111869508038237 134 | 0.634722769256735,0.33641812662999937,0.20032521735368292,0.5413886778285673 135 | 0.30105662499849395,0.8239463258938328,0.7637786846988948,0.6552935262376625 136 | 0.4441603799049635,0.3377263099523602,0.8155043986264426,0.06848219164654568 137 | 0.04548148661580664,0.2709726235565958,0.12465858380131734,0.6591615115575256 138 | 0.03471336985920148,0.7122901580430191,0.9701661055346469,0.6643970613424024 139 | 0.9520651381177343,0.3756720203771867,0.3496562090980858,0.32632321104344 140 | 0.8442220000866414,0.5107899461278133,0.902689007858361,0.5426324179894695 141 | 0.34697608473928676,0.29136838696844236,0.4082087104041593,0.512158775785004 142 | 0.28822069677111695,0.2140493866097244,0.8101917324399203,0.5005469421934069 143 | 0.8098131526492203,0.60964587312313,0.21065530014928446,0.8726083090048955 144 | 0.21337066838328622,0.9415404807508847,0.8702682174016022,0.6119646007822085 145 | 0.16678272406244066,0.5636359806161614,0.6838634496961317,0.17855139525600738 146 | 
0.9606328566850059,0.10116986534500116,0.7352811690185582,0.7111515299969101 147 | 0.6948295769243019,0.5487648112166836,0.5971291356412485,0.5862243401316085 148 | 0.25466959663484046,0.03759741654816673,0.18492537417920019,0.3152077908652684 149 | 0.3538690888280669,0.6923581246513688,0.3413271435268187,0.028229200117313447 150 | 0.6643494763809131,0.46728315384847796,0.8177161446969633,0.6303374347843743 151 | 0.7550330801551735,0.5106606444950341,0.21877641466826825,0.22235314483650004 152 | 0.15145309679350605,0.0401938695342976,0.7530073889402137,0.3690417275588833 153 | 0.9760685110956365,0.2832833013287801,0.4101558258510867,0.8616901800434409 154 | 0.02985473595547594,0.7877747487646194,0.23130056297184654,0.9396871681614409 155 | 0.43602827118441545,0.49652538353669173,0.8350577337150469,0.7952136319405787 156 | 0.3093890857172835,0.46716935030046847,0.871500307938109,0.8657480183233166 157 | 0.2456914356877259,0.3990168265017442,0.30165988670453503,0.5018761389125409 158 | 0.773366902423847,0.7898400491523931,0.05466436722123824,0.6342304807902118 159 | 0.4355946549280614,0.21559267306867713,0.6945566395803673,0.06647684585216618 160 | 0.9531814883655881,0.5729141047206507,0.3284221987073983,0.019349445686161593 161 | 0.8554531512435155,0.4048714779813064,0.8191084513172708,0.8869425223890415 162 | 0.22378459698214903,0.6316513388678091,0.59867596778096,0.5453828996141791 163 | 0.7521968015369107,0.9383241204971419,0.26832367689036263,0.9236933301521059 164 | 0.2538420334987834,0.35565504542848503,0.9918313814463339,0.40459295128860495 165 | 0.8512943399155619,0.8145754646436317,0.2300391756142256,0.7616764915071941 166 | 0.9089776204805422,0.06301564577463226,0.6886391518337929,0.143418841185596 167 | 0.6668972843336031,0.9906466338305843,0.8830389070595589,0.12366815086187177 168 | 0.41012099619110054,0.7086901572451703,0.21852541360024869,0.3997955763014477 169 | 0.7880107069926059,0.7394352290465452,0.9389903778450293,0.7113562153904642 170 | 
0.16317523834417125,0.7741475223419458,0.5724701320943584,0.859481296551717 171 | 0.5457473442888628,0.3729328111922159,0.51884039225534,0.7386824796906086 172 | 0.6908968027745845,0.08280627925444328,0.06723392896082436,0.24671328573661044 173 | 0.8520623845068883,0.5969710472617193,0.7697327278098809,0.19740658652708765 174 | 0.2574451556296956,0.6621468830829318,0.8412805097791126,0.8651817484405847 175 | 0.01981522137598979,0.24439952915330643,0.28254858839247365,0.04322511390909678 176 | 0.519024275152464,0.9831608667734769,0.7771341747545258,0.6718284665140184 177 | 0.00041033035366877524,0.12073612441607873,0.9580945140036871,0.49285436029614804 178 | 0.765613057681015,0.9840407827973151,0.4242176295457398,0.5253199938914017 179 | 0.20341140639091282,0.5991055566146712,0.7023042489065016,0.4747723187558055 180 | 0.7400900050414457,0.17271094754083793,0.4240999290757701,0.9793393433428702 181 | 0.25820029030355063,0.2728668186538884,0.6322054634098652,0.15238618337531795 182 | 0.5328537481137854,0.5311755568123224,0.7124676069252257,0.7543900408669565 183 | 0.40499751324417577,0.07053712315111249,0.5414349568194385,0.8602525352837128 184 | 0.7167405690532722,0.7655360893975195,0.5152671741703145,0.9190770436346825 185 | 0.88961057246162,0.6254855074738592,0.29409673513750034,0.7109777546908304 186 | 0.4293142758847388,0.07500440719979173,0.8765165097980312,0.21529622822921213 187 | 0.2589801994183899,0.06160881811943064,0.3056520716803025,0.982334083651339 188 | 0.7902310273315167,0.7894298793784357,0.48208921502203306,0.6202306441826341 189 | 0.22466284393364877,0.12148597193505006,0.46103311942117764,0.3483662428684401 190 | 0.57742763442928,0.11828978574400584,0.4131676012891534,0.609522879857421 191 | 0.37324999603891995,0.6762975414394721,0.9199544769836527,0.6727504995045568 192 | 0.711766017902915,0.7939550457590105,0.898307777950035,0.3031826214956438 193 | 0.5369879704146969,0.0672550683511125,0.521581223662644,0.8497801425248154 194 | 
0.47219976377187045,0.18238353765328474,0.64134961311078,0.6349174852105679 195 | 0.14624800172865604,0.27381356996678785,0.8224393469115399,0.26937877539963895 196 | 0.6794486644723616,0.777012108386266,0.818370703325512,0.3194693652715377 197 | 0.26256894387243823,0.8155410401668229,0.7696362130828602,0.3430597160802066 198 | 0.9534675119281499,0.4104432184290736,0.8006056593860142,0.005469418655068337 199 | 0.22697530159965018,0.3562781425906293,0.42398767253513214,0.5660547467854499 200 | 0.46596218753247365,0.6166765020752157,0.6519869790728867,0.85236029707832 201 | 0.22480145227830328,0.8716277153913593,0.9538694917004743,0.036617369018068446 202 | 0.08734034737180296,0.6818598657649276,0.7118099804139644,0.027208495222140416 203 | 0.22675117581361248,0.5093225169066515,0.2527763043257748,0.9639863410064701 204 | 0.8588116851970607,0.010648950953201508,0.8614073660606832,0.6625653273689048 205 | 0.4032633523884259,0.7190336009781118,0.46849644053207895,0.6931989847582659 206 | 0.6483604621209498,0.5821755673849227,0.1163795863804884,0.8319955295555067 207 | 0.33164962908476237,0.7666367603030417,0.45294386557535204,0.9719452072879635 208 | 0.24813923094249446,0.16216162422596236,0.920552793506166,0.07422765299018297 209 | 0.7748669260198029,0.5082087357106482,0.3810680042430784,0.006379281376213353 210 | 0.0049458797302726065,0.5287299040999536,0.2220534845790877,0.35056975084558806 211 | 0.591143058163664,0.8281272713471354,0.10557105079938112,0.3960752060779493 212 | 0.46918815061405417,0.5598337169460946,0.6153953549781171,0.32641048588855914 213 | 0.5267835440102993,0.8921297487342501,0.7969345924884385,0.607928404254362 214 | 0.6575218281892343,0.6133344689697782,0.026674317422172078,0.6794473556002295 215 | 0.8668190580498522,0.46775642509925586,0.7806493405916398,0.8496739290235378 216 | 0.1244648752899119,0.36771182500646804,0.747766813278335,0.005027986269138474 217 | 0.326904788429768,0.8301066768156419,0.5583885615475986,0.35043091639101154 218 | 
0.21878840733396754,0.30890306323298644,0.26063368013827903,0.19940888399650192 219 | 0.47725115799390283,0.4519491379395325,0.9390538584039441,0.28414276135065464 220 | 0.08375430918146376,0.7513028963268265,0.10330365544229436,0.8140028380808133 221 | 0.7518322565413479,0.35891329691832896,0.626080838727002,0.5718455198448357 222 | 0.8393551543401457,0.9798296662023601,0.1619441133529892,0.9050804752896568 223 | 0.2912547699822662,0.8102055466256184,0.029132319044678723,0.3158557431833995 224 | 0.9148656321018671,0.8358907745950482,0.13959370006216,0.11068322090759253 225 | 0.6462408716160324,0.2066776752496059,0.7556240037237695,0.08049990019889608 226 | 0.3229890299355197,0.4421092716998414,0.9774141928524789,0.4826916383029839 227 | 0.19899537213792595,0.6483881552956826,0.9603949241226026,0.7460398701642762 228 | 0.5929616245255648,0.5036138878742359,0.18643020378853825,0.3734615212449818 229 | 0.8606455272824358,0.9449017405763951,0.14989514119892955,0.40871761666733475 230 | 0.09117712354115515,0.6484436120663334,0.7448051068584821,0.4718552300107698 231 | 0.6481453770004487,0.10128509718921264,0.11964736864520609,0.8938592498582604 232 | 0.8586645297808357,0.43434004725542574,0.19382024972204337,0.4701398582696664 233 | 0.5752608568445848,0.5289492967771696,0.17095235409262333,0.808798455412136 234 | 0.04099322454733356,0.7027125073321747,0.3247808840325497,0.46652225478299536 235 | 0.8545900646258339,0.9000924844460604,0.6711822725580922,0.46373836343886 236 | 0.9784349555067512,0.47815086615571867,0.31653408382502835,0.8409104059461735 237 | 0.08965086313576731,0.35615798789552344,0.679037507855203,0.576862661578551 238 | 0.822147160688062,0.22044591928818968,0.622025707499033,0.6356217585107622 239 | 0.9659284010312518,0.7137133305963123,0.42468924235462546,0.8825389487233778 240 | 0.2652004177722498,0.5565624836551789,0.7138942985137977,0.04313867103058555 241 | 0.7204906388512048,0.19300261774017968,0.5553078337805508,0.848623095012623 242 | 
0.3137020128453659,0.06213788668138398,0.1104521743306962,0.268482638609996 243 | 0.3339131616047203,0.22286051556127495,0.9302085063006681,0.18969921720557925 244 | 0.47371881012743333,0.27649916957853926,0.918582911002254,0.9077321559115206 245 | 0.16436753572470753,0.41350363461955386,0.1191110629612997,0.5546038843760451 246 | 0.4779404086003456,0.26699235009794486,0.6314602368060491,0.45015079807521685 247 | 0.7526728792121127,0.6235832861546013,0.07095039872912934,0.6533879557008555 248 | 0.5431474850322893,0.612183500299674,0.1687261650843036,0.3172179764612265 249 | 0.2273371617923714,0.865680153487757,0.5827248247756568,0.07328306898709702 250 | 0.12611491247624174,0.5706376312978149,0.28098697486020496,0.9582441234112288 251 | 0.5968739274269275,0.39542100773592714,0.9988692472082709,0.37827430309233656 252 | 0.7771754323352396,0.2697182152654869,0.6403988986023146,0.32137054416117183 253 | 0.593221449278064,0.6959371750998876,0.818237870222789,0.8425875763640115 254 | 0.6150323759787362,0.8300438488533514,0.011941853213758402,0.8647488663500092 255 | 0.2029326067858479,0.2501485915782097,0.20112813726763568,0.03721096985060468 256 | 0.6863936364279091,0.3761179178286703,0.28281992377497234,0.26245689109379067 257 | 0.4965672155028681,0.7675916426266497,0.38879712109112063,0.9408961704746303 258 | 0.14110932738953297,0.7667366428562539,0.5154150569336301,0.8871155170495236 259 | 0.32517727533261,0.13810804974144586,0.8817286203276805,0.8923770435680616 260 | 0.13118415251772886,0.4724813744322741,0.6965185434293946,0.8809271527639683 261 | 0.9545553958345672,0.6911555060264112,0.3461363476444005,0.4577135252289437 262 | 0.604008641577086,0.4049278320866372,0.5576335559750655,0.4584690061172756 263 | 0.3664279387231145,0.9210977637552976,0.9259396184708046,0.6187373590686066 264 | 0.6926921671269616,0.7056455178608063,0.49433124255882477,0.7895438902447587 265 | 0.5052823855197169,0.27106020463914704,0.8116754606279196,0.13565063043789083 266 | 
0.7323902276376828,0.7698002360710043,0.713490159628528,0.1641449078306726 267 | 0.13259585151529407,0.6448586999650545,0.044208192437834914,0.7185432988678314 268 | 0.8081759656453317,0.45841661860325944,0.934434925986559,0.49426593400161334 269 | 0.5771885974706991,0.5881467177722365,0.6399022163604299,0.3771869575374359 270 | 0.48758352078031464,0.318129871239599,0.9047263034728386,0.04353003454869386 271 | 0.04224832101083087,0.7216713055488719,0.7702251485607872,0.14660520157118884 272 | 0.906805106288075,0.7142423782039156,0.6492463522633645,0.8089537150363935 273 | 0.22509844113441224,0.714232692976936,0.8863034807750806,0.27751663911557733 274 | 0.927654059080697,0.9472178956834688,0.3238547493736994,0.5313974196131103 275 | 0.7775026248564839,0.366416107883324,0.8913507785084809,0.7129585365523068 276 | 0.701804618572489,0.4754051551620687,0.1483109717640092,0.9629058012646603 277 | 0.6894417967365506,0.47535612758856627,0.27065181763195223,0.419800329085256 278 | 0.7953742832102751,0.017607321320034064,0.9239257373999976,0.9132899083842303 279 | 0.8191848786683833,0.7118868982916801,0.14719023292344702,0.1464878560040236 280 | 0.045339408502298784,0.07403816709500988,0.037202775722709025,0.5006006273772845 281 | 0.3441192304926166,0.8987140282588046,0.19702093237012608,0.513983027156217 282 | 0.09985412950064898,0.38986517472782023,0.40452962687964444,0.010392375900986517 283 | 0.9888796717166685,0.7490090895916841,0.2596294186935606,0.4584345985951731 284 | 0.45700114479701004,0.6806642081959364,0.3941616551891305,0.40932639017050887 285 | 0.22870742070902517,0.4961988524015096,0.04576544412910477,0.6340760637797336 286 | 0.2919710136894722,0.23278641925489285,0.4593479168264748,0.805534031204371 287 | 0.15472055351362923,0.1326050020325038,0.30005418331448175,0.6263173704010072 288 | 0.3855935973315787,0.1600210733653037,0.1192711036086127,0.6364894057985613 289 | 0.5165874702810029,0.10589536555440149,0.836027611899695,0.5425241910064632 290 | 
0.06506145636136362,0.2624350464523646,0.60599389298748,0.20824553557216086 291 | 0.5270193642446159,0.8237728230700423,0.2892156336011714,0.8873126338662943 292 | 0.18919484363192918,0.27347313781194604,0.5558426186041985,0.26130285965720756 293 | 0.9560313727021582,0.2323345531125427,0.7109228911421767,0.005298262810604415 294 | 0.5869974306959455,0.551839108711318,0.9631838302064771,0.8748224414451556 295 | 0.8394707823008176,0.14649568514521594,0.42496652865473017,0.6479642065574542 296 | 0.17595067566465916,0.27079531300287973,0.7845760830918573,0.4745584618932701 297 | 0.48854926001759913,0.1258570249789137,0.8052540177863061,0.4192898556746709 298 | 0.6212556965708542,0.39723581820451603,0.07287399150297214,0.9018901762597358 299 | 0.808743928594653,0.22119133563026316,0.25823594549123996,0.30603153212577305 300 | 0.8124836819110055,0.7059634348701986,0.13599942020850242,0.1643630702649136 301 | 0.11270816568759079,0.8985217621210186,0.604783075454661,0.5780567207297427 302 | 0.2478577942115826,0.295514530036352,0.5819979510773367,0.4515276838478285 303 | 0.9341867123294322,0.5241038121822309,0.7349093167256558,0.10821307519543244 304 | 0.9694565801743523,0.7256657249365571,0.4405418454733141,0.3452875476762217 305 | 0.6670885234356302,0.7464035693791718,0.8968707150608508,0.23460678314729688 306 | 0.8787477148217263,0.3820328244947394,0.45637968898810977,0.6485079931308289 307 | 0.6424404543524919,0.5761307478342017,0.4331408926226967,0.8070040991923705 308 | 0.8211851866370176,0.4580361289778362,0.899692936602448,0.5713664527993445 309 | 0.21206397355409146,0.3912268103159252,0.9493983929247443,0.5071785967280376 310 | 0.19406700477640937,0.6150799842880748,0.2883587772964561,0.9708823993407812 311 | 0.8040167613971719,0.6547916899574816,0.5242140387878185,0.008780205484151038 312 | 0.22082426037740743,0.11535424028214303,0.08781152460137587,0.438316057668212 313 | 0.7728123968121355,0.8042828334383247,0.778690715189286,0.38988570049482363 314 | 
0.010486111694698264,0.67277991682324,0.07681804627281064,0.9567609277214955 315 | 0.2668864381442586,0.1452568826511088,0.5707392691063333,0.928043972105489 316 | 0.0306102817857985,0.9055360655368343,0.034887490136228494,0.05714269034932418 317 | 0.8886345174201701,0.7309503027420534,0.4776862370499536,0.5670174424366347 318 | 0.8382970567348228,0.34554793003082007,0.3723476159629475,0.1813898979341001 319 | 0.9047848890963807,0.7989647025237637,0.1401466580893611,0.5676980198409542 320 | 0.32257033483181474,0.867562309325832,0.9853490867984394,0.3313521730872008 321 | 0.8833118951420463,0.22198267144590023,0.4053942167679826,0.7460413091565492 322 | 0.6440770031016559,0.6146438380554794,0.2702595340451578,0.6517116434459616 323 | 0.43700087729614623,0.1956640333780958,0.6307740960622968,0.13300572762864438 324 | 0.025545984518965392,0.13259337648090797,0.330639580102997,0.6783486119187357 325 | 0.11745760865685662,0.04655297692915128,0.01570496083384365,0.3324721623808766 326 | 0.9446708945818156,0.7922202276903331,0.13532115814545032,0.5680680471004171 327 | 0.9981105344574744,0.17895387698238285,0.41489085918398205,0.9787571737165199 328 | 0.2182957368236994,0.11369398768302719,0.4712035902042646,0.5810331859747123 329 | 0.3158753508670622,0.508766658109675,0.949177716201396,0.4525860098202471 330 | 0.5260068340155828,0.7165350507158287,0.4145499033093072,0.9405637896975582 331 | 0.6270735198783227,0.014296088209979407,0.3554936000082958,0.47110739130565116 332 | 0.49216742436131966,0.7918069585203404,0.045439755466691434,0.14126827560697552 333 | 0.9551074995452985,0.7609084606692946,0.7517853978688316,0.9362533190577099 334 | 0.26649052838492904,0.10660645060167617,0.6595673397416092,0.8182263625691235 335 | 0.7134956283945861,0.792888640733119,0.16544349103619738,0.461125700665701 336 | 0.21689092404412325,0.28764631665452123,0.9657542611199962,0.9395317857593563 337 | 0.771934364979116,0.47354176406587567,0.39974739561617967,0.4672920917655088 338 | 
0.055858469316713455,0.5263535147518149,0.5470407265082073,0.9196145812835027 339 | 0.3233845430037011,0.3180589338229559,0.607742422700699,0.5865738777507155 340 | 0.7652566346579441,0.581821555249569,0.5172069398211208,0.18599163645542838 341 | 0.0034684801064233106,0.2225896795781036,0.9862480175146329,0.13982000478168466 342 | 0.5017790922024837,0.40773441324079573,0.05786902177165454,0.02346346342196437 343 | 0.5899299630888499,0.9347063659987582,0.10017779286159945,0.05031189728135643 344 | 0.7552541885045175,0.8692659914400955,0.1957645607498405,0.38721358997203004 345 | 0.020654345427079224,0.5187027132180971,0.9875380926266475,0.3013184844564526 346 | 0.6514969381102048,0.46072040727750285,0.594442754475653,0.6954838811441331 347 | 0.9185130194324117,0.04293106084778964,0.8363748903511994,0.9169946702125765 348 | 0.01575990734707533,0.6247692943585893,0.09038267623487206,0.05157924280558712 349 | 0.8607637728932239,0.9736616782275902,0.08111827087665802,0.8665656920735859 350 | 0.666744972708363,0.7696471975946282,0.6545167483950083,0.1836352141094666 351 | 0.14890415384126243,0.5882642074433254,0.47488324034977136,0.1401242764796421 352 | 0.9226772234533692,0.5110406596658441,0.5694788038165256,0.7467927879198694 353 | 0.21131539061769633,0.6825600857581313,0.7125768055425141,0.40832581447694716 354 | 0.6067351237433124,0.5936720758765013,0.12956326274296215,0.39895208390744263 355 | 0.07503423810335952,0.7692901398814894,0.5080248598788042,0.1132694830734321 356 | 0.6779853647179561,0.8785368744942339,0.2449159217780239,0.6984978366100005 357 | 0.5639290842821031,0.13429933931245086,0.41879712608263253,0.11511037033327509 358 | 0.4771948700443285,0.66869023375205,0.09436736601701601,0.2832346511771564 359 | 0.11389507788099285,0.9548819478014735,0.43162079915995966,0.13260141339156495 360 | 0.6933840471420535,0.8569772316175522,0.9438136873946976,0.2550298038407348 361 | 0.7667432308405324,0.7092661191253911,0.20467525417272814,0.00409058163964926 362 | 
0.6033640307046173,0.5194782580436552,0.269802344368703,0.0252733853595154 363 | 0.7500958618175847,0.6816089568443191,0.06288441532040989,0.6625546198107677 364 | 0.14076814361824563,0.4000247200283372,0.3986400215241256,0.862359520497777 365 | 0.909050353083993,0.734140757406571,0.20627521456988696,0.022666933456822025 366 | 0.14164469015917447,0.46663482555229896,0.19226649392802975,0.6895759193711927 367 | 0.6384146831254898,0.8267776768248442,0.8248241916927097,0.9667913170456913 368 | 0.5995286449654652,0.6128661292930289,0.8076508959412709,0.5643091139270656 369 | 0.3660885061550925,0.6516894443881318,0.08279706016975263,0.02494681579030067 370 | 0.5343084901999381,0.4490041029823132,0.5485797015135121,0.8770746060099789 371 | 0.7591888502765555,0.7641772821881035,0.8627631867464572,0.790588149387367 372 | 0.11646298257714938,0.712586114834284,0.17850893843707638,0.0499046100449253 373 | 0.5736399771896279,0.24779945904696066,0.8353495305737935,0.6098353636118041 374 | 0.39995891099221725,0.38755138066544736,0.3206667889777627,0.8621821104916472 375 | 0.16963442693178976,0.1655588529460691,0.00471004269840658,0.03680381149038636 376 | 0.8913913191956618,0.4931553146014762,0.7212555101341884,0.535167989549179 377 | 0.19260952114367957,0.8894977792750658,0.9277896033411086,0.9266292288349561 378 | 0.8024400170720553,0.15728226403474244,0.6311310614764399,0.8940669250997342 379 | 0.13040641009710163,0.47598924174257407,0.4047476679695895,0.8151863456470257 380 | 0.9904753132378885,0.1929664786631008,0.6649932127466588,0.3890716243782475 381 | 0.06835950232326915,0.9179509323620376,0.9997085419499564,0.005016089384341504 382 | 0.2284112907168191,0.20349570495715097,0.9053407127073567,0.29129471127644957 383 | 0.23807412297897446,0.7433707240814856,0.2797207561532188,0.7055119549347723 384 | 0.2980015747932633,0.1528124911964729,0.2922351758608043,0.4744831732503145 385 | 0.9323151646558789,0.9016005665933837,0.04859295723828494,0.40406758360182427 386 | 
0.0020015859464498353,0.8573856734531015,0.835630618879145,0.27134674911444545 387 | 0.6476158569481719,0.7619325435307918,0.9387844644429776,0.5025693523387598 388 | 0.35712967268125584,0.35535013097389045,0.6846636602836755,0.3529193027458817 389 | 0.9528744981293424,0.907004048138501,0.7922686555445306,0.6442300435823645 390 | 0.42594045350938714,0.7635686751066307,0.039866196100581064,0.8501629086487204 391 | 0.16151992349458166,0.027644573646974036,0.9940414223507111,0.30145531751034094 392 | 0.8192631972556896,0.5315304397102365,0.8819242518354289,0.7140853094035905 393 | 0.3640049949086881,0.8916802473396235,0.610690893660492,0.19860084943122835 394 | 0.03241401801078403,0.5997948103711187,0.3459077815858529,0.8429274697369962 395 | 0.860296620923188,0.7498450748611823,0.724915922543078,0.3674775151021594 396 | 0.12479747320689649,0.790080464590465,0.5590618642994262,0.695729010767191 397 | 0.4226547764778965,0.7662131047806842,0.0314974785100639,0.7126390471593306 398 | 0.717960945501675,0.44433663986840155,0.7338197075921654,0.6198705088373945 399 | 0.9130079792319507,0.388142201662786,0.9204914884121788,0.7782865287937031 400 | 0.9446157546353985,0.8229203898981865,0.37815408733881994,0.9474516986216488 401 | 0.4692270195228362,0.3624411531029099,0.9440792553003939,0.6632688289247249 402 | 0.35729593281341854,0.09094931962987118,0.6894993679786272,0.6604239544162929 403 | 0.8295230947269575,0.37987043680523425,0.21978418081773277,0.9845319112288327 404 | 0.9054097736788218,0.6795244920749846,0.8058227024795273,0.8620535192255319 405 | 0.47206954649038335,0.9877578985324961,0.31200381383209264,0.4274016880635737 406 | 0.3788613590960972,0.228409024641122,0.19313897647932965,0.534690513837352 407 | 0.80232246756088,0.2998305554602545,0.2302798655782391,0.3728021218773693 408 | 0.2304818792066189,0.6705989422124993,0.7782382571611371,0.4145383754064982 409 | 0.7865979675238535,0.6256642699276077,0.6975970325705699,0.3304703930316044 410 | 
0.2804152526765058,0.7506842663153662,0.935091785948748,0.8731861911602196 411 | 0.7733320316872647,0.06830631213470884,0.727537718955761,0.8243956447692756 412 | 0.8000717021921332,0.9827480235866367,0.10931315443917511,0.7646403326087104 413 | 0.15901650211578955,0.9607869193069879,0.7672418492405033,0.8857690961012441 414 | 0.39077830713874184,0.26566502812783865,0.7247651263316303,0.870345905475203 415 | 0.468444105096292,0.5035851734032031,0.7273267277987141,0.4386595809446233 416 | 0.6082661742527247,0.8835487968537153,0.2648075784103565,0.5097528202982465 417 | 0.1688589216994002,0.4587933228957092,0.06822389196178069,0.6944061276912664 418 | 0.8845023251920414,0.35626742579837833,0.24854712468804263,0.9286656126601583 419 | 0.6919912900325229,0.525926779915432,0.5028362151137027,0.7758761895796565 420 | 0.09072890731788164,0.2754248703653789,0.8794450999976556,0.5929425050351603 421 | 0.8045417151161639,0.717332184240045,0.4009746132815435,0.3576329066088556 422 | 0.8592956140479793,0.8885466867456595,0.6652781416892141,0.46618925902384434 423 | 0.9406255815435112,0.9535348493796136,0.8566780449450918,0.09147572622405076 424 | 0.21134438575825976,0.37870907194098735,0.8918450302424455,0.8756071157265357 425 | 0.17056446131279757,0.5947167672181617,0.42713739170980713,0.8231253767701853 426 | 0.6816965030835896,0.9174242883534593,0.9433745514214174,0.3850430220570764 427 | 0.13200731308338776,0.9544252195264228,0.9253057443590936,0.17330980292668996 428 | 0.1797299073809655,0.5798737456579879,0.4904506542903413,0.07273516539361113 429 | 0.4686355912210618,0.15029444542816606,0.46278071678091204,0.8070448038422068 430 | 0.7574896574957928,0.982488939171607,0.48131438961476347,0.25693746632683634 431 | 0.9031624053831494,0.6540179859697732,0.5756477983878667,0.900018098502001 432 | 0.706778868235879,0.5290013506815173,0.27214577830929343,0.6480582221558611 433 | 0.5257555048131995,0.8202277041197588,0.9755252443363349,0.09150668990330679 434 | 
0.2941118779304933,0.3160354013957346,0.07938351200005711,0.9284536477999036 435 | 0.3739355906627365,0.03654420156550575,0.6739743288835183,0.9528120972614034 436 | 0.29419616996955233,0.5180544099970837,0.9045835653031059,0.7490505883308898 437 | 0.5310119647704037,0.6208203267999307,0.4742606286491172,0.3676507758811156 438 | 0.5125560598459081,0.7230868177919499,0.21690326890395972,0.7029765452094612 439 | 0.2327168465092495,0.636427765358658,0.2795775063082444,0.2811043827126858 440 | 0.8979887687291225,0.1998743986080337,0.0861396746346903,0.9648494063439969 441 | 0.12944581215276862,0.7281358241811274,0.8211700518321551,0.14249279838367745 442 | 0.5248705106310002,0.05235202234421987,0.6947375170266925,0.3417365273756968 443 | 0.10595743196582197,0.07826617709938821,0.35337783398376776,0.4264573065236815 444 | 0.050009605714936844,0.018514896667227054,0.9413608402598351,0.791610587209869 445 | 0.17864320609241946,0.31880728783473555,0.6071840694245375,0.8248621694108086 446 | 0.9414648897887672,0.47850303480223844,0.5188440720457512,0.35654133260507215 447 | 0.12219561454685202,0.22693565011856753,0.7331870796269597,0.0028505501610355255 448 | 0.5716554932588412,0.6233114983745688,0.5790584042092602,0.2773603020533283 449 | 0.1730667946075105,0.4564347627258698,0.862108197216226,0.27326576204651987 450 | 0.10099838707719744,0.8963154205564153,0.2995006490737322,0.2507624147096721 451 | 0.2215100698131235,0.2096828648871747,0.1735729661494131,0.9037812640080347 452 | 0.3330121683438939,0.26772071803392583,0.1360347053953318,0.6143336229667071 453 | 0.8755253114603656,0.44786236422952297,0.5069228872958722,0.176168922805505 454 | 0.4360566521753333,0.8249506306465185,0.6928263688828447,0.9157745831809156 455 | 0.7044681125246586,0.7095402004346812,0.05729221320115585,0.03404868292678953 456 | 0.8839520510806439,0.6504324482442831,0.3794442442167103,0.21418711025841375 457 | 0.3967513210941327,0.3325872301281432,0.31405878752308436,0.07799771452864457 458 | 
0.7015490555702932,0.5156822991121691,0.23180117678821388,0.7166130112807891 459 | 0.8214439604906325,0.4407717872048116,0.37329993356057734,0.06083737830173952 460 | 0.38149071192905015,0.7550155142103253,0.460697700554616,0.2796872439634027 461 | 0.4810015127837912,0.5713080743424898,0.9571267966357225,0.7911382384952027 462 | 0.13998304353038438,0.3347918696327902,0.39657017898620517,0.5270438421014269 463 | 0.4043555405999496,0.5788249986243309,0.12586900730635686,0.3861265257391172 464 | 0.8389573878855994,0.888492299987596,0.8540078803737782,0.25800593264854366 465 | 0.567489926995696,0.39357032152193006,0.5123037666069444,0.08297825613816934 466 | 0.6649010291920261,0.5047397206924729,0.44553780850164726,0.08797176467481449 467 | 0.7419214077622256,0.12624259595301301,0.9996070355531719,0.06564068719726601 468 | 0.32057166076285726,0.31659935778593506,0.1545928953280743,0.8581764856647075 469 | 0.09301334726876975,0.029335007802145707,0.6565556741127787,0.5739346723086256 470 | 0.04526245346044655,0.811778405905253,0.5066344307977078,0.8059965416628634 471 | 0.8019082608898042,0.10700897299954537,0.7204698407553198,0.7683031150791029 472 | 0.020961990206823322,0.42518192744827077,0.255974197387997,0.7306897745495752 473 | 0.277961915387847,0.8492620182849753,0.34537132250463576,0.15943780211188874 474 | 0.7189442674847957,0.8200795519063174,0.5656964635313514,0.3874546949307659 475 | 0.8268401141848842,0.23386593281804413,0.2845078924626273,0.9406006062017659 476 | 0.23039240497397429,0.761442391428125,0.9392894531292257,0.9085208491905314 477 | 0.29838729188215807,0.5779838410486858,0.8325117429343363,0.9899463893352769 478 | 0.8326874856620977,0.4784419176424144,0.4577629375969198,0.6361698618474226 479 | 0.22261035955598107,0.7416746800534105,0.2454421585188994,0.58078567230664 480 | 0.9571491019691408,0.6511369553101578,0.411185456458281,0.8662214279916548 481 | 0.2824314672551116,0.4507337840870643,0.7588600505511918,0.003965656234663606 482 | 
0.07051188316677992,0.4761351008427914,0.5804476241986976,0.3893672504249296 483 | 0.6041561817793055,0.8835940935113349,0.8868809585790102,0.49618477971384356 484 | 0.6759397175203108,0.034009359158778896,0.45914373245374573,0.281948988195467 485 | 0.5848526420565103,0.0630186414631515,0.7352196908090805,0.8726649542656703 486 | 0.35400058269043266,0.3903304091964207,0.7778206460661615,0.2963873440073307 487 | 0.25238414291129874,0.5984939737976703,0.17745813410050737,0.05300754434656185 488 | 0.4954938296688921,0.05875264031148697,0.39350417074169364,0.4328594911628031 489 | 0.9197968567238212,0.5157119691395418,0.033718530566680616,0.5043298885605011 490 | 0.2989332113820844,0.7034343832601815,0.2026921006487793,0.6432289937302853 491 | 0.7779676202913468,0.610689301839738,0.9000420564430714,0.02779597067857109 492 | 0.6904833489696073,0.2790190885678663,0.09485751900588535,0.8814594722251355 493 | 0.5235313634791201,0.23610058713680604,0.9546586611291935,0.3497260033438957 494 | 0.11341445111776471,0.24283712840412264,0.9337047983116655,0.6507089882891222 495 | 0.6881809970467796,0.5354244135798761,0.07696211613601234,0.42662334179962424 496 | 0.6924697609000138,0.23785520913895875,0.8706017587442036,0.07581266371719819 497 | 0.528704746140301,0.22612181722433033,0.055834654502542325,0.1319617598033216 498 | 0.3247295924286646,0.21520112383699908,0.9353024163591054,0.7941147979952861 499 | 0.1180358736188245,0.2645743905840382,0.6292058447395664,0.8240620030300988 500 | 0.227021327823424,0.6602085012760409,0.8513526298900511,0.4786758954200979 501 | 0.4661570022497308,0.7529998155162079,0.11539087183657204,0.27971183920107534 502 | -------------------------------------------------------------------------------- /ML basics/example: -------------------------------------------------------------------------------- 1 | a,b,c,d 2 | 0,1,2,3 3 | 4,5,6,7 4 | 8,9,10,11 5 | 12,13,14,15 6 | -------------------------------------------------------------------------------- 
/ML basics/group.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishikkkkaaaa/Python-ML/8c64a5d82314a5603e2e892ba991fabdee8932f2/ML basics/group.png -------------------------------------------------------------------------------- /ML basics/multi_index_example: -------------------------------------------------------------------------------- 1 | first,bar,bar,baz,baz,foo,foo,qux,qux 2 | second,one,two,one,two,one,two,one,two 3 | ,,,,,,,, 4 | A,1.025984152081572,-0.1565979042889875,-0.031579143908112575,0.6498258334908454,2.154846443259472,-0.6102588558227414,-0.755325340010558,-0.34641850351854453 5 | B,0.1470267713241236,-0.47944803904109595,0.558769406443067,1.0248102783372157,-0.925874258809907,1.8628641384939535,-1.1338171615837889,0.6104779075384634 6 | C,0.3860303121135517,2.084018530338962,-0.37651867524923904,0.23033634359240704,0.6812092925867574,1.0351250747739213,-0.031160481493099617,1.9399323109926203 7 | -------------------------------------------------------------------------------- /ML basics/my_picture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ishikkkkaaaa/Python-ML/8c64a5d82314a5603e2e892ba991fabdee8932f2/ML basics/my_picture.png --------------------------------------------------------------------------------