├── Confusion Matrix - Jupyter Notebook.pdf ├── DecisionTree - Jupyter Notebook.pdf ├── K-Means Clustering - Jupyter Notebook.pdf ├── KNN - Jupyter Notebook.pdf ├── Lab_1.ipynb ├── Linear Regression - Jupyter Notebook.pdf ├── Matpltlib - Jupyter Notebook.pdf ├── Multiple Regression - Jupyter Notebook.pdf ├── Numpy -1 - Jupyter Notebook.pdf ├── Numpy -2 - Jupyter Notebook.pdf ├── Numpy-3 - Jupyter Notebook.pdf ├── Numpy-4 - Jupyter Notebook.pdf ├── PANDASONE - Jupyter Notebook.pdf ├── PANDASTWO - Jupyter Notebook.pdf ├── Pandas CSV.pdf ├── Pandas Series & Data Frame.pdf ├── README.md ├── SEABORN - Jupyter Notebook.pdf ├── groupby - Jupyter Notebook.pdf ├── groupby.ipynb ├── groupby1 - Jupyter Notebook.pdf ├── handling missing data - Jupyter Notebook.pdf ├── homeprices.csv ├── income.csv ├── min -max-Normalization - Jupyter Notebook.pdf ├── multiple .homeprices.csv ├── naive_bayes - Jupyter Notebook.pdf ├── salaries.csv └── training-testing split - Jupyter Notebook.pdf /Confusion Matrix - Jupyter Notebook.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/poojavjpy/Data-Analytics/0d965cc93ab906ec8d04ed7daac99589569c0c5e/Confusion Matrix - Jupyter Notebook.pdf -------------------------------------------------------------------------------- /DecisionTree - Jupyter Notebook.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/poojavjpy/Data-Analytics/0d965cc93ab906ec8d04ed7daac99589569c0c5e/DecisionTree - Jupyter Notebook.pdf -------------------------------------------------------------------------------- /K-Means Clustering - Jupyter Notebook.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/poojavjpy/Data-Analytics/0d965cc93ab906ec8d04ed7daac99589569c0c5e/K-Means Clustering - Jupyter Notebook.pdf 
-------------------------------------------------------------------------------- /KNN - Jupyter Notebook.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/poojavjpy/Data-Analytics/0d965cc93ab906ec8d04ed7daac99589569c0c5e/KNN - Jupyter Notebook.pdf -------------------------------------------------------------------------------- /Lab_1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "view-in-github", 7 | "colab_type": "text" 8 | }, 9 | "source": [ 10 | "\"Open" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": { 16 | "id": "QK5LbElIDyEF" 17 | }, 18 | "source": [ 19 | "### ***Print Command***" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": { 26 | "colab": { 27 | "base_uri": "https://localhost:8080/" 28 | }, 29 | "id": "XCgsvkPw1lpD", 30 | "outputId": "b5d84dfc-ca97-4b04-b15f-c2afbe42ab68" 31 | }, 32 | "outputs": [ 33 | { 34 | "name": "stdout", 35 | "output_type": "stream", 36 | "text": [ 37 | "hello, Welcome to First Lab\n" 38 | ] 39 | } 40 | ], 41 | "source": [ 42 | "print(\"hello, Welcome to First Lab\") #this command is to print." 
43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": { 48 | "id": "Vga6Z7eus-Fx" 49 | }, 50 | "source": [ 51 | "## ***Variables***" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": { 58 | "colab": { 59 | "base_uri": "https://localhost:8080/" 60 | }, 61 | "id": "teGkrTDc100E", 62 | "outputId": "69f65e65-e032-4662-919d-f22b1996bb37" 63 | }, 64 | "outputs": [ 65 | { 66 | "name": "stdout", 67 | "output_type": "stream", 68 | "text": [ 69 | "pooja\n", 70 | "23.9\n" 71 | ] 72 | } 73 | ], 74 | "source": [ 75 | "name='pooja'\n", 76 | "age =23.9\n", 77 | "\n", 78 | "print(name)\n", 79 | "print(age)" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": { 86 | "colab": { 87 | "base_uri": "https://localhost:8080/" 88 | }, 89 | "id": "elmxitQtg0Xg", 90 | "outputId": "b9f153df-716e-4522-80d0-2a2844acd668" 91 | }, 92 | "outputs": [ 93 | { 94 | "name": "stdout", 95 | "output_type": "stream", 96 | "text": [ 97 | "priya\n", 98 | "27\n" 99 | ] 100 | } 101 | ], 102 | "source": [ 103 | "name=\"payal\"\n", 104 | "age =23\n", 105 | "name=\"priya\"\n", 106 | "age =27\n", 107 | "print(name)\n", 108 | "print(age)" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": { 115 | "colab": { 116 | "base_uri": "https://localhost:8080/" 117 | }, 118 | "id": "8_OXWc8chAkT", 119 | "outputId": "8c3bd7bd-49ef-4729-b067-b2e3ebe65098" 120 | }, 121 | "outputs": [ 122 | { 123 | "name": "stdout", 124 | "output_type": "stream", 125 | "text": [ 126 | "payal sharma\n", 127 | "19\n", 128 | "True\n" 129 | ] 130 | } 131 | ], 132 | "source": [ 133 | "first_name=\"payal\"\n", 134 | "last_name= \"sharma\"\n", 135 | "age= 19\n", 136 | "is_adult=True\n", 137 | "print(first_name,last_name)\n", 138 | "#printlast_name)\n", 139 | "print(age)\n", 140 | "print(is_adult)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": { 146 | "id": "fuuqgoEPjxqw" 147 
| }, 148 | "source": [ 149 | "## ***Taking Input from user***" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": { 156 | "colab": { 157 | "base_uri": "https://localhost:8080/" 158 | }, 159 | "id": "7CodKmQoh33e", 160 | "outputId": "2e88a75e-2f8c-4b8a-8376-2603c9807d0b" 161 | }, 162 | "outputs": [ 163 | { 164 | "name": "stdout", 165 | "output_type": "stream", 166 | "text": [ 167 | "whats your name ? pooja\n", 168 | " hello pooja\n" 169 | ] 170 | } 171 | ], 172 | "source": [ 173 | "name= input(\"whats your name ? \")\n", 174 | "print(\" hello\" , name)" 175 | ] 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "metadata": { 180 | "id": "w1I1FSxdjfpR" 181 | }, 182 | "source": [ 183 | "### ***Arithmetic Operators***" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": { 190 | "colab": { 191 | "base_uri": "https://localhost:8080/" 192 | }, 193 | "id": "edpRvF8ujvjb", 194 | "outputId": "4c2c6bd8-4a6e-4b08-fa95-ad05a2053bcf" 195 | }, 196 | "outputs": [ 197 | { 198 | "name": "stdout", 199 | "output_type": "stream", 200 | "text": [ 201 | "15\n", 202 | "3\n", 203 | "54\n", 204 | "1.5\n", 205 | "1\n", 206 | "3\n", 207 | "8\n" 208 | ] 209 | } 210 | ], 211 | "source": [ 212 | "print(9+6)\n", 213 | "print(9-6)\n", 214 | "print(9*6)\n", 215 | "print(9/6)\n", 216 | "print(9//6)\n", 217 | "print(9%6)\n", 218 | "print(2**3)" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "metadata": { 225 | "colab": { 226 | "base_uri": "https://localhost:8080/" 227 | }, 228 | "id": "zzp32ZJQkmi3", 229 | "outputId": "a267a0e6-136e-4625-eff3-cab1d9ed92e3" 230 | }, 231 | "outputs": [ 232 | { 233 | "name": "stdout", 234 | "output_type": "stream", 235 | "text": [ 236 | "12\n", 237 | "14\n", 238 | "12\n", 239 | "24\n" 240 | ] 241 | } 242 | ], 243 | "source": [ 244 | "i=5\n", 245 | "i=i+7\n", 246 | "print(i)\n", 247 | "i +=2\n", 248 | "print(i)\n", 249 | "i 
-=2\n", 250 | "print(i)\n", 251 | "i *=2\n", 252 | "print(i)" 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": { 258 | "id": "1ncuhbEvlY03" 259 | }, 260 | "source": [ 261 | "### ***Operator Precedence***" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "metadata": { 268 | "colab": { 269 | "base_uri": "https://localhost:8080/" 270 | }, 271 | "id": "Op1xFtkclGGg", 272 | "outputId": "e2da2291-daf0-4139-a26d-265df140dad0" 273 | }, 274 | "outputs": [ 275 | { 276 | "name": "stdout", 277 | "output_type": "stream", 278 | "text": [ 279 | "13\n", 280 | "16\n" 281 | ] 282 | } 283 | ], 284 | "source": [ 285 | "result = 3+5*2\n", 286 | "print(result)\n", 287 | "result = (3+5)*2\n", 288 | "print(result)" 289 | ] 290 | }, 291 | { 292 | "cell_type": "markdown", 293 | "metadata": { 294 | "id": "HfuwcNUbmPE0" 295 | }, 296 | "source": [ 297 | "### ***Comparison Operators***" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": null, 303 | "metadata": { 304 | "colab": { 305 | "base_uri": "https://localhost:8080/" 306 | }, 307 | "id": "q24BgFp8mVQY", 308 | "outputId": "a5ffc4d4-1241-4582-8a01-739adc095db0" 309 | }, 310 | "outputs": [ 311 | { 312 | "name": "stdout", 313 | "output_type": "stream", 314 | "text": [ 315 | "False\n", 316 | "True\n", 317 | "False\n", 318 | "True\n" 319 | ] 320 | } 321 | ], 322 | "source": [ 323 | "print(3>8)\n", 324 | "print(3<=8)\n", 325 | "print(3==8)\n", 326 | "print(3!=8)" 327 | ] 328 | }, 329 | { 330 | "cell_type": "markdown", 331 | "metadata": { 332 | "id": "zQZDFW39mkqv" 333 | }, 334 | "source": [ 335 | "### ***Logical Operators***\n", 336 | "\n", 337 | "\n" 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "execution_count": null, 343 | "metadata": { 344 | "colab": { 345 | "base_uri": "https://localhost:8080/" 346 | }, 347 | "id": "PcQcYegHmwSr", 348 | "outputId": "b1764675-2ef4-4e8a-a843-5e14b7e80e12" 349 | }, 350 | "outputs": [ 351 | { 352 | "name": 
"stdout", 353 | "output_type": "stream", 354 | "text": [ 355 | "False\n", 356 | "True\n", 357 | "True\n" 358 | ] 359 | } 360 | ], 361 | "source": [ 362 | "print(3>2 and 1>10)\n", 363 | "print(3>2 or 1>10)\n", 364 | "print(not 1>10)" 365 | ] 366 | }, 367 | { 368 | "cell_type": "markdown", 369 | "metadata": { 370 | "id": "fT_nawtYutD6" 371 | }, 372 | "source": [ 373 | "### ***Data Types : int, float, str, boolean***" 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": null, 379 | "metadata": { 380 | "colab": { 381 | "base_uri": "https://localhost:8080/" 382 | }, 383 | "id": "5Jl_yZO1omwA", 384 | "outputId": "aa65a64d-995b-4d58-a64c-5cd458e0b0b5" 385 | }, 386 | "outputs": [ 387 | { 388 | "name": "stdout", 389 | "output_type": "stream", 390 | "text": [ 391 | "32\n" 392 | ] 393 | } 394 | ], 395 | "source": [ 396 | "age=29\n", 397 | "new_age = age+3\n", 398 | "print(new_age)" 399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": null, 404 | "metadata": { 405 | "colab": { 406 | "base_uri": "https://localhost:8080/" 407 | }, 408 | "id": "wvH9L7xxo64C", 409 | "outputId": "70528c2b-aaae-4ddd-881e-ab686a07f056" 410 | }, 411 | "outputs": [ 412 | { 413 | "output_type": "stream", 414 | "name": "stdout", 415 | "text": [ 416 | "enter your age 33\n", 417 | "36\n" 418 | ] 419 | } 420 | ], 421 | "source": [ 422 | "age= input(\"enter your age \") #Typecasting\n", 423 | "new_age= int (age) + 3\n", 424 | "print(new_age)" 425 | ] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": null, 430 | "metadata": { 431 | "colab": { 432 | "base_uri": "https://localhost:8080/" 433 | }, 434 | "id": "D9tiRUsSsf2B", 435 | "outputId": "189bb053-14ea-45db-b63e-1521c671e8f7" 436 | }, 437 | "outputs": [ 438 | { 439 | "name": "stdout", 440 | "output_type": "stream", 441 | "text": [ 442 | "45\n" 443 | ] 444 | } 445 | ], 446 | "source": [ 447 | "number=45\n", 448 | "float(number)\n", 449 | "print(number)" 450 | ] 451 | }, 452 | { 453 | 
"cell_type": "code", 454 | "execution_count": null, 455 | "metadata": { 456 | "colab": { 457 | "base_uri": "https://localhost:8080/" 458 | }, 459 | "id": "vgzdNN-fsnZN", 460 | "outputId": "30a3562c-3a33-4ac7-d1a8-46c84f91d41e" 461 | }, 462 | "outputs": [ 463 | { 464 | "name": "stdout", 465 | "output_type": "stream", 466 | "text": [ 467 | "90.0\n" 468 | ] 469 | } 470 | ], 471 | "source": [ 472 | "num=number+float(number)\n", 473 | "print(num)" 474 | ] 475 | }, 476 | { 477 | "cell_type": "markdown", 478 | "metadata": { 479 | "id": "twR70yCJm7MP" 480 | }, 481 | "source": [ 482 | "### ***This is first program***" 483 | ] 484 | }, 485 | { 486 | "cell_type": "code", 487 | "execution_count": null, 488 | "metadata": { 489 | "colab": { 490 | "base_uri": "https://localhost:8080/" 491 | }, 492 | "id": "BcyCzaB3t50C", 493 | "outputId": "9a2f365e-0398-4aaa-e9f8-1bd60849fc1c" 494 | }, 495 | "outputs": [ 496 | { 497 | "output_type": "stream", 498 | "name": "stdout", 499 | "text": [ 500 | "first number 3\n", 501 | "second number 5\n", 502 | "sum is : 35\n" 503 | ] 504 | } 505 | ], 506 | "source": [ 507 | "first_num=input(\"first number \")\n", 508 | "second_num=input(\"second number \")\n", 509 | "\n", 510 | "sum=first_num+second_num\n", 511 | "#print(sum)\n", 512 | "print(\"sum is : \" + sum)" 513 | ] 514 | }, 515 | { 516 | "cell_type": "code", 517 | "execution_count": null, 518 | "metadata": { 519 | "colab": { 520 | "base_uri": "https://localhost:8080/" 521 | }, 522 | "id": "X7EWwis5ug4N", 523 | "outputId": "35bb4e05-ed30-4286-92b1-62991858483e" 524 | }, 525 | "outputs": [ 526 | { 527 | "output_type": "stream", 528 | "name": "stdout", 529 | "text": [ 530 | "first number 3\n", 531 | "second number 3\n", 532 | "sum is : 6\n" 533 | ] 534 | } 535 | ], 536 | "source": [ 537 | "first_num=input(\"first number \")\n", 538 | "second_num=input(\"second number \")\n", 539 | "\n", 540 | "sum=int (first_num) + int (second_num)\n", 541 | "#print(sum)\n", 542 | "#print(\"sum is : \" + sum)\n", 
543 | "print(\"sum is : \" + str (sum))" 544 | ] 545 | }, 546 | { 547 | "cell_type": "markdown", 548 | "metadata": { 549 | "id": "lpoq6RC01FPi" 550 | }, 551 | "source": [ 552 | "### ***Operation on String***" 553 | ] 554 | }, 555 | { 556 | "cell_type": "code", 557 | "execution_count": null, 558 | "metadata": { 559 | "colab": { 560 | "base_uri": "https://localhost:8080/" 561 | }, 562 | "id": "ye4RLhjVuzhg", 563 | "outputId": "04602d87-3175-45a9-d999-1b2c78508d11" 564 | }, 565 | "outputs": [ 566 | { 567 | "output_type": "stream", 568 | "name": "stdout", 569 | "text": [ 570 | "Pooja Vajpayee\n" 571 | ] 572 | } 573 | ], 574 | "source": [ 575 | "name= \"Pooja Vajpayee\"\n", 576 | "print(name)" 577 | ] 578 | }, 579 | { 580 | "cell_type": "code", 581 | "execution_count": null, 582 | "metadata": { 583 | "colab": { 584 | "base_uri": "https://localhost:8080/" 585 | }, 586 | "id": "9xa7Vl4bwNRz", 587 | "outputId": "648e3f65-d0c3-4069-aa05-514d623cb7b4" 588 | }, 589 | "outputs": [ 590 | { 591 | "output_type": "stream", 592 | "name": "stdout", 593 | "text": [ 594 | "POOJA VAJPAYEE\n" 595 | ] 596 | } 597 | ], 598 | "source": [ 599 | "print(name.upper())" 600 | ] 601 | }, 602 | { 603 | "cell_type": "code", 604 | "execution_count": null, 605 | "metadata": { 606 | "colab": { 607 | "base_uri": "https://localhost:8080/" 608 | }, 609 | "id": "0epvbOpqwSim", 610 | "outputId": "80ef6973-56cb-4d64-ba84-15be1fe216bc" 611 | }, 612 | "outputs": [ 613 | { 614 | "output_type": "stream", 615 | "name": "stdout", 616 | "text": [ 617 | "pooja vajpayee\n" 618 | ] 619 | } 620 | ], 621 | "source": [ 622 | "print(name.lower())" 623 | ] 624 | }, 625 | { 626 | "cell_type": "code", 627 | "execution_count": null, 628 | "metadata": { 629 | "colab": { 630 | "base_uri": "https://localhost:8080/" 631 | }, 632 | "id": "SopEoDvywfvj", 633 | "outputId": "7a2f3317-edb9-40de-c710-6dc40e72f910" 634 | }, 635 | "outputs": [ 636 | { 637 | "output_type": "stream", 638 | "name": "stdout", 639 | "text": [ 640 | "4\n" 
641 | ] 642 | } 643 | ], 644 | "source": [ 645 | "print(name.find('a')) #return location starting from 0." 646 | ] 647 | }, 648 | { 649 | "cell_type": "code", 650 | "execution_count": null, 651 | "metadata": { 652 | "colab": { 653 | "base_uri": "https://localhost:8080/" 654 | }, 655 | "id": "VUu1sw3dzWZJ", 656 | "outputId": "69115bff-51b4-4948-a4ea-d13ca007ace8" 657 | }, 658 | "outputs": [ 659 | { 660 | "output_type": "stream", 661 | "name": "stdout", 662 | "text": [ 663 | "-1\n" 664 | ] 665 | } 666 | ], 667 | "source": [ 668 | "print(name.find('pooja'))" 669 | ] 670 | }, 671 | { 672 | "cell_type": "code", 673 | "execution_count": null, 674 | "metadata": { 675 | "colab": { 676 | "base_uri": "https://localhost:8080/" 677 | }, 678 | "id": "EoP5KzMkzaUR", 679 | "outputId": "92bb376b-8174-4528-bd25-c3a6fc846a66" 680 | }, 681 | "outputs": [ 682 | { 683 | "output_type": "stream", 684 | "name": "stdout", 685 | "text": [ 686 | "12\n" 687 | ] 688 | } 689 | ], 690 | "source": [ 691 | "print(name.find('e'))" 692 | ] 693 | }, 694 | { 695 | "cell_type": "code", 696 | "execution_count": null, 697 | "metadata": { 698 | "colab": { 699 | "base_uri": "https://localhost:8080/" 700 | }, 701 | "id": "obs9DWqpzNKL", 702 | "outputId": "819c56e0-143c-4e2b-eeec-354469b642a5" 703 | }, 704 | "outputs": [ 705 | { 706 | "output_type": "stream", 707 | "name": "stdout", 708 | "text": [ 709 | "6\n" 710 | ] 711 | } 712 | ], 713 | "source": [ 714 | "print(name.find('Vajpayee'))" 715 | ] 716 | }, 717 | { 718 | "cell_type": "code", 719 | "execution_count": null, 720 | "metadata": { 721 | "colab": { 722 | "base_uri": "https://localhost:8080/" 723 | }, 724 | "id": "tSg8UQCNxcD5", 725 | "outputId": "8ce2cfa1-0cab-4c64-8135-d21b591f17b6" 726 | }, 727 | "outputs": [ 728 | { 729 | "output_type": "stream", 730 | "name": "stdout", 731 | "text": [ 732 | "Pooja Bajpai\n", 733 | "Pooja Vajpayee\n" 734 | ] 735 | } 736 | ], 737 | "source": [ 738 | "print(name.replace(\"Pooja Vajpayee\" , \"Pooja Bajpai\"))\n", 
739 | "print(name)" 740 | ] 741 | }, 742 | { 743 | "cell_type": "code", 744 | "execution_count": null, 745 | "metadata": { 746 | "colab": { 747 | "base_uri": "https://localhost:8080/" 748 | }, 749 | "id": "BIk_A78Wxs8h", 750 | "outputId": "1ee6cf0d-01c0-4320-d57e-e56934db0678" 751 | }, 752 | "outputs": [ 753 | { 754 | "output_type": "stream", 755 | "name": "stdout", 756 | "text": [ 757 | "Dr. Pooja Vajpayee\n" 758 | ] 759 | } 760 | ], 761 | "source": [ 762 | "print(name.replace(\"Pooja\" , \"Dr. Pooja\"))" 763 | ] 764 | }, 765 | { 766 | "cell_type": "code", 767 | "execution_count": null, 768 | "metadata": { 769 | "colab": { 770 | "base_uri": "https://localhost:8080/" 771 | }, 772 | "id": "_EvNWD08yF30", 773 | "outputId": "4083c054-139f-45d8-8409-2dff51af803b" 774 | }, 775 | "outputs": [ 776 | { 777 | "output_type": "stream", 778 | "name": "stdout", 779 | "text": [ 780 | "Pooja Bajpayee\n" 781 | ] 782 | } 783 | ], 784 | "source": [ 785 | "print(name.replace(\"V\" , \"B\"))" 786 | ] 787 | }, 788 | { 789 | "cell_type": "code", 790 | "execution_count": null, 791 | "metadata": { 792 | "colab": { 793 | "base_uri": "https://localhost:8080/" 794 | }, 795 | "id": "tX7LZ2aJyeYZ", 796 | "outputId": "8c01d796-31b3-4575-f191-f49ff965936f" 797 | }, 798 | "outputs": [ 799 | { 800 | "output_type": "stream", 801 | "name": "stdout", 802 | "text": [ 803 | "False\n" 804 | ] 805 | } 806 | ], 807 | "source": [ 808 | "print(\"t\" in name)" 809 | ] 810 | }, 811 | { 812 | "cell_type": "code", 813 | "execution_count": null, 814 | "metadata": { 815 | "colab": { 816 | "base_uri": "https://localhost:8080/" 817 | }, 818 | "id": "KmsG8l9hykFd", 819 | "outputId": "abdea27c-5c32-4701-a9f4-7a980aadb8c7" 820 | }, 821 | "outputs": [ 822 | { 823 | "output_type": "stream", 824 | "name": "stdout", 825 | "text": [ 826 | "True\n" 827 | ] 828 | } 829 | ], 830 | "source": [ 831 | "print(\"V\" in name)" 832 | ] 833 | } 834 | ], 835 | "metadata": { 836 | "colab": { 837 | "provenance": [], 838 | 
"authorship_tag": "ABX9TyPLehWswDaYMOEC/9Mqe3sJ", 839 | "include_colab_link": true 840 | }, 841 | "kernelspec": { 842 | "display_name": "Python 3", 843 | "name": "python3" 844 | }, 845 | "language_info": { 846 | "name": "python" 847 | } 848 | }, 849 | "nbformat": 4, 850 | "nbformat_minor": 0 851 | } -------------------------------------------------------------------------------- /Linear Regression - Jupyter Notebook.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/poojavjpy/Data-Analytics/0d965cc93ab906ec8d04ed7daac99589569c0c5e/Linear Regression - Jupyter Notebook.pdf -------------------------------------------------------------------------------- /Matpltlib - Jupyter Notebook.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/poojavjpy/Data-Analytics/0d965cc93ab906ec8d04ed7daac99589569c0c5e/Matpltlib - Jupyter Notebook.pdf -------------------------------------------------------------------------------- /Multiple Regression - Jupyter Notebook.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/poojavjpy/Data-Analytics/0d965cc93ab906ec8d04ed7daac99589569c0c5e/Multiple Regression - Jupyter Notebook.pdf -------------------------------------------------------------------------------- /Numpy -1 - Jupyter Notebook.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/poojavjpy/Data-Analytics/0d965cc93ab906ec8d04ed7daac99589569c0c5e/Numpy -1 - Jupyter Notebook.pdf -------------------------------------------------------------------------------- /Numpy -2 - Jupyter Notebook.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/poojavjpy/Data-Analytics/0d965cc93ab906ec8d04ed7daac99589569c0c5e/Numpy -2 - Jupyter Notebook.pdf 
-------------------------------------------------------------------------------- /Numpy-3 - Jupyter Notebook.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/poojavjpy/Data-Analytics/0d965cc93ab906ec8d04ed7daac99589569c0c5e/Numpy-3 - Jupyter Notebook.pdf -------------------------------------------------------------------------------- /Numpy-4 - Jupyter Notebook.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/poojavjpy/Data-Analytics/0d965cc93ab906ec8d04ed7daac99589569c0c5e/Numpy-4 - Jupyter Notebook.pdf -------------------------------------------------------------------------------- /PANDASONE - Jupyter Notebook.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/poojavjpy/Data-Analytics/0d965cc93ab906ec8d04ed7daac99589569c0c5e/PANDASONE - Jupyter Notebook.pdf -------------------------------------------------------------------------------- /PANDASTWO - Jupyter Notebook.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/poojavjpy/Data-Analytics/0d965cc93ab906ec8d04ed7daac99589569c0c5e/PANDASTWO - Jupyter Notebook.pdf -------------------------------------------------------------------------------- /Pandas CSV.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/poojavjpy/Data-Analytics/0d965cc93ab906ec8d04ed7daac99589569c0c5e/Pandas CSV.pdf -------------------------------------------------------------------------------- /Pandas Series & Data Frame.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/poojavjpy/Data-Analytics/0d965cc93ab906ec8d04ed7daac99589569c0c5e/Pandas Series & Data Frame.pdf 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Data-Analytics 2 | Author : POOJA VAJPAYEE 3 | -------------------------------------------------------------------------------- /SEABORN - Jupyter Notebook.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/poojavjpy/Data-Analytics/0d965cc93ab906ec8d04ed7daac99589569c0c5e/SEABORN - Jupyter Notebook.pdf -------------------------------------------------------------------------------- /groupby - Jupyter Notebook.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/poojavjpy/Data-Analytics/0d965cc93ab906ec8d04ed7daac99589569c0c5e/groupby - Jupyter Notebook.pdf -------------------------------------------------------------------------------- /groupby.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "1b9c8b21", 6 | "metadata": {}, 7 | "source": [ 8 | "# groupby and aggregate functions in pandas" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "b568b77d", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd\n" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "id": "1a88630d", 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/html": [ 30 | "
\n", 31 | "\n", 44 | "\n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | "
FlavorBase FlavorLikedFlavor RatingTexture RatingTotal Rating
0Mint Chocolate ChipVanillaYes10.08.018.0
1ChocolateChocolateYes8.87.616.6
2VanillaVanillaNo4.75.09.7
3Cookie DoughVanillaYes6.96.513.4
4Rocky RoadChocolateYes8.27.015.2
5PistachioVanillaNo2.33.45.7
6Cake BatterVanillaYes6.56.012.5
7NeapolitanVanillaNo3.85.08.8
8Chocolte Fudge BrownieChocolateYes8.27.115.3
\n", 140 | "
" 141 | ], 142 | "text/plain": [ 143 | " Flavor Base Flavor Liked Flavor Rating Texture Rating \\\n", 144 | "0 Mint Chocolate Chip Vanilla Yes 10.0 8.0 \n", 145 | "1 Chocolate Chocolate Yes 8.8 7.6 \n", 146 | "2 Vanilla Vanilla No 4.7 5.0 \n", 147 | "3 Cookie Dough Vanilla Yes 6.9 6.5 \n", 148 | "4 Rocky Road Chocolate Yes 8.2 7.0 \n", 149 | "5 Pistachio Vanilla No 2.3 3.4 \n", 150 | "6 Cake Batter Vanilla Yes 6.5 6.0 \n", 151 | "7 Neapolitan Vanilla No 3.8 5.0 \n", 152 | "8 Chocolte Fudge Brownie Chocolate Yes 8.2 7.1 \n", 153 | "\n", 154 | " Total Rating \n", 155 | "0 18.0 \n", 156 | "1 16.6 \n", 157 | "2 9.7 \n", 158 | "3 13.4 \n", 159 | "4 15.2 \n", 160 | "5 5.7 \n", 161 | "6 12.5 \n", 162 | "7 8.8 \n", 163 | "8 15.3 " 164 | ] 165 | }, 166 | "execution_count": 2, 167 | "metadata": {}, 168 | "output_type": "execute_result" 169 | } 170 | ], 171 | "source": [ 172 | "df = pd.read_csv('C:\\\\Users\\\\Asus\\\\Downloads\\\\Flavors.csv')\n", 173 | "df" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 3, 179 | "id": "5035ce95", 180 | "metadata": {}, 181 | "outputs": [ 182 | { 183 | "data": { 184 | "text/plain": [ 185 | "" 186 | ] 187 | }, 188 | "execution_count": 3, 189 | "metadata": {}, 190 | "output_type": "execute_result" 191 | } 192 | ], 193 | "source": [ 194 | "df.groupby('Base Flavor')" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 4, 200 | "id": "cadaaeec", 201 | "metadata": {}, 202 | "outputs": [], 203 | "source": [ 204 | "df1=df.groupby('Base Flavor')\n", 205 | "\n" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 5, 211 | "id": "24310f83", 212 | "metadata": {}, 213 | "outputs": [ 214 | { 215 | "name": "stderr", 216 | "output_type": "stream", 217 | "text": [ 218 | "C:\\Users\\Asus\\AppData\\Local\\Temp\\ipykernel_23396\\2053335143.py:1: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. 
In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n", 219 | " df1.mean()\n" 220 | ] 221 | }, 222 | { 223 | "data": { 224 | "text/html": [ 225 | "
\n", 226 | "\n", 239 | "\n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | "
Flavor RatingTexture RatingTotal Rating
Base Flavor
Chocolate8.47.23333315.70
Vanilla5.75.65000011.35
\n", 269 | "
" 270 | ], 271 | "text/plain": [ 272 | " Flavor Rating Texture Rating Total Rating\n", 273 | "Base Flavor \n", 274 | "Chocolate 8.4 7.233333 15.70\n", 275 | "Vanilla 5.7 5.650000 11.35" 276 | ] 277 | }, 278 | "execution_count": 5, 279 | "metadata": {}, 280 | "output_type": "execute_result" 281 | } 282 | ], 283 | "source": [ 284 | "df1.mean()" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": 6, 290 | "id": "a0834186", 291 | "metadata": {}, 292 | "outputs": [ 293 | { 294 | "name": "stderr", 295 | "output_type": "stream", 296 | "text": [ 297 | "C:\\Users\\Asus\\AppData\\Local\\Temp\\ipykernel_23396\\2409314591.py:1: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n", 298 | " df.groupby('Base Flavor').mean()\n" 299 | ] 300 | }, 301 | { 302 | "data": { 303 | "text/html": [ 304 | "
\n", 305 | "\n", 318 | "\n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | "
Flavor RatingTexture RatingTotal Rating
Base Flavor
Chocolate8.47.23333315.70
Vanilla5.75.65000011.35
\n", 348 | "
" 349 | ], 350 | "text/plain": [ 351 | " Flavor Rating Texture Rating Total Rating\n", 352 | "Base Flavor \n", 353 | "Chocolate 8.4 7.233333 15.70\n", 354 | "Vanilla 5.7 5.650000 11.35" 355 | ] 356 | }, 357 | "execution_count": 6, 358 | "metadata": {}, 359 | "output_type": "execute_result" 360 | } 361 | ], 362 | "source": [ 363 | "df.groupby('Base Flavor').mean()" 364 | ] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "execution_count": 7, 369 | "id": "547d6a56", 370 | "metadata": {}, 371 | "outputs": [ 372 | { 373 | "data": { 374 | "text/html": [ 375 | "
\n", 376 | "\n", 389 | "\n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | "
FlavorLikedFlavor RatingTexture RatingTotal Rating
Base Flavor
Chocolate33333
Vanilla66666
\n", 427 | "
" 428 | ], 429 | "text/plain": [ 430 | " Flavor Liked Flavor Rating Texture Rating Total Rating\n", 431 | "Base Flavor \n", 432 | "Chocolate 3 3 3 3 3\n", 433 | "Vanilla 6 6 6 6 6" 434 | ] 435 | }, 436 | "execution_count": 7, 437 | "metadata": {}, 438 | "output_type": "execute_result" 439 | } 440 | ], 441 | "source": [ 442 | "df.groupby('Base Flavor').count()" 443 | ] 444 | }, 445 | { 446 | "cell_type": "code", 447 | "execution_count": 8, 448 | "id": "275f0291", 449 | "metadata": {}, 450 | "outputs": [ 451 | { 452 | "data": { 453 | "text/html": [ 454 | "
\n", 455 | "\n", 468 | "\n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | "
FlavorLikedFlavor RatingTexture RatingTotal Rating
Base Flavor
ChocolateChocolateYes8.27.015.2
VanillaCake BatterNo2.33.45.7
\n", 506 | "
" 507 | ], 508 | "text/plain": [ 509 | " Flavor Liked Flavor Rating Texture Rating Total Rating\n", 510 | "Base Flavor \n", 511 | "Chocolate Chocolate Yes 8.2 7.0 15.2\n", 512 | "Vanilla Cake Batter No 2.3 3.4 5.7" 513 | ] 514 | }, 515 | "execution_count": 8, 516 | "metadata": {}, 517 | "output_type": "execute_result" 518 | } 519 | ], 520 | "source": [ 521 | "df.groupby('Base Flavor').min()" 522 | ] 523 | }, 524 | { 525 | "cell_type": "code", 526 | "execution_count": 9, 527 | "id": "45ed0109", 528 | "metadata": {}, 529 | "outputs": [ 530 | { 531 | "data": { 532 | "text/html": [ 533 | "
\n", 534 | "\n", 547 | "\n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | "
FlavorLikedFlavor RatingTexture RatingTotal Rating
Base Flavor
ChocolateRocky RoadYes8.87.616.6
VanillaVanillaYes10.08.018.0
\n", 585 | "
" 586 | ], 587 | "text/plain": [ 588 | " Flavor Liked Flavor Rating Texture Rating Total Rating\n", 589 | "Base Flavor \n", 590 | "Chocolate Rocky Road Yes 8.8 7.6 16.6\n", 591 | "Vanilla Vanilla Yes 10.0 8.0 18.0" 592 | ] 593 | }, 594 | "execution_count": 9, 595 | "metadata": {}, 596 | "output_type": "execute_result" 597 | } 598 | ], 599 | "source": [ 600 | "df.groupby('Base Flavor').max()" 601 | ] 602 | }, 603 | { 604 | "cell_type": "code", 605 | "execution_count": 10, 606 | "id": "7238b0c5", 607 | "metadata": {}, 608 | "outputs": [ 609 | { 610 | "name": "stderr", 611 | "output_type": "stream", 612 | "text": [ 613 | "C:\\Users\\Asus\\AppData\\Local\\Temp\\ipykernel_23396\\1221781544.py:1: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n", 614 | " df.groupby('Base Flavor').sum()\n" 615 | ] 616 | }, 617 | { 618 | "data": { 619 | "text/html": [ 620 | "
\n", 621 | "\n", 634 | "\n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | "
Flavor RatingTexture RatingTotal Rating
Base Flavor
Chocolate25.221.747.1
Vanilla34.233.968.1
\n", 664 | "
" 665 | ], 666 | "text/plain": [ 667 | " Flavor Rating Texture Rating Total Rating\n", 668 | "Base Flavor \n", 669 | "Chocolate 25.2 21.7 47.1\n", 670 | "Vanilla 34.2 33.9 68.1" 671 | ] 672 | }, 673 | "execution_count": 10, 674 | "metadata": {}, 675 | "output_type": "execute_result" 676 | } 677 | ], 678 | "source": [ 679 | "df.groupby('Base Flavor').sum()" 680 | ] 681 | }, 682 | { 683 | "cell_type": "code", 684 | "execution_count": 11, 685 | "id": "ff7756a2", 686 | "metadata": {}, 687 | "outputs": [ 688 | { 689 | "data": { 690 | "text/html": [ 691 | "
\n", 692 | "\n", 709 | "\n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | "
Flavor RatingTexture Rating
meanmaxcountsummeanmaxcountsum
Base Flavor
Chocolate8.48.8325.27.2333337.6321.7
Vanilla5.710.0634.25.6500008.0633.9
\n", 764 | "
" 765 | ], 766 | "text/plain": [ 767 | " Flavor Rating Texture Rating \n", 768 | " mean max count sum mean max count sum\n", 769 | "Base Flavor \n", 770 | "Chocolate 8.4 8.8 3 25.2 7.233333 7.6 3 21.7\n", 771 | "Vanilla 5.7 10.0 6 34.2 5.650000 8.0 6 33.9" 772 | ] 773 | }, 774 | "execution_count": 11, 775 | "metadata": {}, 776 | "output_type": "execute_result" 777 | } 778 | ], 779 | "source": [ 780 | "df.groupby('Base Flavor').agg({'Flavor Rating': ['mean','max','count','sum'], 'Texture Rating':['mean','max','count','sum'] })" 781 | ] 782 | }, 783 | { 784 | "cell_type": "code", 785 | "execution_count": 12, 786 | "id": "442da699", 787 | "metadata": {}, 788 | "outputs": [ 789 | { 790 | "name": "stderr", 791 | "output_type": "stream", 792 | "text": [ 793 | "C:\\Users\\Asus\\AppData\\Local\\Temp\\ipykernel_23396\\2854014148.py:1: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n", 794 | " df.groupby(['Base Flavor','Liked']).mean()\n" 795 | ] 796 | }, 797 | { 798 | "data": { 799 | "text/html": [ 800 | "
\n", 801 | "\n", 814 | "\n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | "
Flavor RatingTexture RatingTotal Rating
Base FlavorLiked
ChocolateYes8.47.23333315.700000
VanillaNo3.64.4666678.066667
Yes7.86.83333314.633333
\n", 854 | "
" 855 | ], 856 | "text/plain": [ 857 | " Flavor Rating Texture Rating Total Rating\n", 858 | "Base Flavor Liked \n", 859 | "Chocolate Yes 8.4 7.233333 15.700000\n", 860 | "Vanilla No 3.6 4.466667 8.066667\n", 861 | " Yes 7.8 6.833333 14.633333" 862 | ] 863 | }, 864 | "execution_count": 12, 865 | "metadata": {}, 866 | "output_type": "execute_result" 867 | } 868 | ], 869 | "source": [ 870 | "df.groupby(['Base Flavor','Liked']).mean()" 871 | ] 872 | }, 873 | { 874 | "cell_type": "code", 875 | "execution_count": 13, 876 | "id": "a33c6967", 877 | "metadata": {}, 878 | "outputs": [ 879 | { 880 | "data": { 881 | "text/html": [ 882 | "
\n", 883 | "\n", 900 | "\n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | "
Flavor Rating
meanmaxmincountsum
Base FlavorLiked
ChocolateYes8.48.88.2325.2
VanillaNo3.64.72.3310.8
Yes7.810.06.5323.4
\n", 955 | "
" 956 | ], 957 | "text/plain": [ 958 | " Flavor Rating \n", 959 | " mean max min count sum\n", 960 | "Base Flavor Liked \n", 961 | "Chocolate Yes 8.4 8.8 8.2 3 25.2\n", 962 | "Vanilla No 3.6 4.7 2.3 3 10.8\n", 963 | " Yes 7.8 10.0 6.5 3 23.4" 964 | ] 965 | }, 966 | "execution_count": 13, 967 | "metadata": {}, 968 | "output_type": "execute_result" 969 | } 970 | ], 971 | "source": [ 972 | "df.groupby(['Base Flavor','Liked']).agg({'Flavor Rating': ['mean','max','min','count','sum']})" 973 | ] 974 | }, 975 | { 976 | "cell_type": "code", 977 | "execution_count": 14, 978 | "id": "9a7cdd4f", 979 | "metadata": {}, 980 | "outputs": [ 981 | { 982 | "data": { 983 | "text/html": [ 984 | "
\n", 985 | "\n", 1002 | "\n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | " \n", 1092 | " \n", 1093 | " \n", 1094 | " \n", 1095 | " \n", 1096 | " \n", 1097 | " \n", 1098 | " \n", 1099 | " \n", 1100 | " \n", 1101 | " \n", 1102 | " \n", 1103 | " \n", 1104 | " \n", 1105 | " \n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1109 | "
Flavor RatingTexture RatingTotal Rating
countmeanstdmin25%50%75%maxcountmean...75%maxcountmeanstdmin25%50%75%max
Base Flavor
Chocolate3.08.40.3464108.28.2008.28.58.83.07.233333...7.3507.63.015.700.78102515.215.25015.315.95016.6
Vanilla6.05.72.7107192.34.0255.66.810.06.05.650000...6.3758.06.011.354.2636845.79.02511.113.17518.0
\n", 1110 | "

2 rows × 24 columns

\n", 1111 | "
" 1112 | ], 1113 | "text/plain": [ 1114 | " Flavor Rating \\\n", 1115 | " count mean std min 25% 50% 75% max \n", 1116 | "Base Flavor \n", 1117 | "Chocolate 3.0 8.4 0.346410 8.2 8.200 8.2 8.5 8.8 \n", 1118 | "Vanilla 6.0 5.7 2.710719 2.3 4.025 5.6 6.8 10.0 \n", 1119 | "\n", 1120 | " Texture Rating ... Total Rating \\\n", 1121 | " count mean ... 75% max count mean \n", 1122 | "Base Flavor ... \n", 1123 | "Chocolate 3.0 7.233333 ... 7.350 7.6 3.0 15.70 \n", 1124 | "Vanilla 6.0 5.650000 ... 6.375 8.0 6.0 11.35 \n", 1125 | "\n", 1126 | " \n", 1127 | " std min 25% 50% 75% max \n", 1128 | "Base Flavor \n", 1129 | "Chocolate 0.781025 15.2 15.250 15.3 15.950 16.6 \n", 1130 | "Vanilla 4.263684 5.7 9.025 11.1 13.175 18.0 \n", 1131 | "\n", 1132 | "[2 rows x 24 columns]" 1133 | ] 1134 | }, 1135 | "execution_count": 14, 1136 | "metadata": {}, 1137 | "output_type": "execute_result" 1138 | } 1139 | ], 1140 | "source": [ 1141 | "df.groupby('Base Flavor').describe()" 1142 | ] 1143 | }, 1144 | { 1145 | "cell_type": "code", 1146 | "execution_count": 15, 1147 | "id": "09a5fbc5", 1148 | "metadata": {}, 1149 | "outputs": [ 1150 | { 1151 | "name": "stderr", 1152 | "output_type": "stream", 1153 | "text": [ 1154 | "C:\\Users\\Asus\\AppData\\Local\\Temp\\ipykernel_23396\\2855992873.py:1: FutureWarning: ['Flavor', 'Base Flavor', 'Liked'] did not aggregate successfully. If any error is raised this will raise in a future version of pandas. Drop these columns/ops to avoid this warning.\n", 1155 | " df.aggregate(['mean'])\n" 1156 | ] 1157 | }, 1158 | { 1159 | "data": { 1160 | "text/html": [ 1161 | "
\n", 1162 | "\n", 1175 | "\n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | " \n", 1186 | " \n", 1187 | " \n", 1188 | " \n", 1189 | " \n", 1190 | " \n", 1191 | " \n", 1192 | "
Flavor RatingTexture RatingTotal Rating
mean6.66.17777812.8
\n", 1193 | "
" 1194 | ], 1195 | "text/plain": [ 1196 | " Flavor Rating Texture Rating Total Rating\n", 1197 | "mean 6.6 6.177778 12.8" 1198 | ] 1199 | }, 1200 | "execution_count": 15, 1201 | "metadata": {}, 1202 | "output_type": "execute_result" 1203 | } 1204 | ], 1205 | "source": [ 1206 | "df.aggregate(['mean'])" 1207 | ] 1208 | }, 1209 | { 1210 | "cell_type": "code", 1211 | "execution_count": null, 1212 | "id": "9a86a6ca", 1213 | "metadata": {}, 1214 | "outputs": [], 1215 | "source": [] 1216 | }, 1217 | { 1218 | "cell_type": "code", 1219 | "execution_count": null, 1220 | "id": "dfcc0745", 1221 | "metadata": {}, 1222 | "outputs": [], 1223 | "source": [] 1224 | }, 1225 | { 1226 | "cell_type": "code", 1227 | "execution_count": null, 1228 | "id": "2a7b1762", 1229 | "metadata": {}, 1230 | "outputs": [], 1231 | "source": [] 1232 | } 1233 | ], 1234 | "metadata": { 1235 | "kernelspec": { 1236 | "display_name": "Python 3 (ipykernel)", 1237 | "language": "python", 1238 | "name": "python3" 1239 | }, 1240 | "language_info": { 1241 | "codemirror_mode": { 1242 | "name": "ipython", 1243 | "version": 3 1244 | }, 1245 | "file_extension": ".py", 1246 | "mimetype": "text/x-python", 1247 | "name": "python", 1248 | "nbconvert_exporter": "python", 1249 | "pygments_lexer": "ipython3", 1250 | "version": "3.10.9" 1251 | } 1252 | }, 1253 | "nbformat": 4, 1254 | "nbformat_minor": 5 1255 | } 1256 | -------------------------------------------------------------------------------- /groupby1 - Jupyter Notebook.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/poojavjpy/Data-Analytics/0d965cc93ab906ec8d04ed7daac99589569c0c5e/groupby1 - Jupyter Notebook.pdf -------------------------------------------------------------------------------- /handling missing data - Jupyter Notebook.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/poojavjpy/Data-Analytics/0d965cc93ab906ec8d04ed7daac99589569c0c5e/handling missing data - Jupyter Notebook.pdf -------------------------------------------------------------------------------- /homeprices.csv: -------------------------------------------------------------------------------- 1 | area,price 2 | 2600,550000 3 | 3000,565000 4 | 3200,610000 5 | 3600,680000 6 | 4000,725000 7 | -------------------------------------------------------------------------------- /income.csv: -------------------------------------------------------------------------------- 1 | Name,Age,Income(Rs.) 2 | barjraj,27,70000 3 | ramdin verma,29,90000 4 | sharat chandran,29,61000 5 | birender mandal,28,60000 6 | amit,42,150000 7 | kushal,39,155000 8 | kasid,41,160000 9 | shiv prakash,38,162000 10 | vikram singh,36,156000 11 | sanjay,35,130000 12 | abhi,37,137000 13 | ram dutt gupta,26,45000 14 | khadak singh,27,48000 15 | gurmit singh,28,51000 16 | chanderpal,29,49500 17 | aman,32,53000 18 | khursid,40,65000 19 | rajeev,41,63000 20 | durgesh,43,64000 21 | nahar singh,39,80000 22 | ram kumar,41,82000 23 | sunder paal,39,58000 24 | maansingh aswal,, 25 | -------------------------------------------------------------------------------- /min -max-Normalization - Jupyter Notebook.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/poojavjpy/Data-Analytics/0d965cc93ab906ec8d04ed7daac99589569c0c5e/min -max-Normalization - Jupyter Notebook.pdf -------------------------------------------------------------------------------- /multiple .homeprices.csv: -------------------------------------------------------------------------------- 1 | area,bedrooms,age,price 2 | 2600,3,20,550000 3 | 3000,4,15,565000 4 | 3200,,18,610000 5 | 3600,3,30,595000 6 | 4000,5,8,760000 7 | 4100,6,8,810000 8 | -------------------------------------------------------------------------------- /naive_bayes - Jupyter Notebook.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/poojavjpy/Data-Analytics/0d965cc93ab906ec8d04ed7daac99589569c0c5e/naive_bayes - Jupyter Notebook.pdf -------------------------------------------------------------------------------- /salaries.csv: -------------------------------------------------------------------------------- 1 | company,job,degree,salary_more_then_100k 2 | google,sales executive,bachelors,0 3 | google,sales executive,masters,0 4 | google,business manager,bachelors,1 5 | google,business manager,masters,1 6 | google,computer programmer,bachelors,0 7 | google,computer programmer,masters,1 8 | abc pharma,sales executive,masters,0 9 | abc pharma,computer programmer,bachelors,0 10 | abc pharma,business manager,bachelors,0 11 | abc pharma,business manager,masters,1 12 | facebook,sales executive,bachelors,1 13 | facebook,sales executive,masters,1 14 | facebook,business manager,bachelors,1 15 | facebook,business manager,masters,1 16 | facebook,computer programmer,bachelors,1 17 | facebook,computer programmer,masters,1 -------------------------------------------------------------------------------- /training-testing split - Jupyter Notebook.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/poojavjpy/Data-Analytics/0d965cc93ab906ec8d04ed7daac99589569c0c5e/training-testing split - Jupyter Notebook.pdf --------------------------------------------------------------------------------