├── Data cleaning using Python └── README.md /Data cleaning using Python: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "3631d4f9", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import pandas as pd" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 3, 16 | "id": "85c18b1c", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "import matplotlib.pyplot as plt" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 4, 26 | "id": "0f16a430", 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "data=pd.read_csv(r\"D:\\dataset for ML\\Employee.csv\")" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 5, 36 | "id": "68f1e690", 37 | "metadata": {}, 38 | "outputs": [ 39 | { 40 | "data": { 41 | "text/html": [ 42 | "
\n", 43 | "\n", 56 | "\n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | "
EducationJoiningYearCityPaymentTierAgeGenderEverBenchedExperienceInCurrentDomainLeaveOrNot
0Bachelors2017Bangalore334MaleNo00
1Bachelors2013Pune128FemaleNo31
2Bachelors2014New Delhi338FemaleNo20
3Masters2016Bangalore327MaleNo51
4Masters2017Pune324MaleYes21
..............................
4648Bachelors2013Bangalore326FemaleNo40
4649Masters2013Pune237MaleNo21
4650Masters2018New Delhi327MaleNo51
4651Bachelors2012Bangalore330MaleYes20
4652Bachelors2015Bangalore333MaleYes40
\n", 206 | "

4653 rows × 9 columns

\n", 207 | "
" 208 | ], 209 | "text/plain": [ 210 | " Education JoiningYear City PaymentTier Age Gender EverBenched \\\n", 211 | "0 Bachelors 2017 Bangalore 3 34 Male No \n", 212 | "1 Bachelors 2013 Pune 1 28 Female No \n", 213 | "2 Bachelors 2014 New Delhi 3 38 Female No \n", 214 | "3 Masters 2016 Bangalore 3 27 Male No \n", 215 | "4 Masters 2017 Pune 3 24 Male Yes \n", 216 | "... ... ... ... ... ... ... ... \n", 217 | "4648 Bachelors 2013 Bangalore 3 26 Female No \n", 218 | "4649 Masters 2013 Pune 2 37 Male No \n", 219 | "4650 Masters 2018 New Delhi 3 27 Male No \n", 220 | "4651 Bachelors 2012 Bangalore 3 30 Male Yes \n", 221 | "4652 Bachelors 2015 Bangalore 3 33 Male Yes \n", 222 | "\n", 223 | " ExperienceInCurrentDomain LeaveOrNot \n", 224 | "0 0 0 \n", 225 | "1 3 1 \n", 226 | "2 2 0 \n", 227 | "3 5 1 \n", 228 | "4 2 1 \n", 229 | "... ... ... \n", 230 | "4648 4 0 \n", 231 | "4649 2 1 \n", 232 | "4650 5 1 \n", 233 | "4651 2 0 \n", 234 | "4652 4 0 \n", 235 | "\n", 236 | "[4653 rows x 9 columns]" 237 | ] 238 | }, 239 | "execution_count": 5, 240 | "metadata": {}, 241 | "output_type": "execute_result" 242 | } 243 | ], 244 | "source": [ 245 | "data" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 6, 251 | "id": "7e30784e", 252 | "metadata": {}, 253 | "outputs": [], 254 | "source": [ 255 | "data=data.dropna()" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 7, 261 | "id": "c60e9f00", 262 | "metadata": {}, 263 | "outputs": [ 264 | { 265 | "data": { 266 | "text/html": [ 267 | "
\n", 268 | "\n", 281 | "\n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | "
EducationJoiningYearCityPaymentTierAgeGenderEverBenchedExperienceInCurrentDomainLeaveOrNot
0Bachelors2017Bangalore334MaleNo00
1Bachelors2013Pune128FemaleNo31
2Bachelors2014New Delhi338FemaleNo20
3Masters2016Bangalore327MaleNo51
4Masters2017Pune324MaleYes21
..............................
4648Bachelors2013Bangalore326FemaleNo40
4649Masters2013Pune237MaleNo21
4650Masters2018New Delhi327MaleNo51
4651Bachelors2012Bangalore330MaleYes20
4652Bachelors2015Bangalore333MaleYes40
\n", 431 | "

4653 rows × 9 columns

\n", 432 | "
" 433 | ], 434 | "text/plain": [ 435 | " Education JoiningYear City PaymentTier Age Gender EverBenched \\\n", 436 | "0 Bachelors 2017 Bangalore 3 34 Male No \n", 437 | "1 Bachelors 2013 Pune 1 28 Female No \n", 438 | "2 Bachelors 2014 New Delhi 3 38 Female No \n", 439 | "3 Masters 2016 Bangalore 3 27 Male No \n", 440 | "4 Masters 2017 Pune 3 24 Male Yes \n", 441 | "... ... ... ... ... ... ... ... \n", 442 | "4648 Bachelors 2013 Bangalore 3 26 Female No \n", 443 | "4649 Masters 2013 Pune 2 37 Male No \n", 444 | "4650 Masters 2018 New Delhi 3 27 Male No \n", 445 | "4651 Bachelors 2012 Bangalore 3 30 Male Yes \n", 446 | "4652 Bachelors 2015 Bangalore 3 33 Male Yes \n", 447 | "\n", 448 | " ExperienceInCurrentDomain LeaveOrNot \n", 449 | "0 0 0 \n", 450 | "1 3 1 \n", 451 | "2 2 0 \n", 452 | "3 5 1 \n", 453 | "4 2 1 \n", 454 | "... ... ... \n", 455 | "4648 4 0 \n", 456 | "4649 2 1 \n", 457 | "4650 5 1 \n", 458 | "4651 2 0 \n", 459 | "4652 4 0 \n", 460 | "\n", 461 | "[4653 rows x 9 columns]" 462 | ] 463 | }, 464 | "execution_count": 7, 465 | "metadata": {}, 466 | "output_type": "execute_result" 467 | } 468 | ], 469 | "source": [ 470 | "data" 471 | ] 472 | }, 473 | { 474 | "cell_type": "code", 475 | "execution_count": 8, 476 | "id": "755c3d9d", 477 | "metadata": {}, 478 | "outputs": [], 479 | "source": [ 480 | "data=data.fillna(2)" 481 | ] 482 | }, 483 | { 484 | "cell_type": "code", 485 | "execution_count": 9, 486 | "id": "697267b1", 487 | "metadata": {}, 488 | "outputs": [ 489 | { 490 | "data": { 491 | "text/html": [ 492 | "
\n", 493 | "\n", 506 | "\n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | "
EducationJoiningYearCityPaymentTierAgeGenderEverBenchedExperienceInCurrentDomainLeaveOrNot
0Bachelors2017Bangalore334MaleNo00
1Bachelors2013Pune128FemaleNo31
2Bachelors2014New Delhi338FemaleNo20
3Masters2016Bangalore327MaleNo51
4Masters2017Pune324MaleYes21
..............................
4648Bachelors2013Bangalore326FemaleNo40
4649Masters2013Pune237MaleNo21
4650Masters2018New Delhi327MaleNo51
4651Bachelors2012Bangalore330MaleYes20
4652Bachelors2015Bangalore333MaleYes40
\n", 656 | "

4653 rows × 9 columns

\n", 657 | "
" 658 | ], 659 | "text/plain": [ 660 | " Education JoiningYear City PaymentTier Age Gender EverBenched \\\n", 661 | "0 Bachelors 2017 Bangalore 3 34 Male No \n", 662 | "1 Bachelors 2013 Pune 1 28 Female No \n", 663 | "2 Bachelors 2014 New Delhi 3 38 Female No \n", 664 | "3 Masters 2016 Bangalore 3 27 Male No \n", 665 | "4 Masters 2017 Pune 3 24 Male Yes \n", 666 | "... ... ... ... ... ... ... ... \n", 667 | "4648 Bachelors 2013 Bangalore 3 26 Female No \n", 668 | "4649 Masters 2013 Pune 2 37 Male No \n", 669 | "4650 Masters 2018 New Delhi 3 27 Male No \n", 670 | "4651 Bachelors 2012 Bangalore 3 30 Male Yes \n", 671 | "4652 Bachelors 2015 Bangalore 3 33 Male Yes \n", 672 | "\n", 673 | " ExperienceInCurrentDomain LeaveOrNot \n", 674 | "0 0 0 \n", 675 | "1 3 1 \n", 676 | "2 2 0 \n", 677 | "3 5 1 \n", 678 | "4 2 1 \n", 679 | "... ... ... \n", 680 | "4648 4 0 \n", 681 | "4649 2 1 \n", 682 | "4650 5 1 \n", 683 | "4651 2 0 \n", 684 | "4652 4 0 \n", 685 | "\n", 686 | "[4653 rows x 9 columns]" 687 | ] 688 | }, 689 | "execution_count": 9, 690 | "metadata": {}, 691 | "output_type": "execute_result" 692 | } 693 | ], 694 | "source": [ 695 | "data" 696 | ] 697 | }, 698 | { 699 | "cell_type": "code", 700 | "execution_count": 10, 701 | "id": "21ad4911", 702 | "metadata": {}, 703 | "outputs": [], 704 | "source": [ 705 | "pay=data['PaymentTier']" 706 | ] 707 | }, 708 | { 709 | "cell_type": "code", 710 | "execution_count": 11, 711 | "id": "dc55b7d2", 712 | "metadata": {}, 713 | "outputs": [ 714 | { 715 | "data": { 716 | "text/plain": [ 717 | "0 3\n", 718 | "1 1\n", 719 | "2 3\n", 720 | "3 3\n", 721 | "4 3\n", 722 | " ..\n", 723 | "4648 3\n", 724 | "4649 2\n", 725 | "4650 3\n", 726 | "4651 3\n", 727 | "4652 3\n", 728 | "Name: PaymentTier, Length: 4653, dtype: int64" 729 | ] 730 | }, 731 | "execution_count": 11, 732 | "metadata": {}, 733 | "output_type": "execute_result" 734 | } 735 | ], 736 | "source": [ 737 | "pay" 738 | ] 739 | }, 740 | { 741 | "cell_type": "code", 742 | 
"execution_count": 18, 743 | "id": "ae905dd1", 744 | "metadata": {}, 745 | "outputs": [], 746 | "source": [ 747 | "age=data['Age']" 748 | ] 749 | }, 750 | { 751 | "cell_type": "code", 752 | "execution_count": 19, 753 | "id": "831979df", 754 | "metadata": {}, 755 | "outputs": [ 756 | { 757 | "data": { 758 | "text/plain": [ 759 | "0 34\n", 760 | "1 28\n", 761 | "2 38\n", 762 | "3 27\n", 763 | "4 24\n", 764 | " ..\n", 765 | "4648 26\n", 766 | "4649 37\n", 767 | "4650 27\n", 768 | "4651 30\n", 769 | "4652 33\n", 770 | "Name: Age, Length: 4653, dtype: int64" 771 | ] 772 | }, 773 | "execution_count": 19, 774 | "metadata": {}, 775 | "output_type": "execute_result" 776 | } 777 | ], 778 | "source": [ 779 | "age" 780 | ] 781 | }, 782 | { 783 | "cell_type": "code", 784 | "execution_count": 26, 785 | "id": "3b693527", 786 | "metadata": {}, 787 | "outputs": [ 788 | { 789 | "data": { 790 | "image/png": "", 791 | "text/plain": [ 792 | "
" 793 | ] 794 | }, 795 | "metadata": {}, 796 | "output_type": "display_data" 797 | } 798 | ], 799 | "source": [ 800 | "plt.bar(pay,age)\n", 801 | "plt.xlabel(\"Employe\")\n", 802 | "plt.ylabel(\"Growth of employe\")\n", 803 | "plt.title(\"Employe's salary tire growth\")\n", 804 | "plt.show()" 805 | ] 806 | }, 807 | { 808 | "cell_type": "code", 809 | "execution_count": null, 810 | "id": "31cb7e9c", 811 | "metadata": {}, 812 | "outputs": [], 813 | "source": [] 814 | } 815 | ], 816 | "metadata": { 817 | "kernelspec": { 818 | "display_name": "Python 3 (ipykernel)", 819 | "language": "python", 820 | "name": "python3" 821 | }, 822 | "language_info": { 823 | "codemirror_mode": { 824 | "name": "ipython", 825 | "version": 3 826 | }, 827 | "file_extension": ".py", 828 | "mimetype": "text/x-python", 829 | "name": "python", 830 | "nbconvert_exporter": "python", 831 | "pygments_lexer": "ipython3", 832 | "version": "3.11.5" 833 | } 834 | }, 835 | "nbformat": 4, 836 | "nbformat_minor": 5 837 | } 838 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Data-Cleaning-using-Python 2 | This part consists of the data cleaning, which includes removing the null values and replacing null values with substitute values, producing a cleaned data set. 3 | --------------------------------------------------------------------------------