├── Data cleaning using Python
└── README.md
/Data cleaning using Python:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "id": "3631d4f9",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import pandas as pd"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 3,
16 | "id": "85c18b1c",
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "import matplotlib.pyplot as plt"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 4,
26 | "id": "0f16a430",
27 | "metadata": {},
28 | "outputs": [],
29 | "source": [
30 | "data=pd.read_csv(r\"D:\\dataset for ML\\Employee.csv\")"
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "execution_count": 5,
36 | "id": "68f1e690",
37 | "metadata": {},
38 | "outputs": [
39 | {
40 | "data": {
41 | "text/html": [
42 | "
\n",
43 | "\n",
56 | "
\n",
57 | " \n",
58 | " \n",
59 | " | \n",
60 | " Education | \n",
61 | " JoiningYear | \n",
62 | " City | \n",
63 | " PaymentTier | \n",
64 | " Age | \n",
65 | " Gender | \n",
66 | " EverBenched | \n",
67 | " ExperienceInCurrentDomain | \n",
68 | " LeaveOrNot | \n",
69 | "
\n",
70 | " \n",
71 | " \n",
72 | " \n",
73 | " 0 | \n",
74 | " Bachelors | \n",
75 | " 2017 | \n",
76 | " Bangalore | \n",
77 | " 3 | \n",
78 | " 34 | \n",
79 | " Male | \n",
80 | " No | \n",
81 | " 0 | \n",
82 | " 0 | \n",
83 | "
\n",
84 | " \n",
85 | " 1 | \n",
86 | " Bachelors | \n",
87 | " 2013 | \n",
88 | " Pune | \n",
89 | " 1 | \n",
90 | " 28 | \n",
91 | " Female | \n",
92 | " No | \n",
93 | " 3 | \n",
94 | " 1 | \n",
95 | "
\n",
96 | " \n",
97 | " 2 | \n",
98 | " Bachelors | \n",
99 | " 2014 | \n",
100 | " New Delhi | \n",
101 | " 3 | \n",
102 | " 38 | \n",
103 | " Female | \n",
104 | " No | \n",
105 | " 2 | \n",
106 | " 0 | \n",
107 | "
\n",
108 | " \n",
109 | " 3 | \n",
110 | " Masters | \n",
111 | " 2016 | \n",
112 | " Bangalore | \n",
113 | " 3 | \n",
114 | " 27 | \n",
115 | " Male | \n",
116 | " No | \n",
117 | " 5 | \n",
118 | " 1 | \n",
119 | "
\n",
120 | " \n",
121 | " 4 | \n",
122 | " Masters | \n",
123 | " 2017 | \n",
124 | " Pune | \n",
125 | " 3 | \n",
126 | " 24 | \n",
127 | " Male | \n",
128 | " Yes | \n",
129 | " 2 | \n",
130 | " 1 | \n",
131 | "
\n",
132 | " \n",
133 | " ... | \n",
134 | " ... | \n",
135 | " ... | \n",
136 | " ... | \n",
137 | " ... | \n",
138 | " ... | \n",
139 | " ... | \n",
140 | " ... | \n",
141 | " ... | \n",
142 | " ... | \n",
143 | "
\n",
144 | " \n",
145 | " 4648 | \n",
146 | " Bachelors | \n",
147 | " 2013 | \n",
148 | " Bangalore | \n",
149 | " 3 | \n",
150 | " 26 | \n",
151 | " Female | \n",
152 | " No | \n",
153 | " 4 | \n",
154 | " 0 | \n",
155 | "
\n",
156 | " \n",
157 | " 4649 | \n",
158 | " Masters | \n",
159 | " 2013 | \n",
160 | " Pune | \n",
161 | " 2 | \n",
162 | " 37 | \n",
163 | " Male | \n",
164 | " No | \n",
165 | " 2 | \n",
166 | " 1 | \n",
167 | "
\n",
168 | " \n",
169 | " 4650 | \n",
170 | " Masters | \n",
171 | " 2018 | \n",
172 | " New Delhi | \n",
173 | " 3 | \n",
174 | " 27 | \n",
175 | " Male | \n",
176 | " No | \n",
177 | " 5 | \n",
178 | " 1 | \n",
179 | "
\n",
180 | " \n",
181 | " 4651 | \n",
182 | " Bachelors | \n",
183 | " 2012 | \n",
184 | " Bangalore | \n",
185 | " 3 | \n",
186 | " 30 | \n",
187 | " Male | \n",
188 | " Yes | \n",
189 | " 2 | \n",
190 | " 0 | \n",
191 | "
\n",
192 | " \n",
193 | " 4652 | \n",
194 | " Bachelors | \n",
195 | " 2015 | \n",
196 | " Bangalore | \n",
197 | " 3 | \n",
198 | " 33 | \n",
199 | " Male | \n",
200 | " Yes | \n",
201 | " 4 | \n",
202 | " 0 | \n",
203 | "
\n",
204 | " \n",
205 | "
\n",
206 | "
4653 rows × 9 columns
\n",
207 | "
"
208 | ],
209 | "text/plain": [
210 | " Education JoiningYear City PaymentTier Age Gender EverBenched \\\n",
211 | "0 Bachelors 2017 Bangalore 3 34 Male No \n",
212 | "1 Bachelors 2013 Pune 1 28 Female No \n",
213 | "2 Bachelors 2014 New Delhi 3 38 Female No \n",
214 | "3 Masters 2016 Bangalore 3 27 Male No \n",
215 | "4 Masters 2017 Pune 3 24 Male Yes \n",
216 | "... ... ... ... ... ... ... ... \n",
217 | "4648 Bachelors 2013 Bangalore 3 26 Female No \n",
218 | "4649 Masters 2013 Pune 2 37 Male No \n",
219 | "4650 Masters 2018 New Delhi 3 27 Male No \n",
220 | "4651 Bachelors 2012 Bangalore 3 30 Male Yes \n",
221 | "4652 Bachelors 2015 Bangalore 3 33 Male Yes \n",
222 | "\n",
223 | " ExperienceInCurrentDomain LeaveOrNot \n",
224 | "0 0 0 \n",
225 | "1 3 1 \n",
226 | "2 2 0 \n",
227 | "3 5 1 \n",
228 | "4 2 1 \n",
229 | "... ... ... \n",
230 | "4648 4 0 \n",
231 | "4649 2 1 \n",
232 | "4650 5 1 \n",
233 | "4651 2 0 \n",
234 | "4652 4 0 \n",
235 | "\n",
236 | "[4653 rows x 9 columns]"
237 | ]
238 | },
239 | "execution_count": 5,
240 | "metadata": {},
241 | "output_type": "execute_result"
242 | }
243 | ],
244 | "source": [
245 | "data"
246 | ]
247 | },
248 | {
249 | "cell_type": "code",
250 | "execution_count": 6,
251 | "id": "7e30784e",
252 | "metadata": {},
253 | "outputs": [],
254 | "source": [
255 | "data=data.dropna()"
256 | ]
257 | },
258 | {
259 | "cell_type": "code",
260 | "execution_count": 7,
261 | "id": "c60e9f00",
262 | "metadata": {},
263 | "outputs": [
264 | {
265 | "data": {
266 | "text/html": [
267 | "\n",
268 | "\n",
281 | "
\n",
282 | " \n",
283 | " \n",
284 | " | \n",
285 | " Education | \n",
286 | " JoiningYear | \n",
287 | " City | \n",
288 | " PaymentTier | \n",
289 | " Age | \n",
290 | " Gender | \n",
291 | " EverBenched | \n",
292 | " ExperienceInCurrentDomain | \n",
293 | " LeaveOrNot | \n",
294 | "
\n",
295 | " \n",
296 | " \n",
297 | " \n",
298 | " 0 | \n",
299 | " Bachelors | \n",
300 | " 2017 | \n",
301 | " Bangalore | \n",
302 | " 3 | \n",
303 | " 34 | \n",
304 | " Male | \n",
305 | " No | \n",
306 | " 0 | \n",
307 | " 0 | \n",
308 | "
\n",
309 | " \n",
310 | " 1 | \n",
311 | " Bachelors | \n",
312 | " 2013 | \n",
313 | " Pune | \n",
314 | " 1 | \n",
315 | " 28 | \n",
316 | " Female | \n",
317 | " No | \n",
318 | " 3 | \n",
319 | " 1 | \n",
320 | "
\n",
321 | " \n",
322 | " 2 | \n",
323 | " Bachelors | \n",
324 | " 2014 | \n",
325 | " New Delhi | \n",
326 | " 3 | \n",
327 | " 38 | \n",
328 | " Female | \n",
329 | " No | \n",
330 | " 2 | \n",
331 | " 0 | \n",
332 | "
\n",
333 | " \n",
334 | " 3 | \n",
335 | " Masters | \n",
336 | " 2016 | \n",
337 | " Bangalore | \n",
338 | " 3 | \n",
339 | " 27 | \n",
340 | " Male | \n",
341 | " No | \n",
342 | " 5 | \n",
343 | " 1 | \n",
344 | "
\n",
345 | " \n",
346 | " 4 | \n",
347 | " Masters | \n",
348 | " 2017 | \n",
349 | " Pune | \n",
350 | " 3 | \n",
351 | " 24 | \n",
352 | " Male | \n",
353 | " Yes | \n",
354 | " 2 | \n",
355 | " 1 | \n",
356 | "
\n",
357 | " \n",
358 | " ... | \n",
359 | " ... | \n",
360 | " ... | \n",
361 | " ... | \n",
362 | " ... | \n",
363 | " ... | \n",
364 | " ... | \n",
365 | " ... | \n",
366 | " ... | \n",
367 | " ... | \n",
368 | "
\n",
369 | " \n",
370 | " 4648 | \n",
371 | " Bachelors | \n",
372 | " 2013 | \n",
373 | " Bangalore | \n",
374 | " 3 | \n",
375 | " 26 | \n",
376 | " Female | \n",
377 | " No | \n",
378 | " 4 | \n",
379 | " 0 | \n",
380 | "
\n",
381 | " \n",
382 | " 4649 | \n",
383 | " Masters | \n",
384 | " 2013 | \n",
385 | " Pune | \n",
386 | " 2 | \n",
387 | " 37 | \n",
388 | " Male | \n",
389 | " No | \n",
390 | " 2 | \n",
391 | " 1 | \n",
392 | "
\n",
393 | " \n",
394 | " 4650 | \n",
395 | " Masters | \n",
396 | " 2018 | \n",
397 | " New Delhi | \n",
398 | " 3 | \n",
399 | " 27 | \n",
400 | " Male | \n",
401 | " No | \n",
402 | " 5 | \n",
403 | " 1 | \n",
404 | "
\n",
405 | " \n",
406 | " 4651 | \n",
407 | " Bachelors | \n",
408 | " 2012 | \n",
409 | " Bangalore | \n",
410 | " 3 | \n",
411 | " 30 | \n",
412 | " Male | \n",
413 | " Yes | \n",
414 | " 2 | \n",
415 | " 0 | \n",
416 | "
\n",
417 | " \n",
418 | " 4652 | \n",
419 | " Bachelors | \n",
420 | " 2015 | \n",
421 | " Bangalore | \n",
422 | " 3 | \n",
423 | " 33 | \n",
424 | " Male | \n",
425 | " Yes | \n",
426 | " 4 | \n",
427 | " 0 | \n",
428 | "
\n",
429 | " \n",
430 | "
\n",
431 | "
4653 rows × 9 columns
\n",
432 | "
"
433 | ],
434 | "text/plain": [
435 | " Education JoiningYear City PaymentTier Age Gender EverBenched \\\n",
436 | "0 Bachelors 2017 Bangalore 3 34 Male No \n",
437 | "1 Bachelors 2013 Pune 1 28 Female No \n",
438 | "2 Bachelors 2014 New Delhi 3 38 Female No \n",
439 | "3 Masters 2016 Bangalore 3 27 Male No \n",
440 | "4 Masters 2017 Pune 3 24 Male Yes \n",
441 | "... ... ... ... ... ... ... ... \n",
442 | "4648 Bachelors 2013 Bangalore 3 26 Female No \n",
443 | "4649 Masters 2013 Pune 2 37 Male No \n",
444 | "4650 Masters 2018 New Delhi 3 27 Male No \n",
445 | "4651 Bachelors 2012 Bangalore 3 30 Male Yes \n",
446 | "4652 Bachelors 2015 Bangalore 3 33 Male Yes \n",
447 | "\n",
448 | " ExperienceInCurrentDomain LeaveOrNot \n",
449 | "0 0 0 \n",
450 | "1 3 1 \n",
451 | "2 2 0 \n",
452 | "3 5 1 \n",
453 | "4 2 1 \n",
454 | "... ... ... \n",
455 | "4648 4 0 \n",
456 | "4649 2 1 \n",
457 | "4650 5 1 \n",
458 | "4651 2 0 \n",
459 | "4652 4 0 \n",
460 | "\n",
461 | "[4653 rows x 9 columns]"
462 | ]
463 | },
464 | "execution_count": 7,
465 | "metadata": {},
466 | "output_type": "execute_result"
467 | }
468 | ],
469 | "source": [
470 | "data"
471 | ]
472 | },
473 | {
474 | "cell_type": "code",
475 | "execution_count": 8,
476 | "id": "755c3d9d",
477 | "metadata": {},
478 | "outputs": [],
479 | "source": [
480 | "data=data.fillna(2)"
481 | ]
482 | },
483 | {
484 | "cell_type": "code",
485 | "execution_count": 9,
486 | "id": "697267b1",
487 | "metadata": {},
488 | "outputs": [
489 | {
490 | "data": {
491 | "text/html": [
492 | "\n",
493 | "\n",
506 | "
\n",
507 | " \n",
508 | " \n",
509 | " | \n",
510 | " Education | \n",
511 | " JoiningYear | \n",
512 | " City | \n",
513 | " PaymentTier | \n",
514 | " Age | \n",
515 | " Gender | \n",
516 | " EverBenched | \n",
517 | " ExperienceInCurrentDomain | \n",
518 | " LeaveOrNot | \n",
519 | "
\n",
520 | " \n",
521 | " \n",
522 | " \n",
523 | " 0 | \n",
524 | " Bachelors | \n",
525 | " 2017 | \n",
526 | " Bangalore | \n",
527 | " 3 | \n",
528 | " 34 | \n",
529 | " Male | \n",
530 | " No | \n",
531 | " 0 | \n",
532 | " 0 | \n",
533 | "
\n",
534 | " \n",
535 | " 1 | \n",
536 | " Bachelors | \n",
537 | " 2013 | \n",
538 | " Pune | \n",
539 | " 1 | \n",
540 | " 28 | \n",
541 | " Female | \n",
542 | " No | \n",
543 | " 3 | \n",
544 | " 1 | \n",
545 | "
\n",
546 | " \n",
547 | " 2 | \n",
548 | " Bachelors | \n",
549 | " 2014 | \n",
550 | " New Delhi | \n",
551 | " 3 | \n",
552 | " 38 | \n",
553 | " Female | \n",
554 | " No | \n",
555 | " 2 | \n",
556 | " 0 | \n",
557 | "
\n",
558 | " \n",
559 | " 3 | \n",
560 | " Masters | \n",
561 | " 2016 | \n",
562 | " Bangalore | \n",
563 | " 3 | \n",
564 | " 27 | \n",
565 | " Male | \n",
566 | " No | \n",
567 | " 5 | \n",
568 | " 1 | \n",
569 | "
\n",
570 | " \n",
571 | " 4 | \n",
572 | " Masters | \n",
573 | " 2017 | \n",
574 | " Pune | \n",
575 | " 3 | \n",
576 | " 24 | \n",
577 | " Male | \n",
578 | " Yes | \n",
579 | " 2 | \n",
580 | " 1 | \n",
581 | "
\n",
582 | " \n",
583 | " ... | \n",
584 | " ... | \n",
585 | " ... | \n",
586 | " ... | \n",
587 | " ... | \n",
588 | " ... | \n",
589 | " ... | \n",
590 | " ... | \n",
591 | " ... | \n",
592 | " ... | \n",
593 | "
\n",
594 | " \n",
595 | " 4648 | \n",
596 | " Bachelors | \n",
597 | " 2013 | \n",
598 | " Bangalore | \n",
599 | " 3 | \n",
600 | " 26 | \n",
601 | " Female | \n",
602 | " No | \n",
603 | " 4 | \n",
604 | " 0 | \n",
605 | "
\n",
606 | " \n",
607 | " 4649 | \n",
608 | " Masters | \n",
609 | " 2013 | \n",
610 | " Pune | \n",
611 | " 2 | \n",
612 | " 37 | \n",
613 | " Male | \n",
614 | " No | \n",
615 | " 2 | \n",
616 | " 1 | \n",
617 | "
\n",
618 | " \n",
619 | " 4650 | \n",
620 | " Masters | \n",
621 | " 2018 | \n",
622 | " New Delhi | \n",
623 | " 3 | \n",
624 | " 27 | \n",
625 | " Male | \n",
626 | " No | \n",
627 | " 5 | \n",
628 | " 1 | \n",
629 | "
\n",
630 | " \n",
631 | " 4651 | \n",
632 | " Bachelors | \n",
633 | " 2012 | \n",
634 | " Bangalore | \n",
635 | " 3 | \n",
636 | " 30 | \n",
637 | " Male | \n",
638 | " Yes | \n",
639 | " 2 | \n",
640 | " 0 | \n",
641 | "
\n",
642 | " \n",
643 | " 4652 | \n",
644 | " Bachelors | \n",
645 | " 2015 | \n",
646 | " Bangalore | \n",
647 | " 3 | \n",
648 | " 33 | \n",
649 | " Male | \n",
650 | " Yes | \n",
651 | " 4 | \n",
652 | " 0 | \n",
653 | "
\n",
654 | " \n",
655 | "
\n",
656 | "
4653 rows × 9 columns
\n",
657 | "
"
658 | ],
659 | "text/plain": [
660 | " Education JoiningYear City PaymentTier Age Gender EverBenched \\\n",
661 | "0 Bachelors 2017 Bangalore 3 34 Male No \n",
662 | "1 Bachelors 2013 Pune 1 28 Female No \n",
663 | "2 Bachelors 2014 New Delhi 3 38 Female No \n",
664 | "3 Masters 2016 Bangalore 3 27 Male No \n",
665 | "4 Masters 2017 Pune 3 24 Male Yes \n",
666 | "... ... ... ... ... ... ... ... \n",
667 | "4648 Bachelors 2013 Bangalore 3 26 Female No \n",
668 | "4649 Masters 2013 Pune 2 37 Male No \n",
669 | "4650 Masters 2018 New Delhi 3 27 Male No \n",
670 | "4651 Bachelors 2012 Bangalore 3 30 Male Yes \n",
671 | "4652 Bachelors 2015 Bangalore 3 33 Male Yes \n",
672 | "\n",
673 | " ExperienceInCurrentDomain LeaveOrNot \n",
674 | "0 0 0 \n",
675 | "1 3 1 \n",
676 | "2 2 0 \n",
677 | "3 5 1 \n",
678 | "4 2 1 \n",
679 | "... ... ... \n",
680 | "4648 4 0 \n",
681 | "4649 2 1 \n",
682 | "4650 5 1 \n",
683 | "4651 2 0 \n",
684 | "4652 4 0 \n",
685 | "\n",
686 | "[4653 rows x 9 columns]"
687 | ]
688 | },
689 | "execution_count": 9,
690 | "metadata": {},
691 | "output_type": "execute_result"
692 | }
693 | ],
694 | "source": [
695 | "data"
696 | ]
697 | },
698 | {
699 | "cell_type": "code",
700 | "execution_count": 10,
701 | "id": "21ad4911",
702 | "metadata": {},
703 | "outputs": [],
704 | "source": [
705 | "pay=data['PaymentTier']"
706 | ]
707 | },
708 | {
709 | "cell_type": "code",
710 | "execution_count": 11,
711 | "id": "dc55b7d2",
712 | "metadata": {},
713 | "outputs": [
714 | {
715 | "data": {
716 | "text/plain": [
717 | "0 3\n",
718 | "1 1\n",
719 | "2 3\n",
720 | "3 3\n",
721 | "4 3\n",
722 | " ..\n",
723 | "4648 3\n",
724 | "4649 2\n",
725 | "4650 3\n",
726 | "4651 3\n",
727 | "4652 3\n",
728 | "Name: PaymentTier, Length: 4653, dtype: int64"
729 | ]
730 | },
731 | "execution_count": 11,
732 | "metadata": {},
733 | "output_type": "execute_result"
734 | }
735 | ],
736 | "source": [
737 | "pay"
738 | ]
739 | },
740 | {
741 | "cell_type": "code",
742 | "execution_count": 18,
743 | "id": "ae905dd1",
744 | "metadata": {},
745 | "outputs": [],
746 | "source": [
747 | "age=data['Age']"
748 | ]
749 | },
750 | {
751 | "cell_type": "code",
752 | "execution_count": 19,
753 | "id": "831979df",
754 | "metadata": {},
755 | "outputs": [
756 | {
757 | "data": {
758 | "text/plain": [
759 | "0 34\n",
760 | "1 28\n",
761 | "2 38\n",
762 | "3 27\n",
763 | "4 24\n",
764 | " ..\n",
765 | "4648 26\n",
766 | "4649 37\n",
767 | "4650 27\n",
768 | "4651 30\n",
769 | "4652 33\n",
770 | "Name: Age, Length: 4653, dtype: int64"
771 | ]
772 | },
773 | "execution_count": 19,
774 | "metadata": {},
775 | "output_type": "execute_result"
776 | }
777 | ],
778 | "source": [
779 | "age"
780 | ]
781 | },
782 | {
783 | "cell_type": "code",
784 | "execution_count": 26,
785 | "id": "3b693527",
786 | "metadata": {},
787 | "outputs": [
788 | {
789 | "data": {
790 | "image/png": "",
791 | "text/plain": [
792 | ""
793 | ]
794 | },
795 | "metadata": {},
796 | "output_type": "display_data"
797 | }
798 | ],
799 | "source": [
800 | "plt.bar(pay,age)\n",
801 | "plt.xlabel(\"Employee\")\n",
802 | "plt.ylabel(\"Growth of employee\")\n",
803 | "plt.title(\"Employee's salary tier growth\")\n",
804 | "plt.show()"
805 | ]
806 | },
807 | {
808 | "cell_type": "code",
809 | "execution_count": null,
810 | "id": "31cb7e9c",
811 | "metadata": {},
812 | "outputs": [],
813 | "source": []
814 | }
815 | ],
816 | "metadata": {
817 | "kernelspec": {
818 | "display_name": "Python 3 (ipykernel)",
819 | "language": "python",
820 | "name": "python3"
821 | },
822 | "language_info": {
823 | "codemirror_mode": {
824 | "name": "ipython",
825 | "version": 3
826 | },
827 | "file_extension": ".py",
828 | "mimetype": "text/x-python",
829 | "name": "python",
830 | "nbconvert_exporter": "python",
831 | "pygments_lexer": "ipython3",
832 | "version": "3.11.5"
833 | }
834 | },
835 | "nbformat": 4,
836 | "nbformat_minor": 5
837 | }
838 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Data-Cleaning-using-Python
2 | This part covers data cleaning: removing rows that contain null values, replacing remaining null values with a substitute value, and producing a cleaned data set.
3 |
--------------------------------------------------------------------------------