"""PythonEDA: exploratory plots for the employee sample dataset.

Loads the employee CSV and shows, in order: the hiring trend per year,
gender distribution by department (stacked bar), a correlation heatmap of
the numeric columns, the age distribution, salary versus tenure, gender
counts per department, job-title counts, salary by gender, average salary
by department and gender, and average tenure by department.
"""

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

DATA_PATH = r"C:\Users\kumar\Downloads\Employee Sample Dataint 217 (1).csv"

# Average Gregorian year length, so leap years do not inflate long tenures.
DAYS_PER_YEAR = 365.25

df = pd.read_csv(DATA_PATH, encoding='ISO-8859-1')

print(df.dtypes)

# Parse both date columns once; unparseable values become NaT instead of
# raising, and every later step reuses the parsed columns.
df['Hire Date'] = pd.to_datetime(df['Hire Date'], errors='coerce')
df['Exit Date'] = pd.to_datetime(df['Exit Date'], errors='coerce')

# --- Hiring trend over the years -------------------------------------------
df['Hire Year'] = df['Hire Date'].dt.year
hire_trend = df['Hire Year'].value_counts().sort_index()
hire_trend.plot(kind='line', marker='o', title='Hiring Trend Over Years')
plt.xlabel("Year")
plt.ylabel("Number of Hires")
plt.grid(True)
plt.show()

# --- Gender distribution by department (stacked bar) -----------------------
# fill_value=0 keeps department/gender pairs with no employees as an
# explicit zero rather than NaN.
gender_dept = df.groupby(['Department', 'Gender']).size().unstack(fill_value=0)
gender_dept.plot(kind='bar', stacked=True, figsize=(10, 6), colormap='Paired')
plt.title('Gender Distribution by Department')
plt.xlabel('Department')
plt.ylabel('Number of Employees')
plt.xticks(rotation=45)
plt.legend(title='Gender')
plt.tight_layout()
plt.show()

print(df.columns)

# --- Correlation between numeric features ----------------------------------
numeric_cols = df.select_dtypes(include='number')
print(numeric_cols.columns)

plt.figure(figsize=(10, 6))
correlation = numeric_cols.corr()
sns.heatmap(correlation, annot=True, cmap='coolwarm', fmt=".2f")
plt.title("Correlation Between Numeric Features")
plt.tight_layout()
plt.show()

# --- Age distribution ------------------------------------------------------
plt.figure(figsize=(10, 5))
sns.histplot(df['Age'], kde=True, color='skyblue', bins=20)
plt.title("Employee Age Distribution")
plt.xlabel("Age")
plt.ylabel("Frequency")
plt.tight_layout()
plt.show()

# --- Tenure ----------------------------------------------------------------
# Rows without an exit date are measured up to today.
df['End Date'] = df['Exit Date'].fillna(pd.Timestamp.today())
df['Tenure (Years)'] = (df['End Date'] - df['Hire Date']).dt.days / DAYS_PER_YEAR

# --- Annual salary vs tenure -----------------------------------------------
# NOTE(review): the salary plots below assume 'Annual Salary' was read as a
# numeric column - confirm against the df.dtypes output printed above.
plt.figure(figsize=(10, 6))
sns.scatterplot(
    data=df,
    x='Tenure (Years)',
    y='Annual Salary',
    hue='Department',
    palette='tab10',
)
plt.title("Annual Salary vs Tenure (Years)")
plt.xlabel("Tenure in Years")
plt.ylabel("Annual Salary")
plt.tight_layout()
plt.show()

# --- Gender counts per department ------------------------------------------
plt.figure(figsize=(10, 6))
sns.countplot(data=df, x='Department', hue='Gender', palette='Set1')
plt.title("Gender Distribution per Department")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# --- Job title counts ------------------------------------------------------
job_title_counts = df['Job Title'].value_counts()

plt.figure(figsize=(10, 8))
sns.barplot(
    x=job_title_counts.values,
    y=job_title_counts.index,
    hue=job_title_counts.index,
    palette='Set2',
    dodge=False,
    legend=False,
)
plt.xlabel("Count")
plt.ylabel("Job Title")
plt.title("Employment Type Distribution")
plt.tight_layout()
plt.show()

# --- Annual salary by gender -----------------------------------------------
plt.figure(figsize=(8, 6))
sns.boxplot(
    data=df,
    x='Gender',
    y='Annual Salary',
    hue='Gender',
    palette='coolwarm',
    dodge=False,
    legend=False,
)
plt.title("Annual Salary Distribution by Gender")
plt.tight_layout()
plt.show()

# --- Average salary by department and gender -------------------------------
pivot = df.pivot_table(
    values='Annual Salary',
    index='Department',
    columns='Gender',
    aggfunc='mean',
)
plt.figure(figsize=(10, 6))
sns.heatmap(pivot, annot=True, fmt=".0f", cmap='YlGnBu')
plt.title("Average Salary by Department and Gender")
plt.tight_layout()
plt.show()

# --- Average tenure by department ------------------------------------------
# Uses the 'Tenure (Years)' column computed above.
avg_tenure = df.groupby('Department')['Tenure (Years)'].mean().sort_values()
plt.figure(figsize=(10, 5))
sns.lineplot(x=avg_tenure.index, y=avg_tenure.values, marker='o')
plt.xticks(rotation=45)
plt.ylabel("Average Tenure (Years)")
plt.title("Average Tenure by Department")
plt.tight_layout()
plt.show()