"""Exploratory analysis of pending court cases read from ``cleaned_data.csv``.

The script first prints a set of summary tables (per state, per district,
per pending-duration bucket, per stage, filed vs disposed, special
categories) and then shows one chart after another with matplotlib/seaborn.

Recovered from a repository dump that listed three files:
``cleaned_data.csv``, ``p2.py`` (this script) and ``report file.docx``
(https://raw.githubusercontent.com/Ayushk-07/python-project/b2c8dbeaf40cd350d95d6a25fc478e493b36bb76/report file.docx).
"""

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Column groups used by both the printed summaries and the charts.  They are
# defined once here so the tables and the plots cannot drift apart.
duration_cols = [
    'Pending cases for a period of 0 to 1 Years',
    'Pending cases for a period of 1 to 3 Years',
    'Pending cases for a period of 3 to 5 Years',
    'Pending cases for a period of 5 to 10 Years',
    'Pending cases for a period of 10 to 20 Years',
    'Pending cases for a period of 20 to 30 Years',
    'Pending cases over 30 Years',
]

stage_cols = [
    'Cases pending at Appearance or Service-Related stage',
    'Cases pending at Compliance or Steps or stay stage',
    'Cases pending at Evidence or Argument or Judgement stage',
    'Cases pending at Pleadings or Issues or Charge stage',
]

monthly_cols = ['Cases instituted in last month', 'Cases disposed in last month']
special_cols = ['Cases filed by Senior Citizens', 'Cases filed by women']

df = pd.read_csv('cleaned_data.csv')

# -----------------------------------------------
# Quick look at the data

print(df.head())

# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit an extra "None" line.
df.info()

print(df.isnull().sum())

print(df.columns)

print(df.describe())

# -----------------------------------------------
# Printed summaries

# Total Pending Cases State-wise
state_cases = df.groupby('srcStateName')['Pending cases'].sum().reset_index()
print(state_cases)

# Total Pending Cases District-wise
district_cases = df.groupby('srcDistrictName')['Pending cases'].sum().reset_index()
print(district_cases)

# Analyze Pending Cases by Year Range
pending_by_years = df[duration_cols].sum()
print(pending_by_years)

# Analyze Cases by Stage
cases_by_stage = df[stage_cols].sum()
print(cases_by_stage)

# Total Cases Filed vs Disposed in Last Month
filed_vs_disposed = df[monthly_cols].sum()
print(filed_vs_disposed)

# Total Cases Filed by Senior Citizens and Women
special_cases = df[special_cols].sum()
print(special_cases)

# -----------------------------------------------
# Objective 1: Pending Cases Across States & Districts

# State-wise Pending Cases - Bar Plot (largest backlog first)
plt.figure(figsize=(12, 6))
state_cases = df.groupby('srcStateName')['Pending cases'].sum().sort_values(ascending=False)
state_cases.plot(kind='bar', color='teal')
plt.title('State-wise Pending Court Cases')
plt.xlabel('State')
plt.ylabel('Total Pending Cases')
plt.xticks(rotation=90)
plt.show()

# Disabled in the original script -- Heatmap: State vs Court case Type Pending Cases
#state_district_cases = df.pivot_table(values='Pending cases', index='srcStateName', columns='District and Taluk Court Case type', fill_value=0)
#plt.figure(figsize=(16,8))
#sns.heatmap(state_district_cases, cmap='Reds')
#plt.title('Heatmap: Pending Cases Across States & Districts')
#plt.show()

# -----------------------------------------------
# Objective 2: Duration-wise Pending Cases Analysis

# Boxplot for Cases Pending Duration
plt.figure(figsize=(10, 6))
sns.boxplot(data=df[duration_cols])
plt.title('Boxplot for Cases Pending Duration')
plt.xlabel('Duration Categories')
plt.ylabel('Number of Cases')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# Bar chart of the total per duration range.  A fresh figure is opened
# explicitly so the bars never land on axes left over from the previous chart
# (a Series plot draws on the current axes when a figure is still open).
plt.figure()
df_duration_sum = df[duration_cols].sum()
df_duration_sum.plot(
    kind='bar',
    stacked=True,
    color=sns.color_palette("Blues", len(duration_cols)),
)
plt.title('Pending Cases by Duration Range')
plt.xlabel('Duration Range')
plt.ylabel('Number of Pending Cases')
plt.xticks(rotation=45)
plt.show()

# Histogram (DataFrame.plot creates its own figure, sized via figsize)
df[duration_cols].plot.hist(bins=20, figsize=(10, 6))
plt.title('Histogram of Pending Cases Duration')
plt.xlabel('Pending Cases Count')
plt.show()

# -----------------------------------------------
# Objective 3: Stage-wise Pending Cases Analysis

df_stage_sum = df[stage_cols].sum()

# Bar Plot
plt.figure()
df_stage_sum.plot(kind='bar', color='coral')
plt.title('Stage-wise Pending Court Cases')
plt.xticks(rotation=45)
plt.ylabel('Number of Cases')
plt.show()

# Pie Chart
plt.figure(figsize=(7, 7))
df_stage_sum.plot(kind='pie', autopct='%1.1f%%')
plt.title('Stage-wise Pending Cases Distribution')
plt.ylabel('')
plt.show()

# Disabled in the original script -- Heatmap: Stage vs District
#plt.figure(figsize=(14,8))
#sns.heatmap(df.pivot_table(values='Pending cases', index='srcDistrictName', columns='srcStateName', aggfunc='sum', fill_value=0), cmap='YlGnBu')
#plt.title('Heatmap of Pending Cases Across Stages & Districts')
#plt.show()

# -----------------------------------------------
# Objective 4: Cases Instituted vs Disposed Last Month

# DataFrame.plot opens a figure of its own unless it is handed an Axes, so the
# axes are created here and passed in.  Calling plt.figure() first (as the
# script used to) left an empty 12x6 figure next to a default-sized chart.
fig, ax = plt.subplots(figsize=(12, 6))
df.groupby('srcStateName')[monthly_cols].sum().plot(kind='bar', ax=ax)
plt.title('Cases Instituted vs Disposed Last Month (State-wise)')
plt.ylabel('Number of Cases')
plt.xticks(rotation=90)
plt.show()

# Scatter Plot: Correlation
plt.figure(figsize=(8, 6))
sns.scatterplot(x='Cases instituted in last month', y='Cases disposed in last month', data=df)
plt.title('Correlation: Cases Instituted vs Disposed')
plt.xlabel('Cases Instituted')
plt.ylabel('Cases Disposed')
plt.show()

# -----------------------------------------------
# Objective 5: Special Case Categories (Senior Citizens, Women, Delayed)

# Bar Plot: Senior Citizens vs Women Cases
plt.figure(figsize=(10, 6))
df[special_cols].sum().plot(kind='bar', color=['orange', 'purple'])
plt.title('Cases Filed by Senior Citizens vs Women')
plt.ylabel('Total Cases')
plt.xticks(rotation=0)
plt.show()

# Disabled in the original script -- Heatmap: District-wise Delayed Cases
#plt.figure(figsize=(14,8))
#sns.heatmap(df.pivot_table(values='Cases delayed in disposal', index='srcDistrictName', columns='srcStateName', aggfunc='sum', fill_value=0), cmap='coolwarm')
#plt.title('Heatmap of Delayed Case Disposal (District-wise)')
#plt.show()

# Disabled in the original script -- Boxplot: Delayed Cases Distribution
#plt.figure(figsize=(10,6))
#sns.boxplot(x='srcStateName', y='Cases delayed in disposal', data=df)
#plt.title('Distribution of Delayed Cases Across States')
#plt.xticks(rotation=90)
#plt.show()

# -----------------------------------------------
# Heatmap: State vs Duration-wise Pending Cases

# Group by State and Sum Duration-wise Pending Cases
state_duration = df.groupby('srcStateName')[duration_cols].sum()

# Plot Heatmap
plt.figure(figsize=(14, 8))
sns.heatmap(state_duration, annot=True, fmt='.0f', cmap='YlGnBu', linewidths=0.5)
plt.title('Heatmap: State vs Duration-wise Pending Cases')
plt.xlabel('Case Pending Duration')
plt.ylabel('State')
plt.show()