├── Project_PyDA1 └── README.md /Project_PyDA1: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | df = pd.read_excel("C:/Users/Divya/Downloads/Python_CA2_DataSheet.xlsx") 3 | print(df.head()) 4 | import seaborn as sns 5 | import matplotlib.pyplot as plt 6 | # Grouping data by States and Districts 7 | state_trend = df.groupby("States/UT's")['Total'].sum().reset_index().sort_values(by='Total', ascending=False) 8 | district_trend = df.groupby('Districts')['Total'].sum().reset_index().sort_values(by='Total', ascending=False) 9 | 10 | # Display the tables 11 | print("State-wise Tele-Law Case Registrations:\n", state_trend) 12 | print("\nDistrict-wise Tele-Law Case Registrations:\n", district_trend) 13 | 14 | # Visualization 15 | plt.figure(figsize=(10,5)) 16 | sns.barplot(data=state_trend.head(10), x='Total', y="States/UT's", palette='viridis') 17 | plt.title("Top 10 States by Tele-Law Case Registrations") 18 | plt.xlabel("Total Cases") 19 | plt.ylabel("States/UT's") 20 | plt.tight_layout() 21 | plt.show() 22 | 23 | # Summarize gender distribution 24 | gender_cases = pd.DataFrame({ 25 | 'Gender': ['Female', 'Male'], 26 | 'Total Cases': [df['Female'].sum(), df['Male'].sum()] 27 | }) 28 | 29 | print("\nGender-wise Legal Advice Distribution:\n", gender_cases) 30 | 31 | # Visualization 32 | plt.figure(figsize=(5,5)) 33 | plt.pie(gender_cases['Total Cases'], labels=gender_cases['Gender'], autopct='%1.1f%%', startangle=140, colors=['#FF9999','#66B3FF']) 34 | plt.title('Gender Distribution of Legal Advice Cases') 35 | plt.show() 36 | 37 | # Summarize caste-category distribution 38 | category_cases = pd.DataFrame({ 39 | 'Category': ['General', 'OBC', 'SC', 'ST'], 40 | 'Total Cases': [df['General'].sum(), df['OBC'].sum(), df['SC'].sum(), df['ST'].sum()] 41 | }) 42 | 43 | print("\nSocial Category-wise Legal Advice Distribution:\n", category_cases) 44 | 45 | # Visualization 46 | plt.figure(figsize=(8,5)) 47 | sns.barplot(data=category_cases, x='Category', y='Total Cases', palette='mako') 48 | plt.title("Legal Advice Distribution by Social Category") 49 | plt.xlabel("Category") 50 | plt.ylabel("Total Cases") 51 | plt.tight_layout() 52 | plt.show() 53 | 54 | # Calculate performance: Cases per CSC in each District 55 | csc_performance = df[['Districts', 'No. of CSCs', 'Total']].copy() 56 | csc_performance['Cases per CSC'] = csc_performance['Total'] / csc_performance['No. of CSCs'] 57 | 58 | # Identify underperforming CSCs (bottom 10) 59 | underperforming_cscs = csc_performance.sort_values(by='Cases per CSC').head(10) 60 | 61 | print("\nUnderperforming CSCs (Bottom 10 Districts by Case Load per CSC):\n") 62 | print(underperforming_cscs[['Districts', 'Cases per CSC']]) 63 | 64 | # Visualization — Lollipop Chart 65 | plt.figure(figsize=(10,6)) 66 | # Plot the sticks 67 | plt.hlines(y=underperforming_cscs['Districts'], xmin=0, xmax=underperforming_cscs['Cases per CSC'], color='gray', linewidth=2) 68 | # Plot the dots 69 | plt.plot(underperforming_cscs['Cases per CSC'], underperforming_cscs['Districts'], 'o', color='crimson', markersize=8) 70 | 71 | plt.title("Districts with Underperforming CSCs", fontsize=14, weight='bold') 72 | plt.xlabel("Cases per CSC") 73 | plt.ylabel("District") 74 | plt.grid(axis='x', linestyle='--', alpha=0.7) 75 | plt.tight_layout() 76 | plt.show() 77 | 78 | from sklearn.linear_model import LinearRegression 79 | 80 | # Using a simple Linear Regression Model: Predict 'Total' cases based on 'No. of CSCs' 81 | X = df[['No. of CSCs']] 82 | y = df['Total'] 83 | 84 | model = LinearRegression() 85 | model.fit(X, y) 86 | 87 | df['Predicted_Total'] = model.predict(X).round().astype(int) 88 | 89 | # Display Actual vs Predicted sample table 90 | prediction_table = df[["States/UT's", 'Districts', 'No. of CSCs', 'Total', 'Predicted_Total']].head(10) 91 | print("\nPredicted vs Actual Tele-Law Case Registrations:\n", prediction_table) 92 | 93 | # Visualization 94 | plt.figure(figsize=(8,5)) 95 | plt.scatter(df['No. of CSCs'], df['Total'], color='blue', label='Actual Cases') 96 | plt.plot(df['No. of CSCs'], df['Predicted_Total'], color='red', linewidth=2, label='Predicted Trend') 97 | plt.title('Tele-Law Cases Prediction Based on CSC Count') 98 | plt.xlabel('Number of CSCs') 99 | plt.ylabel('Total Cases') 100 | plt.legend() 101 | plt.tight_layout() 102 | plt.show() 103 | 104 | plt.figure(figsize=(10,6)) 105 | sns.scatterplot(data=df, x='No. of CSCs', y='Total', hue="States/UT's", palette='tab10', s=100, alpha=0.7) 106 | 107 | plt.title("Relationship between No. of CSCs and Total Tele-Law Cases", fontsize=14, weight='bold') 108 | plt.xlabel("Number of CSCs in District") 109 | plt.ylabel("Total Tele-Law Cases") 110 | plt.legend(title='States/UTs', bbox_to_anchor=(1.05, 1), loc='upper left') 111 | plt.grid(True, linestyle='--', alpha=0.5) 112 | plt.tight_layout() 113 | plt.show() 114 | 115 | print(df[['No. of CSCs', 'Total']].value_counts()) 116 | 117 | state_share = df.groupby("States/UT's")['Total'].sum().reset_index().sort_values(by='Total', ascending=False) 118 | 119 | plt.figure(figsize=(8,8)) 120 | plt.pie(state_share['Total'], labels=state_share['States/UT\'s'], 121 | autopct='%1.1f%%', startangle=140, colors=sns.color_palette('pastel', len(state_share))) 122 | 123 | plt.title("State-wise Share of Total Tele-Law Cases", fontsize=14, weight='bold') 124 | plt.tight_layout() 125 | plt.show() 126 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | I’m excited to share my recent data analysis and visualization project on Tele-Law Services in India — a dataset-driven deep dive into how legal aid is accessed across states, districts, social categories, and genders. 2 | 3 | 🔍 Key Objectives of the Project: 4 | 5 | ✅ 1️⃣ Analyzing State-wise and District-wise Trends in Tele-Law case registrations. 6 | 7 | ✅ 2️⃣ Exploring Gender-based Legal Advice Patterns to highlight disparities. 8 | 9 | ✅ 3️⃣ Studying Social Category Representation (General, OBC, SC, ST) for legal service usage. 10 | 11 | ✅ 4️⃣ Evaluating CSC (Common Service Centers) Performance in delivering legal advice. 12 | 13 | ✅ 5️⃣ Using Predictive Analysis to model future Tele-Law case trends. 14 | 15 | ✅ 6️⃣ Visualizing the Relationship between CSC Count and Total Cases using Scatter Plots. 16 | 17 | ✅ 7️⃣ Presenting State-wise Distribution of Total Cases via Pie Charts. 18 | 19 | 💡 The project combined: 20 | 21 | 📊 Pandas & Seaborn for data cleaning and analysis 22 | 23 | 📈 Matplotlib & Seaborn for insightful visualizations 24 | 25 | 🎯 Takeaway: 26 | 27 | This project gave me hands-on experience with real-world datasets, trend spotting, and visual storytelling — key skills for any aspiring data analyst or data scientist. 28 | 29 | --------------------------------------------------------------------------------