├── Project_PyDA1
└── README.md


/Project_PyDA1:
--------------------------------------------------------------------------------
  1 | import pandas as pd
  2 | df = pd.read_excel("C:/Users/Divya/Downloads/Python_CA2_DataSheet.xlsx")
  3 | print(df.head())
  4 | import seaborn as sns
  5 | import matplotlib.pyplot as plt
  6 | # Grouping data by States and Districts
  7 | state_trend = df.groupby("States/UT's")['Total'].sum().reset_index().sort_values(by='Total', ascending=False)
  8 | district_trend = df.groupby('Districts')['Total'].sum().reset_index().sort_values(by='Total', ascending=False)
  9 | 
 10 | # Display the tables
 11 | print("State-wise Tele-Law Case Registrations:\n", state_trend)
 12 | print("\nDistrict-wise Tele-Law Case Registrations:\n", district_trend)
 13 | 
 14 | # Visualization
 15 | plt.figure(figsize=(10,5))
 16 | sns.barplot(data=state_trend.head(10), x='Total', y="States/UT's", palette='viridis')
 17 | plt.title("Top 10 States by Tele-Law Case Registrations")
 18 | plt.xlabel("Total Cases")
 19 | plt.ylabel("States/UT's")
 20 | plt.tight_layout()
 21 | plt.show()
 22 | 
 23 | # Summarize gender distribution
 24 | gender_cases = pd.DataFrame({
 25 |     'Gender': ['Female', 'Male'],
 26 |     'Total Cases': [df['Female'].sum(), df['Male'].sum()]
 27 | })
 28 | 
 29 | print("\nGender-wise Legal Advice Distribution:\n", gender_cases)
 30 | 
 31 | # Visualization
 32 | plt.figure(figsize=(5,5))
 33 | plt.pie(gender_cases['Total Cases'], labels=gender_cases['Gender'], autopct='%1.1f%%', startangle=140, colors=['#FF9999','#66B3FF'])
 34 | plt.title('Gender Distribution of Legal Advice Cases')
 35 | plt.show()
 36 | 
 37 | # Summarize caste-category distribution
 38 | category_cases = pd.DataFrame({
 39 |     'Category': ['General', 'OBC', 'SC', 'ST'],
 40 |     'Total Cases': [df['General'].sum(), df['OBC'].sum(), df['SC'].sum(), df['ST'].sum()]
 41 | })
 42 | 
 43 | print("\nSocial Category-wise Legal Advice Distribution:\n", category_cases)
 44 | 
 45 | # Visualization
 46 | plt.figure(figsize=(8,5))
 47 | sns.barplot(data=category_cases, x='Category', y='Total Cases', palette='mako')
 48 | plt.title("Legal Advice Distribution by Social Category")
 49 | plt.xlabel("Category")
 50 | plt.ylabel("Total Cases")
 51 | plt.tight_layout()
 52 | plt.show()
 53 | 
 54 | # Calculate performance: Cases per CSC in each District
 55 | csc_performance = df[['Districts', 'No. of CSCs', 'Total']].copy()
 56 | csc_performance['Cases per CSC'] = csc_performance['Total'] / csc_performance['No. of CSCs']
 57 | 
 58 | # Identify underperforming CSCs (bottom 10)
 59 | underperforming_cscs = csc_performance.sort_values(by='Cases per CSC').head(10)
 60 | 
 61 | print("\nUnderperforming CSCs (Bottom 10 Districts by Case Load per CSC):\n")
 62 | print(underperforming_cscs[['Districts', 'Cases per CSC']])
 63 | 
 64 | # Visualization — Lollipop Chart
 65 | plt.figure(figsize=(10,6))
 66 | # Plot the sticks
 67 | plt.hlines(y=underperforming_cscs['Districts'], xmin=0, xmax=underperforming_cscs['Cases per CSC'], color='gray', linewidth=2)
 68 | # Plot the dots
 69 | plt.plot(underperforming_cscs['Cases per CSC'], underperforming_cscs['Districts'], 'o', color='crimson', markersize=8)
 70 | 
 71 | plt.title("Districts with Underperforming CSCs", fontsize=14, weight='bold')
 72 | plt.xlabel("Cases per CSC")
 73 | plt.ylabel("District")
 74 | plt.grid(axis='x', linestyle='--', alpha=0.7)
 75 | plt.tight_layout()
 76 | plt.show()
 77 | 
 78 | from sklearn.linear_model import LinearRegression
 79 | 
 80 | # Using a simple Linear Regression Model: Predict 'Total' cases based on 'No. of CSCs'
 81 | X = df[['No. of CSCs']]
 82 | y = df['Total']
 83 | 
 84 | model = LinearRegression()
 85 | model.fit(X, y)
 86 | 
 87 | df['Predicted_Total'] = model.predict(X).round().astype(int)
 88 | 
 89 | # Display Actual vs Predicted sample table
 90 | prediction_table = df[["States/UT's", 'Districts', 'No. of CSCs', 'Total', 'Predicted_Total']].head(10)
 91 | print("\nPredicted vs Actual Tele-Law Case Registrations:\n", prediction_table)
 92 | 
 93 | # Visualization
 94 | plt.figure(figsize=(8,5))
 95 | plt.scatter(df['No. of CSCs'], df['Total'], color='blue', label='Actual Cases')
 96 | plt.plot(df['No. of CSCs'], df['Predicted_Total'], color='red', linewidth=2, label='Predicted Trend')
 97 | plt.title('Tele-Law Cases Prediction Based on CSC Count')
 98 | plt.xlabel('Number of CSCs')
 99 | plt.ylabel('Total Cases')
100 | plt.legend()
101 | plt.tight_layout()
102 | plt.show()
103 | 
# Scatter of CSC count against total cases, one colour per state/UT.
plt.figure(figsize=(10, 6))
scatter_ax = sns.scatterplot(
    data=df,
    x='No. of CSCs',
    y='Total',
    hue="States/UT's",
    palette='tab10',
    s=100,
    alpha=0.7,
)

scatter_ax.set_title(
    "Relationship between No. of CSCs and Total Tele-Law Cases",
    fontsize=14,
    weight='bold',
)
scatter_ax.set_xlabel("Number of CSCs in District")
scatter_ax.set_ylabel("Total Tele-Law Cases")
# Anchor the legend just outside the right edge of the axes.
scatter_ax.legend(title='States/UTs', bbox_to_anchor=(1.05, 1), loc='upper left')
scatter_ax.grid(True, linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()

# Frequency of each distinct (No. of CSCs, Total) pair.
csc_total_pairs = df[['No. of CSCs', 'Total']]
print(csc_total_pairs.value_counts())
116 | 
# Total cases per state/UT, largest first, for the share-of-total pie chart.
totals_by_state = df.groupby("States/UT's")['Total'].sum().reset_index()
state_share = totals_by_state.sort_values(by='Total', ascending=False)

# One pastel colour per slice.
slice_colors = sns.color_palette('pastel', len(state_share))

share_fig, share_ax = plt.subplots(figsize=(8, 8))
share_ax.pie(
    state_share['Total'],
    labels=state_share["States/UT's"],
    autopct='%1.1f%%',
    startangle=140,
    colors=slice_colors,
)

share_ax.set_title("State-wise Share of Total Tele-Law Cases", fontsize=14, weight='bold')
plt.tight_layout()
plt.show()
126 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | I’m excited to share my recent data analysis and visualization project on Tele-Law Services in India — a dataset-driven deep dive into how legal aid is accessed across states, districts, social categories, and genders.
 2 | 
 3 | 🔍 Key Objectives of the Project:
 4 | 
 5 | ✅ 1️⃣ Analyzing State-wise and District-wise Trends in Tele-Law case registrations.
 6 | 
 7 | ✅ 2️⃣ Exploring Gender-based Legal Advice Patterns to highlight disparities.
 8 | 
 9 |  ✅ 3️⃣ Studying Social Category Representation (General, OBC, SC, ST) for legal service usage.
10 |  
11 |  ✅ 4️⃣ Evaluating CSC (Common Service Centers) Performance in delivering legal advice.
12 |  
 13 |  ✅ 5️⃣ Using Predictive Analysis (linear regression on CSC count) to estimate Tele-Law case volumes.
14 |  
15 |  ✅ 6️⃣ Visualizing the Relationship between CSC Count and Total Cases using Scatter Plots.
16 |  
17 |  ✅ 7️⃣ Presenting State-wise Distribution of Total Cases via Pie Charts.
18 |  
19 | 💡 The project combined:
20 | 
 21 |  📊 Pandas for data preparation and analysis, and scikit-learn for the linear-regression model
22 | 
23 |  📈 Matplotlib & Seaborn for insightful visualizations
24 |  
25 | 🎯 Takeaway:
26 | 
27 |  This project gave me hands-on experience with real-world datasets, trend spotting, and visual storytelling — key skills for any aspiring data analyst or data scientist.
28 |  
29 | 


--------------------------------------------------------------------------------