├── DDW-2000C-08.xlsx
├── README.md
└── mainFile.py


/DDW-2000C-08.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rajsinghpiyush/Literacy-Rate-in-Jharkhand-by-age-and-gender/32a8d7a2684778918bdb8706d56a783b71acedee/DDW-2000C-08.xlsx


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Literacy-Rate-in-Jharkhand-by-age-and-gender
2 | This dataset from the Census of India provides detailed information about educational attainment levels across different age groups and genders in Jharkhand state. The data is broken down by rural/urban areas and includes categories from illiteracy through graduate education.
3 | 2nd day:
4 | Changed the 5th column to the header column.
5 | 3rd day:
6 | Renamed the columns and did a simple exploratory data analysis (EDA).
7 | 


--------------------------------------------------------------------------------
/mainFile.py:
--------------------------------------------------------------------------------
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
  5 | 
  6 | 
  7 | df = pd.read_excel("C:\\Python CA2\\DDW-2000C-08.xlsx")
  8 | 
  9 | 
 10 | print(df.head(15))
 11 | print(df.describe())
 12 | print(df.info())
 13 | 
 14 | 
 15 | 
 16 | df.dropna(how='all', inplace=True)
 17 | df.dropna(axis=1, how='all', inplace = True)
 18 | 
 19 | columns = [
 20 |     'Table_Name', 'State_Code', 'Distt_Code', 'Area_Name', 'Total_Rural_Urban',
 21 |     'Age_group', 'Persons_Total', 'Males_Total', 'Females_Total',
 22 |     'Persons_Illiterate', 'Males_Illiterate', 'Females_Illiterate',
 23 |     'Persons_Literate', 'Males_Literate', 'Females_Literate',
 24 |     'Persons_Literate_wo_Edu', 'Males_Literate_wo_Edu', 'Females_Literate_wo_Edu',
 25 |     'Persons_Below_Primary', 'Males_Below_Primary', 'Females_Below_Primary',
 26 |     'Persons_Primary', 'Males_Primary', 'Females_Primary',
 27 |     'Persons_Middle', 'Males_Middle', 'Females_Middle',
 28 |     'Persons_Matric_Secondary', 'Males_Matric_Secondary', 'Females_Matric_Secondary',
 29 |     'Persons_Higher_Secondary', 'Males_Higher_Secondary', 'Females_Higher_Secondary',
 30 |     'Persons_NonTech_Diploma', 'Males_NonTech_Diploma', 'Females_NonTech_Diploma',
 31 |     'Persons_Tech_Diploma', 'Males_Tech_Diploma', 'Females_Tech_Diploma',
 32 |     'Persons_Graduate', 'Males_Graduate', 'Females_Graduate',
 33 |     'Persons_Unclassified', 'Males_Unclassified', 'Females_Unclassified'
 34 | ]
 35 | 
 36 | df.columns = columns
 37 | # print("Hi",df.head)
 38 | # print(df.info())
 39 | # print(df.describe())
 40 | 
 41 | numeric_cols = df.columns[6:]  
 42 | df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce')
 43 | print(df['Age_group'])
 44 | 
 45 | df.fillna(0, inplace=True)
 46 | df = df[(df[numeric_cols] != 0).any(axis=1)]
 47 | 
 48 | df['Area_Type'] = df['Area_Name'].apply(lambda x: x.split(' - ')[1] if ' - ' in str(x) else np.nan)
 49 | df['District'] = df['Area_Name'].apply(lambda x: x.split(' - ')[-1].replace('District - ', '') if ' - ' in str(x) and str(x) != 'State - JHARKHAND' else np.nan)
 50 | 
 51 | 
 52 | 
 53 | plt.figure(figsize=(14, 6))
 54 | total_pop = df[df['Age_group'] == 'All ages'].groupby('District')['Persons_Total'].sum().sort_values(ascending=False)
 55 | total_pop.plot(kind='bar')
 56 | plt.title('Total Population by District')
 57 | plt.ylabel('Population Count')
 58 | plt.xticks(rotation=45)
 59 | plt.tight_layout()
 60 | plt.show()
 61 | 
 62 | 
 63 | 
 64 | gender_df = df[df['Age_group'] == 'All ages'].groupby('District')[['Males_Total', 'Females_Total']].sum()
 65 | fig, ax = plt.subplots(figsize=(12, 6))
 66 | gender_df.plot(kind='bar', ax=ax)
 67 | ax.set_title('Gender Distribution by District')
 68 | ax.set_ylabel('Population Count')
 69 | ax.set_xlabel('District')
 70 | ax.set_xticklabels(gender_df.index, rotation=45)
 71 | plt.tight_layout()
 72 | plt.show()
 73 | 
 74 | 
 75 | lit_df = df[df['Age_group'] == 'All ages'].groupby('District')[['Persons_Illiterate', 'Persons_Literate']].sum()
 76 | fig, ax = plt.subplots(figsize=(12, 6))
 77 | lit_df.plot(kind='bar', ax=ax)
 78 | ax.set_title('Literacy Status by District')
 79 | ax.set_ylabel('Population Count')
 80 | ax.set_xlabel('District')
 81 | ax.set_xticklabels(lit_df.index, rotation=45)
 82 | plt.tight_layout()
 83 | plt.show()
 84 | 
 85 | edu_cols = ['Persons_Below_Primary', 'Persons_Primary', 'Persons_Middle', 
 86 |             'Persons_Matric_Secondary', 'Persons_Higher_Secondary', 'Persons_Graduate']
 87 | edu_df = df[df['Age_group'] == 'All ages'].groupby('District')[edu_cols].sum()
 88 | fig, ax = plt.subplots(figsize=(14, 8))
 89 | edu_df.plot(kind='bar', ax=ax)
 90 | ax.set_title('Education Level Distribution by District')
 91 | ax.set_ylabel('Population Count')
 92 | ax.set_xlabel('District')
 93 | ax.set_xticklabels(edu_df.index, rotation=45)
 94 | plt.tight_layout()
 95 | plt.show()
 96 | 
 97 | if 'Total_Rural_Urban' in df.columns:
 98 |     plt.figure(figsize=(12, 6))
 99 |     rural_urban = df[(df['Age_group'] == 'All ages')& 
100 |         (~df['Total_Rural_Urban'].str.contains('Total', case=False, na=False))].groupby('Total_Rural_Urban')['Persons_Total'].sum()
101 |     rural_urban.plot(kind='pie', autopct='%1.1f%%')
102 |     plt.title('Population Distribution: Rural vs Urban')
103 |     plt.ylabel('')
104 |     plt.show()
105 | 
# Sum of each education-level count per selected age group.
# NOTE(review): pivot_edu is not referenced by any later step visible in this
# script, and age_groups is reassigned before the literacy line chart.
age_groups = ['7', '8', '9', '10-14', '15-19', '20-24', '25-29', '30-34', '35+']
in_selected_ages = df['Age_group'].isin(age_groups)
age_df = df[in_selected_ages]
pivot_edu = pd.pivot_table(
    age_df,
    index='Age_group',
    values=edu_cols,
    aggfunc='sum',
)
# Correlation heatmap between male and female counts for three attainment
# measures, computed over every row of df.
gender_gap_cols = [
    f'{sex}_{measure}'
    for measure in ('Literate', 'Graduate', 'Matric_Secondary')
    for sex in ('Males', 'Females')
]
plt.figure(figsize=(12, 8))
gap_correlation = df[gender_gap_cols].corr()
sns.heatmap(gap_correlation, annot=True, cmap='coolwarm', center=0)
plt.title('Gender Gap in Education (Correlation)')
plt.tight_layout()
plt.show()
123 | 
# Bar chart: the ten districts with the most graduates.
#
# Restrict to the combined 'Total' rows of Total_Rural_Urban; including the
# rural and urban rows as well would count every graduate twice.
all_ages_total = (df['Age_group'] == 'All ages') & (
    df['Total_Rural_Urban'].astype(str).str.strip().str.lower() == 'total'
)
grad_df = df[all_ages_total].groupby('District')['Persons_Graduate'].sum()
grad_df = grad_df.sort_values(ascending=False).head(10)

plt.figure(figsize=(12, 6))
grad_df.plot(kind='bar')
plt.title('Top Districts by Graduate Population')
plt.ylabel('Graduate Count')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
133 | 
# Line chart: male vs. female literacy rate (%) for each age group, averaged
# over the combined ('Total') rows of every area.
plt.figure(figsize=(12, 6))
age_groups = ['7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20-24', '25-29', '30-34', '35-39', '40-44', '45-49', '50-54', '55-59', '60-64', '65-69']

# Compare ages by their stripped string form so that a single-year age matches
# its label even if the workbook stored it as a number (7 rather than '7').
age_label = df['Age_group'].astype(str).str.strip()
is_combined_area = df['Total_Rural_Urban'].astype(str).str.strip().str.lower() == 'total'
selected = age_label.isin(age_groups) & is_combined_area

# Work on an explicit copy: adding columns to a filtered slice of df raises
# pandas' SettingWithCopyWarning and is not guaranteed to behave consistently.
line_df = df.loc[selected].copy()
line_df['Age_group'] = age_label.loc[selected]

for population in ('Persons', 'Males', 'Females'):
    line_df[f'Literacy_Rate_{population}'] = (
        line_df[f'{population}_Literate'] / line_df[f'{population}_Total'] * 100
    )

# reindex() puts the ages in their natural order; groups with no data stay
# NaN and simply leave a gap in the line.
line_agg = (
    line_df.groupby('Age_group')[['Literacy_Rate_Males', 'Literacy_Rate_Females']]
    .mean()
    .reindex(age_groups)
)
plt.plot(line_agg.index, line_agg['Literacy_Rate_Males'], marker='o', label='Males', color='blue')
plt.plot(line_agg.index, line_agg['Literacy_Rate_Females'], marker='o', label='Females', color='red')
plt.title('Literacy Rate by Age Group (Males vs. Females)')
plt.xlabel('Age Group')
plt.ylabel('Literacy Rate (%)')
plt.xticks(rotation=45)
plt.legend()
plt.grid(True, linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()


--------------------------------------------------------------------------------