├── Dataset.csv ├── README.md ├── basic_eda.py ├── objective_five.py ├── objective_four.py ├── objective_one.py ├── objective_three.py └── objective_two.py /README.md: -------------------------------------------------------------------------------- 1 | # Data-Science-Project 2 | 3 | Problem Statement: 4 | Analyze the availability of basic infrastructure and facilities in schools across different states and management types in India. 5 | 6 | Objectives: 7 | 1. Impact of Private vs. Government Schools on Students: Comparing the infrastructure provided by private and government schools to understand its potential influence on students’ learning environment and development. 8 | 2. Access to Educational Technology: Evaluating the availability of computers and internet in Indian schools across different states, school types, and management to understand digital readiness. 9 | 3. Gender Gap in Schools: Investigating gender disparities in education, including the number of boys' schools, girls' schools, and co-ed schools. 10 | 4. Infrastructure Analysis in Rural Areas: Focusing on the availability of school infrastructure in rural regions to uncover challenges faced in non-urban areas and support data-driven educational interventions. 11 | 5. Evaluating Basic Facility Access in Schools Across India: Analyze the availability of essential school facilities such as drinking water, toilets, handwashing stations, electricity, and furniture to assess the quality of basic learning environments across different regions and school types. 
# --------------------------------------------------------------------------------
# /basic_eda.py:
# --------------------------------------------------------------------------------
# Basic exploratory data analysis: load the dataset, normalise the column
# names, print summary information, and group each school management into
# "Private" or "Government".
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


# Reading data from csv file.
# Dataset.csv sits next to this script in the repository, so resolve it
# relative to the script rather than through a machine-specific absolute path.
DATA_PATH = Path(__file__).resolve().parent / "Dataset.csv"
df = pd.read_csv(DATA_PATH)


# Clean column names: trim whitespace, turn underscores into spaces and drop
# punctuation. regex=False is required: as regular expressions "(" is an
# invalid pattern and "." matches every character, and pandas versions before
# 2.0 treated the pattern as a regex by default.
cleaned_columns = df.columns.str.strip().str.replace("_", " ", regex=False)
for unwanted in ("(", ")", ",", "’", "'", "."):
    cleaned_columns = cleaned_columns.str.replace(unwanted, "", regex=False)
df.columns = cleaned_columns


# DataSet
print("Dataset Overview: ")
print(df)


# Basic EDA Prints
print("Basic EDA prints: ")
print(df.describe())
# DataFrame.info() writes its report itself and returns None, so wrapping it
# in print() would only add a stray "None" line.
df.info()
print(df.head())
print(df.tail())
print(df.columns)
print(df.shape)
print(df.isnull().sum())


# Map school managements
management_map = {
    # Private
    "Private Unaided (Recognized)": "Private",
    "Unrecognized": "Private",
    "Madarsa recognized (by Wakf board/Madarsa Board)": "Private",
    "Madarsa unrecognized": "Private",

    # Government
    "Department of Education": "Government",
    "Government Aided": "Government",
    "Tribal Welfare Department": "Government",
    "Local body": "Government",
    "Social welfare Department": "Government",
    "Other Govt. managed schools": "Government",
    "Kendriya Vidyalaya / Central School": "Government",
    "Jawahar Navodaya Vidyalaya": "Government",
    "Other Central Govt. Schools": "Government",
    "Railway School": "Government",
    "Sainik School": "Government",
    "Ministry of Labor": "Government",
    "Central Tibetan School": "Government",
}
# Managements that are not listed above become NaN in the new column.
df["School_Type_Grouped"] = df["School Management"].map(management_map)


# --------------------------------------------------------------------------------
# /objective_five.py:
# --------------------------------------------------------------------------------
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Dataset.csv sits next to this script in the repository, so resolve it
# relative to the script rather than through a machine-specific absolute path.
DATA_PATH = Path(__file__).resolve().parent / "Dataset.csv"
df = pd.read_csv(DATA_PATH)

management_map = {
    # Private
    "Private Unaided (Recognized)": "Private",
    "Unrecognized": "Private",
    "Madarsa recognized (by Wakf board/Madarsa Board)": "Private",
    "Madarsa unrecognized": "Private",

    # Government
    "Department of Education": "Government",
    "Government Aided": "Government",
    "Tribal Welfare Department": "Government",
    "Local body": "Government",
    "Social welfare Department": "Government",
    "Other Govt. managed schools": "Government",
    "Kendriya Vidyalaya / Central School": "Government",
    "Jawahar Navodaya Vidyalaya": "Government",
    "Other Central Govt. Schools": "Government",
    "Railway School": "Government",
    "Sainik School": "Government",
    "Ministry of Labor": "Government",
    "Central Tibetan School": "Government",
}
# Managements that are not listed above become NaN in the new column.
df["School_Type_Grouped"] = df["School Management"].map(management_map)

#5. Impact of Private vs. Government Schools on Students: Comparing the infrastructure provided by private and government schools to understand its potential influence on students’ learning environment and development.

# Sum each facility column per grouped school type, then transpose so the
# facilities run along the x-axis with one bar per school type.
# NOTE(review): the other objective scripts drop rows whose Location is
# "All India" before aggregating - confirm whether those rows should be
# excluded from these sums as well.
infra_comparison = df.groupby("School_Type_Grouped")[[
    "Functional Drinking Water",
    "Functional Electricity",
    "Functional Toilet Facility",
    "Furniture",
    "Handwash",
    "Water Purifier",
]].sum().T
infra_comparison.plot(kind="bar")
plt.title("Infrastructure Facilities: Government vs Private Schools")
plt.ylabel("Number of Schools with Functional Facility")
plt.xlabel("Infrastructure Type")
plt.xticks(rotation=45)
plt.legend(title="School Type")
plt.tight_layout()
# Without show() the figure is never displayed when this runs as a script.
plt.show()

# --------------------------------------------------------------------------------
# /objective_four.py:
# --------------------------------------------------------------------------------
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Dataset.csv sits next to this script in the repository, so resolve it
# relative to the script rather than through a machine-specific absolute path.
DATA_PATH = Path(__file__).resolve().parent / "Dataset.csv"
df = pd.read_csv(DATA_PATH)

management_map = {
    # Private
    "Private Unaided (Recognized)": "Private",
    "Unrecognized": "Private",
    "Madarsa recognized (by Wakf board/Madarsa Board)": "Private",
    "Madarsa unrecognized": "Private",

    # Government
    "Department of Education": "Government",
    "Government Aided": "Government",
    "Tribal Welfare Department": "Government",
    "Local body": "Government",
    "Social welfare Department": "Government",
    "Other Govt. managed schools": "Government",
    "Kendriya Vidyalaya / Central School": "Government",
    "Jawahar Navodaya Vidyalaya": "Government",
    "Other Central Govt. Schools": "Government",
    "Railway School": "Government",
    "Sainik School": "Government",
    "Ministry of Labor": "Government",
    "Central Tibetan School": "Government",
}
# Managements that are not listed above become NaN in the new column.
df["School_Type_Grouped"] = df["School Management"].map(management_map)

#4. Infrastructure Analysis in Rural Areas: Focusing on the availability of school infrastructure in rural regions to uncover challenges faced in non-urban areas and support data-driven educational interventions.

# Sum each facility column per "Rural/Urban" value, then transpose so the
# facilities run along the x-axis with one bar per location category.
# NOTE(review): the other objective scripts drop rows whose Location is
# "All India" before aggregating - confirm whether those rows should be
# excluded from these sums as well.
rural_infra = df.groupby("Rural/Urban")[[
    "Functional Drinking Water",
    "Functional Electricity",
    "Functional Toilet Facility",
    "Furniture",
    "Handwash",
    "Water Purifier",
]].sum().T
rural_infra.plot(kind="bar", color=["#f1d2cd", "#e15e5e"])
plt.title("Infrastructure Comparison: Rural vs Urban Schools")
plt.ylabel("Number of Schools with Facility")
plt.xlabel("Infrastructure Type")
plt.xticks(rotation=45)
plt.legend(title="Location")
# Keep the rotated tick labels inside the figure, then actually display it;
# without show() nothing appears when this runs as a script.
plt.tight_layout()
plt.show()

# --------------------------------------------------------------------------------
# /objective_one.py:
# --------------------------------------------------------------------------------
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Dataset.csv sits next to this script in the repository, so resolve it
# relative to the script rather than through a machine-specific absolute path.
DATA_PATH = Path(__file__).resolve().parent / "Dataset.csv"
df = pd.read_csv(DATA_PATH)

#1. Evaluating Basic Facility Access in Schools Across India: Analyze the availability of essential school facilities such as drinking water, toilets, handwashing stations, electricity, and furniture to assess the quality of basic learning environments across different regions and school types.

# The heatmap is labelled per state, so drop the "All India" rows first (the
# other objective scripts filter this Location value the same way); otherwise
# that row would sit among the states and dominate the colour scale.
state_rows = df[df["Location"] != "All India"]
state_facility = state_rows.groupby("Location")[[
    "Functional Drinking Water",
    "Functional Electricity",
    "Functional Toilet Facility",
    "Furniture",
    "Handwash",
    "Water Purifier",
]].sum()
plt.figure(figsize=(14, 10))
sns.heatmap(state_facility, annot=False, cmap="Blues", linewidths=0.5)
plt.title("Basic School Facilities Across States")
plt.xlabel("Facilities")
plt.ylabel("State")
plt.tight_layout()
# Without show() the figure is never displayed when this runs as a script.
plt.show()

# --------------------------------------------------------------------------------
# /objective_three.py:
# --------------------------------------------------------------------------------
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


# Dataset.csv sits next to this script in the repository, so resolve it
# relative to the script rather than through a machine-specific absolute path.
DATA_PATH = Path(__file__).resolve().parent / "Dataset.csv"
df = pd.read_csv(DATA_PATH)


print("Dataset Overview: ")
print(df)

management_map = {
    # Private
    "Private Unaided (Recognized)": "Private",
    "Unrecognized": "Private",
    "Madarsa recognized (by Wakf board/Madarsa Board)": "Private",
    "Madarsa unrecognized": "Private",

    # Government
    "Department of Education": "Government",
    "Government Aided": "Government",
    "Tribal Welfare Department": "Government",
    "Local body": "Government",
    "Social welfare Department": "Government",
    "Other Govt. managed schools": "Government",
    "Kendriya Vidyalaya / Central School": "Government",
    "Jawahar Navodaya Vidyalaya": "Government",
    "Other Central Govt. Schools": "Government",
    "Railway School": "Government",
    "Sainik School": "Government",
    "Ministry of Labor": "Government",
    "Central Tibetan School": "Government",
}
# Managements that are not listed above become NaN in the new column.
df["School_Type_Grouped"] = df["School Management"].map(management_map)


#3. Access to Educational Technology: Evaluating the availability of computers and internet in Indian schools across different states, school types, and management to understand digital readiness.
#A. Computer
# Per-state figures only: leave out the "All India" rows.
state_df = df[df["Location"] != "All India"]
computer = state_df.groupby("Location")[["Computer Available", "Total No of Schools"]].sum()
computer["Computers Available (in %)"] = (
    computer["Computer Available"] / computer["Total No of Schools"]
) * 100
plt.figure(figsize=(18, 10))
computer["Computers Available (in %)"].plot(kind="bar", color="#8e468a")
plt.title("Computer Availability in Schools by State")
plt.xlabel("State")
plt.ylabel("Computers Available (in %)")
plt.xticks(rotation=90)
plt.tight_layout()

#B. Internet
internet = state_df.groupby("Location")[["Internet", "Total No of Schools"]].sum()
internet["Internet Available (in %)"] = (
    internet["Internet"] / internet["Total No of Schools"]
) * 100
plt.figure(figsize=(18, 10))
internet["Internet Available (in %)"].plot(kind="bar", color="pink")
plt.title("Internet Availability in Schools by State")
plt.xlabel("State")
# This axis shows the internet percentage; it used to carry the label copied
# from the computer chart.
plt.ylabel("Internet Available (in %)")
plt.xticks(rotation=90)
plt.tight_layout()

#C. Government Vs Private
# Computed once; transposed so each technology is an x-axis group with one
# bar per school type.
tech_cols = ["Computer Available", "Internet"]
tech_comparison = df.groupby("School_Type_Grouped")[tech_cols].sum().T
tech_comparison.plot(kind="bar", color=["#8e468a", "pink"])
plt.title("Access to Computers and Internet: Government vs Private Schools")
plt.ylabel("Number of Schools with Access")
plt.xlabel("Technology Type")
plt.xticks(rotation=0)
plt.legend(title="School Type")
plt.tight_layout()
plt.show()


# --------------------------------------------------------------------------------
# /objective_two.py:
# --------------------------------------------------------------------------------
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


# Dataset.csv sits next to this script in the repository, so resolve it
# relative to the script rather than through a machine-specific absolute path.
DATA_PATH = Path(__file__).resolve().parent / "Dataset.csv"
df = pd.read_csv(DATA_PATH)


print("Dataset Overview: ")
print(df)


#2. Gender Gap in Schools: Investigating gender disparities in education, including number of boys schools, number of girls schools, and number of co-ed schools.
# Per-state figures only: leave out the "All India" rows.
df = df[df["Location"] != "All India"]
# One row per Location and one column per "School Type" value;
# Location/type combinations with no rows are filled with 0.
gender_gap = (
    df.groupby(["Location", "School Type"])["Total No of Schools"]
    .sum()
    .unstack(fill_value=0)
)

# (School Type column, legend label, line colour, figure title)
school_type_series = [
    ("Boys", "Boys Schools", "#4058ef", "Boys Schools Across India"),
    ("Girls", "Girls Schools", "#f577e4", "Girls Schools Across India"),
    ("Co-Ed", "Co-Educational Schools", "#34a154", "Co-ed Schools Across India"),
]

#A. Boys / #B. Girls / #C. Co-ed - one figure per school type
for column, label, colour, title in school_type_series:
    plt.figure(figsize=(18, 8))
    plt.plot(gender_gap.index, gender_gap[column], label=label, color=colour)
    plt.xticks(rotation=90)
    plt.legend()
    plt.ylabel("No. Of Schools")
    plt.title(title)
    plt.tight_layout()

#D. Girls Vs Boys Vs Co-ed
plt.figure(figsize=(18, 8))
for column, label, colour, _ in school_type_series:
    plt.plot(gender_gap.index, gender_gap[column], label=label, color=colour)
plt.xticks(rotation=90)
plt.legend()
plt.ylabel("No. Of Schools")
plt.title("Gender-wise Distribution Of Schools Across India")
plt.tight_layout()
plt.show()

# --------------------------------------------------------------------------------