├── Dataset.csv
├── README.md
├── basic_eda.py
├── objective_five.py
├── objective_four.py
├── objective_one.py
├── objective_three.py
└── objective_two.py


/README.md:
--------------------------------------------------------------------------------
 1 | # Data-Science-Project
 2 | 
 3 | Problem Statement:
 4 | Analyze the availability of basic infrastructure and facilities in schools across different states and management types in India.
 5 | 
 6 | Objectives:
 7 | 1.	Impact of Private vs. Government Schools on Students: Comparing the infrastructure provided by private and government schools to understand its potential influence on students’ learning environment and development.
 8 | 2.	Access to Educational Technology: Evaluating the availability of computers and internet in Indian schools across different states, school types, and management to understand digital readiness.
 9 | 3.	Gender Gap in Schools: Investigating gender disparities in education by comparing the number of boys' schools, girls' schools, and co-ed schools.
10 | 4.	Infrastructure Analysis in Rural Areas: Focusing on the availability of school infrastructure in rural regions to uncover challenges faced in non-urban areas and support data-driven educational interventions.
11 | 5.	Evaluating Basic Facility Access in Schools Across India: Analyze the availability of essential school facilities such as drinking water, toilets, handwashing stations, electricity, and furniture to assess the quality of basic learning environments across different regions and school types.
12 | 


--------------------------------------------------------------------------------
/basic_eda.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | import matplotlib.pyplot as plt
 4 | import seaborn as sns
 5 | 
 6 | 
 7 | #Reading data from csv file
 8 | df = pd.read_csv("C:\\Users\\mailt\\Desktop\\Diya\\DS_py\\Project\\Dataset.csv")
 9 | 
10 | 
11 | # Clean column names
12 | df.columns = df.columns.str.strip().str.replace("_", "  ").str.replace("(", "").str.replace(")", "").str.replace(",", "").str.replace("’", "").str.replace("'", "").str.replace(".", "")
13 | 
14 | 
15 | #DataSet
16 | print("Dataset Overview: ")
17 | print(df)
18 | 
19 | 
20 | #Basic EDA Prints
21 | print("Basic EDA prints: ")
22 | print(df.describe())
23 | print(df.info())
24 | print(df.head())
25 | print(df.tail())
26 | print(df.columns)
27 | print(df.shape)
28 | print(df.isnull().sum())
29 | 
30 | 
31 | # Map school managements
32 | management_map = {
33 |     # Private
34 |     "Private Unaided (Recognized)": "Private",
35 |     "Unrecognized": "Private",
36 |     "Madarsa recognized (by Wakf board/Madarsa Board)": "Private",
37 |     "Madarsa unrecognized": "Private",
38 |     
39 |     # Government
40 |     "Department of Education": "Government",
41 |     "Government Aided": "Government",
42 |     "Tribal Welfare Department": "Government",
43 |     "Local body": "Government",
44 |     "Social welfare Department": "Government",
45 |     "Other Govt. managed schools": "Government",
46 |     "Kendriya Vidyalaya / Central School": "Government",
47 |     "Jawahar Navodaya Vidyalaya": "Government",
48 |     "Other Central Govt. Schools": "Government",
49 |     "Railway School": "Government",
50 |     "Sainik School": "Government",
51 |     "Ministry of Labor": "Government",
52 |     "Central Tibetan School": "Government"
53 | }
54 | df["School_Type_Grouped"] = df["School Management"].map(management_map)
55 | 
56 | 
57 | 


--------------------------------------------------------------------------------
/objective_five.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | import matplotlib.pyplot as plt
 4 | import seaborn as sns
 5 | 
 6 | df = pd.read_csv("C:\\Users\\mailt\\Desktop\\Diya\\DS_py\\Project\\Dataset.csv")
 7 | 
 8 | management_map = {
 9 |     # Private
10 |     "Private Unaided (Recognized)": "Private",
11 |     "Unrecognized": "Private",
12 |     "Madarsa recognized (by Wakf board/Madarsa Board)": "Private",
13 |     "Madarsa unrecognized": "Private",
14 |     
15 |     # Government
16 |     "Department of Education": "Government",
17 |     "Government Aided": "Government",
18 |     "Tribal Welfare Department": "Government",
19 |     "Local body": "Government",
20 |     "Social welfare Department": "Government",
21 |     "Other Govt. managed schools": "Government",
22 |     "Kendriya Vidyalaya / Central School": "Government",
23 |     "Jawahar Navodaya Vidyalaya": "Government",
24 |     "Other Central Govt. Schools": "Government",
25 |     "Railway School": "Government",
26 |     "Sainik School": "Government",
27 |     "Ministry of Labor": "Government",
28 |     "Central Tibetan School": "Government"
29 | }
30 | df["School_Type_Grouped"] = df["School Management"].map(management_map)
31 | 
32 | #5.	Impact of Private vs. Government Schools on Students: Comparing the infrastructure provided by private and government schools to understand its potential influence on students’ learning environment and development.
33 | 
34 | infra_comparison = df.groupby("School_Type_Grouped")[["Functional Drinking Water","Functional Electricity","Functional Toilet Facility","Furniture","Handwash","Water Purifier"
35 | ]].sum().T
36 | infra_comparison.plot(kind="bar")
37 | plt.title("Infrastructure Facilities: Government vs Private Schools")
38 | plt.ylabel("Number of Schools with Functional Facility")
39 | plt.xlabel("Infrastructure Type")
40 | plt.xticks(rotation=45)
41 | plt.legend(title="School Type")
42 | plt.tight_layout()
43 | 


--------------------------------------------------------------------------------
/objective_four.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | import matplotlib.pyplot as plt
 4 | import seaborn as sns
 5 | 
 6 | df = pd.read_csv("C:\\Users\\mailt\\Desktop\\Diya\\DS_py\\Project\\Dataset.csv")
 7 | 
 8 | management_map = {
 9 |     # Private
10 |     "Private Unaided (Recognized)": "Private",
11 |     "Unrecognized": "Private",
12 |     "Madarsa recognized (by Wakf board/Madarsa Board)": "Private",
13 |     "Madarsa unrecognized": "Private",
14 |     
15 |     # Government
16 |     "Department of Education": "Government",
17 |     "Government Aided": "Government",
18 |     "Tribal Welfare Department": "Government",
19 |     "Local body": "Government",
20 |     "Social welfare Department": "Government",
21 |     "Other Govt. managed schools": "Government",
22 |     "Kendriya Vidyalaya / Central School": "Government",
23 |     "Jawahar Navodaya Vidyalaya": "Government",
24 |     "Other Central Govt. Schools": "Government",
25 |     "Railway School": "Government",
26 |     "Sainik School": "Government",
27 |     "Ministry of Labor": "Government",
28 |     "Central Tibetan School": "Government"
29 | }
30 | df["School_Type_Grouped"] = df["School Management"].map(management_map)
31 | 
32 | #4. Infrastructure Analysis in Rural Areas: Focusing on the availability of school infrastructure in rural regions to uncover challenges faced in non-urban areas and support data-driven educational interventions.
33 | 
34 | rural_infra = df.groupby("Rural/Urban")[["Functional Drinking Water", "Functional Electricity", "Functional Toilet Facility", "Furniture", "Handwash", "Water Purifier"]].sum().T
35 | rural_infra.plot(kind="bar", color=["#f1d2cd", "#e15e5e"])
36 | plt.title("Infrastructure Comparison: Rural vs Urban Schools")
37 | plt.ylabel("Number of Schools with Facility")
38 | plt.xlabel("Infrastructure Type")
39 | plt.xticks(rotation=45)
40 | plt.legend(title="Location")
41 | 


--------------------------------------------------------------------------------
/objective_one.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | import matplotlib.pyplot as plt
 4 | import seaborn as sns
 5 | 
 6 | df = pd.read_csv("C:\\Users\\mailt\\Desktop\\Diya\\DS_py\\Project\\Dataset.csv")
 7 | 
 8 | #1.	Evaluating Basic Facility Access in Schools Across India: Analyze the availability of essential school facilities such as drinking water, toilets, handwashing stations, electricity, and furniture to assess the quality of basic learning environments across different regions and school types.
 9 | 
10 | state_facility = df.groupby("Location")[["Functional Drinking Water","Functional Electricity","Functional Toilet Facility","Furniture","Handwash","Water Purifier"]].sum()
11 | plt.figure(figsize=(14, 10))
12 | sns.heatmap(state_facility, annot=False, cmap="Blues", linewidths=0.5)
13 | plt.title("Basic School Facilities Across States")
14 | plt.xlabel("Facilities")
15 | plt.ylabel("State")
16 | 


--------------------------------------------------------------------------------
/objective_three.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | import matplotlib.pyplot as plt
 4 | import seaborn as sns
 5 | 
 6 | 
 7 | df = pd.read_csv("C:\\Users\\mailt\\Desktop\\Diya\\DS_py\\Project\\Dataset.csv")
 8 | 
 9 | 
10 | print("Dataset Overview: ")
11 | print(df)
12 | 
13 | management_map = {
14 |     # Private
15 |     "Private Unaided (Recognized)": "Private",
16 |     "Unrecognized": "Private",
17 |     "Madarsa recognized (by Wakf board/Madarsa Board)": "Private",
18 |     "Madarsa unrecognized": "Private",
19 |     
20 |     # Government
21 |     "Department of Education": "Government",
22 |     "Government Aided": "Government",
23 |     "Tribal Welfare Department": "Government",
24 |     "Local body": "Government",
25 |     "Social welfare Department": "Government",
26 |     "Other Govt. managed schools": "Government",
27 |     "Kendriya Vidyalaya / Central School": "Government",
28 |     "Jawahar Navodaya Vidyalaya": "Government",
29 |     "Other Central Govt. Schools": "Government",
30 |     "Railway School": "Government",
31 |     "Sainik School": "Government",
32 |     "Ministry of Labor": "Government",
33 |     "Central Tibetan School": "Government"
34 | }
35 | df["School_Type_Grouped"] = df["School Management"].map(management_map)
36 | df["School_Type_Grouped"]
37 | 
38 | 
39 | #3. Access to Educational Technology: Evaluating the availability of computers and internet in Indian schools across different states, school types, and management to understand digital readiness.
40 | #A. Computer
41 | df_B = df[df["Location"] != "All India"]
42 | computer = df_B.groupby("Location")[["Computer Available", "Total No of Schools"]].sum()
43 | computer
44 | computer["Computers Available (in %)"] = (computer["Computer Available"]/computer["Total No of Schools"])*100
45 | computer["Computers Available (in %)"]
46 | plt.figure(figsize=(18, 10))
47 | computer["Computers Available (in %)"].plot(kind='bar', color="#8e468a")
48 | plt.title("Computer Availability in Schools by State")
49 | plt.xlabel("State")
50 | plt.ylabel("Computers Available (in %)")
51 | plt.xticks(rotation=90)
52 | 
53 | #B. Internet
54 | internet = df_B.groupby("Location")[["Internet", "Total No of Schools"]].sum()
55 | internet
56 | internet["Internet Available (in %)"] = (internet["Internet"]/internet["Total No of Schools"])*100
57 | internet["Internet Available (in %)"]
58 | plt.figure(figsize=(18, 10))
59 | internet["Internet Available (in %)"].plot(kind='bar', color="pink")
60 | plt.title("Internet Availability in Schools by State")
61 | plt.xlabel("State")
62 | plt.ylabel("Computers Available (in %)")
63 | plt.xticks(rotation=90)
64 | 
65 | #C. Government Vs Private
66 | tech_comparison = df.groupby("School_Type_Grouped")[["Computer Available", "Internet"]].sum().T
67 | tech_comparison
68 | tech_cols = ["Computer Available", "Internet"]
69 | tech_comparison = df.groupby("School_Type_Grouped")[tech_cols].sum().T
70 | tech_comparison.plot(kind="bar", color=["#8e468a", "pink"])
71 | plt.title("Access to Computers and Internet: Government vs Private Schools")
72 | plt.ylabel("Number of Schools with Access")
73 | plt.xlabel("Technology Type")
74 | plt.xticks(rotation=0)
75 | plt.legend(title="School Type")
76 | plt.tight_layout()
77 | plt.show()
78 | 
79 | 


--------------------------------------------------------------------------------
/objective_two.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | import matplotlib.pyplot as plt
 4 | import seaborn as sns
 5 | 
 6 | 
 7 | df = pd.read_csv("C:\\Users\\mailt\\Desktop\\Diya\\DS_py\\Project\\Dataset.csv")
 8 | 
 9 | 
10 | print("Dataset Overview: ")
11 | print(df)
12 | 
13 | 
14 | #2. Gender Gap in Schools: Investigating gender disparities in education, including number of boys schools, number of girls schools, and number of co-ed schools.
15 | df = df[df["Location"] != "All India"]
16 | gender_gap = df.groupby(["Location", "School Type"])["Total No of Schools"].sum().unstack(fill_value=0)
17 | gender_gap
18 | 
19 | #A. Boys
20 | plt.figure(figsize=(18, 8))
21 | plt.plot(gender_gap.index, gender_gap["Boys"], label="Boys Schools", color="#4058ef")
22 | plt.xticks(rotation=90)
23 | plt.legend()
24 | plt.ylabel("No. Of Schools")
25 | plt.title("Boys Schools Across India")
26 | 
27 | #B. Girls
28 | plt.figure(figsize=(18, 8))
29 | plt.plot(gender_gap.index, gender_gap["Girls"], label="Girls Schools", color="#f577e4")
30 | plt.xticks(rotation=90)
31 | plt.legend()
32 | plt.ylabel("No. Of Schools")
33 | plt.title("Girls Schools Across India")
34 | 
35 | #C. Co-ed
36 | plt.figure(figsize=(18, 8))
37 | plt.plot(gender_gap.index, gender_gap["Co-Ed"], label="Co-Educational Schools", color="#34a154")
38 | plt.xticks(rotation=90)
39 | plt.legend()
40 | plt.ylabel("No. Of Schools")
41 | plt.title("Co-ed Schools Across India")
42 | 
43 | #D. Girls Vs Boys Vs Co-ed
44 | plt.figure(figsize=(18, 8))
45 | plt.plot(gender_gap.index, gender_gap["Boys"], label="Boys Schools", color="#4058ef")
46 | plt.plot(gender_gap.index, gender_gap["Girls"], label="Girls Schools", color="#f577e4")
47 | plt.plot(gender_gap.index, gender_gap["Co-Ed"], label="Co-Educational Schools", color="#34a154")
48 | plt.xticks(rotation=90)
49 | plt.legend()
50 | plt.ylabel("No. Of Schools")
51 | plt.title("Gender-wise Distribution Of Schools Across India")
52 | plt.show()
53 | 


--------------------------------------------------------------------------------