├── Adult_literacy_rate_cleaned.py
├── Dividing_countries_Arabic_printing.py
├── Graph_for_all_region_Primar_NAR_nvd3.py
├── Plotly_Graph_for_Primary_NAR.worldMap.py
├── README.md
├── Total_Average_Primary_NAR_SNS_Graph.py
├── VARUN.SANDEEP.PPT.odp
├── World_map_Adult_literacy_rate.py
├── _config.yml
├── admin_one_sheet.py
├── african.png
├── american.png
├── arabic_literacy rate.png
├── cleaned_Primary_NAR.py
├── figure_1.png
└── final_adult_literacyrate_graph.py
/Adult_literacy_rate_cleaned.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | def prepare_sheet():
4 | df = pd.ExcelFile(r"/home/varun/Desktop/Project/Table-Youth-and-Adult-Literacy-Rate.xlsx")
5 | df = df.parse('Adult literacy rate',skiprows = 10 ,na_values = ['NA'])
6 | df.columns=['ISO Code','Countries','Year','Total','S5','Male','S7','Feamle','S9','S10']
7 | df = df.iloc[:-18,:]
8 | df = df.drop('S10', axis = 1)
9 | df = df.dropna(axis='columns',how='all')
10 |
11 | df = df.convert_objects(convert_numeric=True)
12 | df = df.dropna(axis='rows',how='any')
13 |
14 | df.to_excel('Adult_literacy_rate_cleaned.xlsx',index = False)
15 | prepare_sheet()
16 |
--------------------------------------------------------------------------------
/Dividing_countries_Arabic_printing.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | # importing cleaned Primary NAR dataset
3 | NAR = pd.ExcelFile(r"/home/varun/PrimaryNAR_cleaned.xlsx")
4 | NAR = pd.read_excel("/home/varun/PrimaryNAR_cleaned.xlsx",index_col = 1)
5 |
6 | # Dividing of countries into region
7 | # we made a list of countries according to their ISO Code
8 |
9 | Arabic = ['ARM','GEO','IRQ','JOR','KAZ','KGZ','OMN','PSE','SYR','TJK','TUR','TKM','UZB','YEM','UKR']
10 |
11 | Asia_Pacific = ['AFG','AZE','BGD','BTN','CHN','KHM','IND','IDN','JPN','MDV','MNG','MMR','NPL','PAK',
12 | 'PHL','THA','VNM']
13 |
14 | American_Region = ['ATG','BHS','BRB','BLZ','CAN','COL','CYM','CRI','CUB','CUW','DMA','DOM','SLV','GRL','GRD',
15 | 'GLP','GTM','HTI','HND','JAM','MTQ','MEX','SPM','MSR','ANT','KNA','NIC','PAN','PER','PRI','BES','BES','SXM',
16 | 'KNA','LCA','SPM','VCT','TTO','TCA','USA','VIR','VGB']
17 |
18 |
19 | African_Region = ['DZA','AGO','SHN','BEN','BWA','BFA','BDI','CMR','CPV','CAF','TCD','COM','COG',
20 | 'COD','DJI','EGY','GNQ','ERI','ETH','GAB','GMB','GHA','GIN','GNB','CIV','KEN','LSO','LBR','LBY',
21 | 'MDG','MWI','MLI','MRT','MUS','MYT','MAR','MOZ','NAM','NER','NGA','STP','REU','RWA','STP','SEN','SYC',
22 | 'SLE','SOM','ZAF','SSD','SHN','SDN','SWZ','TZA','TGO','TUN','UGA','COD','ZMB','TZA','ZWE']
23 |
24 | Others = Asia_Pacific+Arabic+American_Region+African_Region
25 |
26 | # For representing the countries and to get countries from dataset
27 | Arabic_countries = NAR.loc[NAR['ISO Code'].isin(Arabic)]
28 | Asian_countries = NAR.loc[NAR['ISO Code'].isin(Asia_Pacific)]
29 | African_countries = NAR.loc[NAR['ISO Code'].isin(African_Region)]
30 | American_countries = NAR.loc[NAR['ISO Code'].isin(American_Region)]
31 | Other_countries = NAR.loc[~NAR['ISO Code'].isin(Others)]
32 |
33 | # If Arabic countries present in dataset then it will print the
34 |
35 | print Arabic_countries
--------------------------------------------------------------------------------
/Graph_for_all_region_Primar_NAR_nvd3.py:
--------------------------------------------------------------------------------
1 | import nvd3
2 | from nvd3 import multiBarChart
3 | from IPython.core.display import display, HTML
4 | # saving a file into HTML file
5 | output_file = open('Adult_literacy_rate_nvd3.html', 'w')
6 | chart = multiBarChart(width=1000, height=400, x_axis_format=None,color = 'green')
7 | xdata = ['Poorest','Second','Middle','Fourth','Richest']
8 | Arabic_countries =[91.18,94.90,96.11,96.81,97.69]
9 | Asian_countries = [78.31,84.17,88.07,90.02,91.58]
10 | African_countries = [62.07,69.21,75.00,81.53,88.79]
11 | American_countries = [91.33,93.55,94.59,95.42,96.72]
12 | Other_countries = [87.01,89.64,90.65,91.52,93.34]
13 |
14 | chart.add_serie(name="Arabic_countries", y=Arabic_countries, x=xdata)
15 | chart.add_serie(name="Asian_countries", y=Asian_countries, x=xdata)
16 | chart.add_serie(name="African_countries", y=African_countries, x=xdata)
17 | chart.add_serie(name="American_countries", y=American_countries, x=xdata)
18 | chart.add_serie(name="Other_countries", y=Other_countries, x=xdata)
19 | chart.buildhtml()
20 | display(HTML(chart.htmlcontent))
21 | output_file.write(chart.htmlcontent)
22 |
23 | # close HTML file
24 | output_file.close()
25 |
--------------------------------------------------------------------------------
/Plotly_Graph_for_Primary_NAR.worldMap.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import plotly.plotly as py
3 | import plotly
4 | plotly.tools.set_credentials_file(username='sandeepbabgond', api_key='vcpfBduhzWqlk04B7Tc8')
5 |
6 | df = pd.read_excel('/home/varun/project2/cleaneddata/PrimaryNAR_cleaned.xlsx')
7 |
8 | data2 = dict(
9 | type = 'choropleth',
10 | locations = df['ISO Code'],
11 | z = df['Total'],
12 | text = df['Countries'],
13 | colorscale = [[0,'rgb(5, 10, 172)'],[0.35,"rgb(106, 137, 247)"],[0.5,"rgb(190,190,190)"],\
14 | [0.6,"rgb(220, 170, 132)"],[0.7,"rgb(230, 145, 90)"],[1,"rgb(178, 10, 28)"]],
15 | autocolorscale = False,
16 | reversescale = True,
17 | name = 'Total',
18 | marker = dict(
19 | line = dict (
20 | color = 'rgb(220,220,0)',
21 | width = 1.0
22 | ) ),
23 |
24 | )
25 | fig = dict( data=[data2] )
26 | py.iplot( fig, validate=False, filename='Primary_net_attendance_rate_map' )
27 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Mini-Project
2 | ## Topic: UNICEF data about the state of schooling, education and literacy across the globe.
3 | ### Datasets
4 | 1. Youth and adult literacy rates
5 | 2. Net attendance rates
6 | 3. Completion rates
7 | 4. Out-of-school rates
8 |
9 | ### Data pre-processing
10 | 1. Data cleaning (unstructured format to structured format)
11 | 2. pandas is a powerful open-source Python data analysis library that is used for data cleaning.
12 |
13 | ### Data Visualization
14 | Data visualization is a key part of any data science workflow. Data visualization should really be part of your workflow from the very beginning, as there is a lot of value and insight to be gained from just looking at your data. Summary statistics often don’t tell the whole story. When visualizing data, the most important factor to keep in mind is the purpose of the visualization.
15 |
16 | ### Data Analysis
17 | We used pandas for data pre-processing; it is one of the most powerful Python libraries for producing structured data.
18 |
19 | Link = https://varunkashyapks.github.io/Mini-Project/
20 |
--------------------------------------------------------------------------------
/Total_Average_Primary_NAR_SNS_Graph.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import matplotlib
4 | from matplotlib import pyplot as plt
5 | from IPython.core.display import display, HTML
6 | import seaborn as sns
7 |
8 | # Importing Data Sets
9 |
10 | #output_file = open('PNAR_Interactive_nvd3_graph[#01].html', 'w')
11 | NAR = pd.ExcelFile(r"/home/varun/PrimaryNAR_cleaned (another copy).xlsx")
12 | NAR = pd.read_excel("/home/varun/PrimaryNAR_cleaned (another copy).xlsx",index_col = 1)
13 |
14 |
15 | # Dividing of countries into region
16 | Arabic = ['ARM','GEO','IRQ','JOR','KAZ','KGZ','OMN','PSE','SYR','TJK','TUR','TKM','UZB','YEM','UKR']
17 |
18 | Asia_Pacific = ['AFG','AZE','BGD','BTN','CHN','KHM','IND','IDN','JPN','MDV','MNG','MMR','NPL','PAK',
19 | 'PHL','THA','VNM']
20 |
21 | American_Region = ['ATG','BHS','BRB','BLZ','CAN','COL','CYM','CRI','CUB','CUW','DMA','DOM','SLV','GRL','GRD',
22 | 'GLP','GTM','HTI','HND','JAM','MTQ','MEX','SPM','MSR','ANT','KNA','NIC','PAN','PER','PRI','BES','BES','SXM',
23 | 'KNA','LCA','SPM','VCT','TTO','TCA','USA','VIR','VGB']
24 |
25 |
26 | African_Region = ['DZA','AGO','SHN','BEN','BWA','BFA','BDI','CMR','CPV','CAF','TCD','COM','COG',
27 | 'COD','DJI','EGY','GNQ','ERI','ETH','GAB','GMB','GHA','GIN','GNB','CIV','KEN','LSO','LBR','LBY',
28 | 'MDG','MWI','MLI','MRT','MUS','MYT','MAR','MOZ','NAM','NER','NGA','STP','REU','RWA','STP','SEN','SYC',
29 | 'SLE','SOM','ZAF','SSD','SHN','SDN','SWZ','TZA','TGO','TUN','UGA','COD','ZMB','TZA','ZWE']
30 |
31 | Others = Asia_Pacific+Arabic+American_Region+African_Region
32 |
33 | # For representing the countries
34 | Arabic_countries = NAR.loc[NAR['ISO Code'].isin(Arabic)]
35 | Asian_countries = NAR.loc[NAR['ISO Code'].isin(Asia_Pacific)]
36 | African_countries = NAR.loc[NAR['ISO Code'].isin(African_Region)]
37 | American_countries = NAR.loc[NAR['ISO Code'].isin(American_Region)]
38 | Other_countries = NAR.loc[~NAR['ISO Code'].isin(Others)]
39 |
40 | # Taking Total Average
41 | AV_Total_Arabic = Arabic_countries['Total'].mean()
42 | AV_Total_Asian = Asian_countries['Total'].mean()
43 | AV_Total_African = African_countries['Total'].mean()
44 | AV_Total_American = American_countries['Total'].mean()
45 | AV_Total_Other = Other_countries['Total'].mean()
46 |
47 | Arabic_AV = {'AV_Total_Arabic':AV_Total_Arabic}
48 | a_av1 = pd.Series(Arabic_AV)
49 | a_av1 = pd.DataFrame(a_av1)
50 | a_av1.columns = ['Total Average']
51 |
52 | Asian_AV = {'AV_Total_Asian':AV_Total_Asian}
53 | a_av2 = pd.Series(Asian_AV)
54 | a_av2 = pd.DataFrame(a_av2)
55 | a_av2.columns = ['Total Average']
56 |
57 | African_AV = {'AV_Total_African':AV_Total_African}
58 | a_av3 = pd.Series(African_AV)
59 | a_av3 = pd.DataFrame(a_av3)
60 | a_av3.columns = ['Total Average']
61 |
62 | American_AV = {'AV_Total_American':AV_Total_American}
63 | a_av4 = pd.Series(American_AV)
64 | a_av4 = pd.DataFrame(a_av4)
65 | a_av4.columns = ['Total Average']
66 |
67 | Other_AV = {'AV_Total_Other':AV_Total_Other}
68 | a_av5 = pd.Series(Other_AV)
69 | a_av5 = pd.DataFrame(a_av5)
70 | a_av5.columns = ['Total Average']
71 |
72 | Total_av = [a_av1,a_av2,a_av3,a_av4,a_av5]
73 | TotalAverage = pd.concat(Total_av)
74 | TotalAverage = pd.DataFrame(TotalAverage)
75 | print TotalAverage
76 | dataset =['Arabic_countries','Asian_countries','African_countries','American_countries','Other_countries']
77 | var = sns.barplot(x=dataset, y='Total Average',data =TotalAverage,color = 'darkblue')
78 | var.axes.set_title('Regions vs Average NAR')
79 | var.set(xlabel='Regions', ylabel='Average NAR')
80 | plt.show()
81 |
--------------------------------------------------------------------------------
/VARUN.SANDEEP.PPT.odp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/varunkashyapks/Mini-Project/2d80c89d95f965c42b8461febaed35a32fcceeef/VARUN.SANDEEP.PPT.odp
--------------------------------------------------------------------------------
/World_map_Adult_literacy_rate.py:
--------------------------------------------------------------------------------
1 | import plotly.plotly as py
2 | import pandas as pd
3 | import plotly
4 | #from IPython.display import Image
5 | #from IPython.core.display import display, HTML
6 | #from nvd3 import
7 | plotly.tools.set_credentials_file(username='sandeepbabgond', api_key='vcpfBduhzWqlk04B7Tc8')
8 |
9 | df = pd.read_excel('/home/sandeep/project2/cleaneddata/Adult_literacy_rate_cleaned.xlsx')
10 |
11 | data2 = dict(
12 | type = 'choropleth',
13 | locations = df['ISO Code'],
14 | z = df['Total'],
15 | text = df['Countries'],
16 | colorscale = [[0,'rgb(5, 10, 172)'],[0.35,"rgb(106, 137, 247)"],[0.5,"rgb(190,190,190)"],\
17 | [0.6,"rgb(220, 170, 132)"],[0.7,"rgb(230, 145, 90)"],[1,"rgb(178, 10, 28)"]],
18 | autocolorscale = False,
19 | reversescale = True,
20 | name = 'Total',
21 | marker = dict(
22 | line = dict (
23 | color = 'rgb(220,220,0)',
24 | width = 1.0
25 | ) ),
26 |
27 | )
28 | fig = dict( data=[data2] )
29 | py.iplot( fig, validate=False, filename='Adult_literacy_rate_map' )
30 |
--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-cayman
--------------------------------------------------------------------------------
/admin_one_sheet.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | def prepare_sheet():
4 | df = pd.ExcelFile(r"/home/varun/Desktop/Project/Admin.xlsx")
5 | df = df.parse('Pre-primary GER',skiprows = 10 ,na_values = ['NA'])
6 | df.columns=['ISO Code','Countries','Year','Total','S5','Male','S7','Feamle','S9','S10']
7 | df = df.iloc[:-18,:]
8 | df = df.drop('S10', axis = 1)
9 | df = df.dropna(axis='columns',how='all')
10 |
11 | df = df.convert_objects(convert_numeric=True)
12 | df = df.dropna(axis='rows',how='any')
13 |
14 | df.to_excel('FIRST_cleaned_admin.xlsx',index = False)
15 | prepare_sheet()
--------------------------------------------------------------------------------
/african.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/varunkashyapks/Mini-Project/2d80c89d95f965c42b8461febaed35a32fcceeef/african.png
--------------------------------------------------------------------------------
/american.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/varunkashyapks/Mini-Project/2d80c89d95f965c42b8461febaed35a32fcceeef/american.png
--------------------------------------------------------------------------------
/arabic_literacy rate.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/varunkashyapks/Mini-Project/2d80c89d95f965c42b8461febaed35a32fcceeef/arabic_literacy rate.png
--------------------------------------------------------------------------------
/cleaned_Primary_NAR.py:
--------------------------------------------------------------------------------
1 | #importing file and cleaning Primary net attendance ratio
2 | import pandas as pd
3 | def prepare_sheet():
4 | df = pd.ExcelFile(r"/home/varun/Desktop/Project/survey.xlsx")
5 | df = df.parse('Primary NAR',skiprows = 10 ,na_values = ['NA'])
6 | df = df.iloc[11:197,:]
7 | df.columns = ['S1','S2','S3','S4','S5','S6','S7','S8','S9','S10','S11',
8 | 'S12','S13','S14','S15','S16','S17','S18','S19','S20','S21','S22','S23','S24']
9 | df = df.drop(['S5','S7','S9','S11','S13','S15','S17','S19','S21','S23','S24'],axis = 1)
10 | df = df.dropna(axis='columns',how='all')
11 | df = df.convert_objects(convert_numeric=True)
12 | df.to_excel('PrimaryNAR_cleaned',index = False)
13 |
14 | prepare_sheet()
15 |
--------------------------------------------------------------------------------
/figure_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/varunkashyapks/Mini-Project/2d80c89d95f965c42b8461febaed35a32fcceeef/figure_1.png
--------------------------------------------------------------------------------
/final_adult_literacyrate_graph.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import matplotlib
4 | from matplotlib import pyplot as plt
5 | from IPython.core.display import display, HTML
6 | import seaborn as sns
7 |
8 | ALR = pd.ExcelFile(r"/home/varun/Adult_literacy_rate_cleaned.xlsx")
9 | ALR= pd.read_excel("/home/varun/Adult_literacy_rate_cleaned.xlsx",index_col = 1)
10 |
11 | # Dividing of countries into region
12 | Arabic = ['ARM','ARE','GEO','IRQ','BHR','JOR','KAZ','KGZ','IRN','SAU','OMN','PSE','SYR','TJK','TUR','TKM','UZB','YEM','UKR','QAT']
13 |
14 | Asia_Pacific = ['AFG','AZE','BGD','BTN','CHN','KHM','IND','IDN','JPN','MDV','MNG','MMR','NPL','PAK','LKA',
15 | 'PHL','THA','SGP','VNM']
16 |
17 | American_Region = ['ATG','BHS','BRB','BRA','CHL','BLZ','CAN','COL','CYM','CRI','CUB','CUW','DMA','ECU','DOM','SLV','GRL','GRD',
18 | 'GLP','GTM','HTI','HND','JAM','MTQ','MEX','SPM','MSR','ANT','KNA','NIC','PAN','PER','PRI','BES','BES','SXM',
19 | 'KNA','LCA','SPM','VCT','TTO','TCA','USA','VIR','VGB','URY']
20 |
21 |
22 | African_Region = ['DZA','AGO','SHN','BEN','BWA','BFA','BDI','CMR','CPV','CAF','TCD','COM','COG',
23 | 'COD','DJI','EGY','GNQ','ERI','ETH','GAB','GMB','GHA','GIN','GNB','CIV','KEN','LSO','LBR','LBY',
24 | 'MDG','MWI','MLI','MRT','MUS','MYT','MAR','MOZ','NAM','NER','NGA','STP','REU','RWA','STP','SEN','SYC',
25 | 'SLE','SOM','ZAF','SSD','SHN','SDN','SWZ','TZA','TGO','TUN','UGA','COD','ZMB','TZA','ZWE']
26 |
27 | Others = Asia_Pacific+Arabic+American_Region+African_Region
28 |
29 | # For representing the countries
30 | Arabic_countries = ALR.loc[ALR['ISO Code'].isin(Arabic)]
31 | Asian_countries = ALR.loc[ALR['ISO Code'].isin(Asia_Pacific)]
32 | African_countries = ALR.loc[ALR['ISO Code'].isin(African_Region)]
33 | American_countries = ALR.loc[ALR['ISO Code'].isin(American_Region)]
34 | Others_countries = ALR.loc[~ALR['ISO Code'].isin(Others)]
35 | # for Arabic countries
36 | var = sns.barplot(x='ISO Code', y='Total',data =Arabic_countries)
37 | var.set(xlabel='Countries Name', ylabel='Total literacy rate')
38 | plt.show()
39 | # for asian
40 | var = sns.barplot(x='ISO Code', y='Total',data =Asian_countries)
41 | var.set(xlabel='Countries Name', ylabel='Total literacy rate')
42 | plt.show()
43 | # for African
44 | var = sns.barplot(x='ISO Code', y='Total',data =African_countries)
45 | var.set(xlabel='Countries Name', ylabel='Total literacy rate')
46 | plt.show()
47 | # for American
48 | var = sns.barplot(x='ISO Code', y='Total',data =American_countries)
49 | var.set(xlabel='Countries Name', ylabel='Total literacy rate')
50 | plt.show()
51 | # for other
52 | var = sns.barplot(x='ISO Code', y='Total',data =Others_countries)
53 | var.set(xlabel='Countries Name', ylabel='Total literacy rate')
54 | plt.show()
55 |
--------------------------------------------------------------------------------