├── Adult_literacy_rate_cleaned.py ├── Dividing_countries_Arabic_printing.py ├── Graph_for_all_region_Primar_NAR_nvd3.py ├── Plotly_Graph_for_Primary_NAR.worldMap.py ├── README.md ├── Total_Average_Primary_NAR_SNS_Graph.py ├── VARUN.SANDEEP.PPT.odp ├── World_map_Adult_literacy_rate.py ├── _config.yml ├── admin_one_sheet.py ├── african.png ├── american.png ├── arabic_literacy rate.png ├── cleaned_Primary_NAR.py ├── figure_1.png └── final_adult_literacyrate_graph.py /Adult_literacy_rate_cleaned.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | def prepare_sheet(): 4 | df = pd.ExcelFile(r"/home/varun/Desktop/Project/Table-Youth-and-Adult-Literacy-Rate.xlsx") 5 | df = df.parse('Adult literacy rate',skiprows = 10 ,na_values = ['NA']) 6 | df.columns=['ISO Code','Countries','Year','Total','S5','Male','S7','Feamle','S9','S10'] 7 | df = df.iloc[:-18,:] 8 | df = df.drop('S10', axis = 1) 9 | df = df.dropna(axis='columns',how='all') 10 | 11 | df = df.convert_objects(convert_numeric=True) 12 | df = df.dropna(axis='rows',how='any') 13 | 14 | df.to_excel('Adult_literacy_rate_cleaned.xlsx',index = False) 15 | prepare_sheet() 16 | -------------------------------------------------------------------------------- /Dividing_countries_Arabic_printing.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | # importing cleaned Primary NAR dataset 3 | NAR = pd.ExcelFile(r"/home/varun/PrimaryNAR_cleaned.xlsx") 4 | NAR = pd.read_excel("/home/varun/PrimaryNAR_cleaned.xlsx",index_col = 1) 5 | 6 | # Dividing of countries into region 7 | # we made a list of countries according to their ISO Code 8 | 9 | Arabic = ['ARM','GEO','IRQ','JOR','KAZ','KGZ','OMN','PSE','SYR','TJK','TUR','TKM','UZB','YEM','UKR'] 10 | 11 | Asia_Pacific = ['AFG','AZE','BGD','BTN','CHN','KHM','IND','IDN','JPN','MDV','MNG','MMR','NPL','PAK', 12 | 'PHL','THA','VNM'] 13 | 14 | American_Region = 
['ATG','BHS','BRB','BLZ','CAN','COL','CYM','CRI','CUB','CUW','DMA','DOM','SLV','GRL','GRD', 15 | 'GLP','GTM','HTI','HND','JAM','MTQ','MEX','SPM','MSR','ANT','KNA','NIC','PAN','PER','PRI','BES','BES','SXM', 16 | 'KNA','LCA','SPM','VCT','TTO','TCA','USA','VIR','VGB'] 17 | 18 | 19 | African_Region = ['DZA','AGO','SHN','BEN','BWA','BFA','BDI','CMR','CPV','CAF','TCD','COM','COG', 20 | 'COD','DJI','EGY','GNQ','ERI','ETH','GAB','GMB','GHA','GIN','GNB','CIV','KEN','LSO','LBR','LBY', 21 | 'MDG','MWI','MLI','MRT','MUS','MYT','MAR','MOZ','NAM','NER','NGA','STP','REU','RWA','STP','SEN','SYC', 22 | 'SLE','SOM','ZAF','SSD','SHN','SDN','SWZ','TZA','TGO','TUN','UGA','COD','ZMB','TZA','ZWE'] 23 | 24 | Others = Asia_Pacific+Arabic+American_Region+African_Region 25 | 26 | # For representing the countries and to get countries from dataset 27 | Arabic_countries = NAR.loc[NAR['ISO Code'].isin(Arabic)] 28 | Asian_countries = NAR.loc[NAR['ISO Code'].isin(Asia_Pacific)] 29 | African_countries = NAR.loc[NAR['ISO Code'].isin(African_Region)] 30 | American_countries = NAR.loc[NAR['ISO Code'].isin(American_Region)] 31 | Other_countries = NAR.loc[~NAR['ISO Code'].isin(Others)] 32 | 33 | # If Arabic countries present in dataset then it will print the 34 | 35 | print Arabic_countries -------------------------------------------------------------------------------- /Graph_for_all_region_Primar_NAR_nvd3.py: -------------------------------------------------------------------------------- 1 | import nvd3 2 | from nvd3 import multiBarChart 3 | from IPython.core.display import display, HTML 4 | # saving a file into HTML file 5 | output_file = open('Adult_literacy_rate_nvd3.html', 'w') 6 | chart = multiBarChart(width=1000, height=400, x_axis_format=None,color = 'green') 7 | xdata = ['Poorest','Second','Middle','Fourth','Richest'] 8 | Arabic_countries =[91.18,94.90,96.11,96.81,97.69] 9 | Asian_countries = [78.31,84.17,88.07,90.02,91.58] 10 | African_countries = [62.07,69.21,75.00,81.53,88.79] 11 | 
American_countries = [91.33,93.55,94.59,95.42,96.72] 12 | Other_countries = [87.01,89.64,90.65,91.52,93.34] 13 | 14 | chart.add_serie(name="Arabic_countries", y=Arabic_countries, x=xdata) 15 | chart.add_serie(name="Asian_countries", y=Asian_countries, x=xdata) 16 | chart.add_serie(name="African_countries", y=African_countries, x=xdata) 17 | chart.add_serie(name="American_countries", y=American_countries, x=xdata) 18 | chart.add_serie(name="Other_countries", y=Other_countries, x=xdata) 19 | chart.buildhtml() 20 | display(HTML(chart.htmlcontent)) 21 | output_file.write(chart.htmlcontent) 22 | 23 | # close HTML file 24 | output_file.close() 25 | -------------------------------------------------------------------------------- /Plotly_Graph_for_Primary_NAR.worldMap.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import plotly.plotly as py 3 | import plotly 4 | plotly.tools.set_credentials_file(username='sandeepbabgond', api_key='vcpfBduhzWqlk04B7Tc8') 5 | 6 | df = pd.read_excel('/home/varun/project2/cleaneddata/PrimaryNAR_cleaned.xlsx') 7 | 8 | data2 = dict( 9 | type = 'choropleth', 10 | locations = df['ISO Code'], 11 | z = df['Total'], 12 | text = df['Countries'], 13 | colorscale = [[0,'rgb(5, 10, 172)'],[0.35,"rgb(106, 137, 247)"],[0.5,"rgb(190,190,190)"],\ 14 | [0.6,"rgb(220, 170, 132)"],[0.7,"rgb(230, 145, 90)"],[1,"rgb(178, 10, 28)"]], 15 | autocolorscale = False, 16 | reversescale = True, 17 | name = 'Total', 18 | marker = dict( 19 | line = dict ( 20 | color = 'rgb(220,220,0)', 21 | width = 1.0 22 | ) ), 23 | 24 | ) 25 | fig = dict( data=[data2] ) 26 | py.iplot( fig, validate=False, filename='Primary_net_attendance_rate_map' ) 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Mini-Project 2 | ##Topic :UNICEF data about the state of schooling,education and literacy 
across the globe. 3 | ### Datasets 4 | 1] Youth and adult literacy rates
5 | 2] Net attendance rates
6 | 3] Completion rates
7 | 4] Out-of-school rates 8 | 9 | ### Data pre-processing 10 | 1] Data cleaning (Unstructured format to Structured format)
11 | 2] pandas is a powerful open-source Python data analysis library that is used for data cleaning.
12 | 13 | ### Data Visualization 14 | Data visualization is a key part of any data science workflow. Data visualization should really be part of your workflow from the very beginning, as there is a lot of value and insight to be gained from just looking at your data. Summary statistics often don’t tell the whole story. When visualizing data, the most important factor to keep in mind is the purpose of the visualization. 15 | 16 | ### Data Analysis 17 | We used pandas for Data pre processing ,Python pandas are the one of most powerfull to make structured data 18 | 19 | Link = https://varunkashyapks.github.io/Mini-Project/ 20 | -------------------------------------------------------------------------------- /Total_Average_Primary_NAR_SNS_Graph.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib 4 | from matplotlib import pyplot as plt 5 | from IPython.core.display import display, HTML 6 | import seaborn as sns 7 | 8 | # Importing Data Sets 9 | 10 | #output_file = open('PNAR_Interactive_nvd3_graph[#01].html', 'w') 11 | NAR = pd.ExcelFile(r"/home/varun/PrimaryNAR_cleaned (another copy).xlsx") 12 | NAR = pd.read_excel("/home/varun/PrimaryNAR_cleaned (another copy).xlsx",index_col = 1) 13 | 14 | 15 | # Dividing of countries into region 16 | Arabic = ['ARM','GEO','IRQ','JOR','KAZ','KGZ','OMN','PSE','SYR','TJK','TUR','TKM','UZB','YEM','UKR'] 17 | 18 | Asia_Pacific = ['AFG','AZE','BGD','BTN','CHN','KHM','IND','IDN','JPN','MDV','MNG','MMR','NPL','PAK', 19 | 'PHL','THA','VNM'] 20 | 21 | American_Region = ['ATG','BHS','BRB','BLZ','CAN','COL','CYM','CRI','CUB','CUW','DMA','DOM','SLV','GRL','GRD', 22 | 'GLP','GTM','HTI','HND','JAM','MTQ','MEX','SPM','MSR','ANT','KNA','NIC','PAN','PER','PRI','BES','BES','SXM', 23 | 'KNA','LCA','SPM','VCT','TTO','TCA','USA','VIR','VGB'] 24 | 25 | 26 | African_Region = ['DZA','AGO','SHN','BEN','BWA','BFA','BDI','CMR','CPV','CAF','TCD','COM','COG', 
27 | 'COD','DJI','EGY','GNQ','ERI','ETH','GAB','GMB','GHA','GIN','GNB','CIV','KEN','LSO','LBR','LBY', 28 | 'MDG','MWI','MLI','MRT','MUS','MYT','MAR','MOZ','NAM','NER','NGA','STP','REU','RWA','STP','SEN','SYC', 29 | 'SLE','SOM','ZAF','SSD','SHN','SDN','SWZ','TZA','TGO','TUN','UGA','COD','ZMB','TZA','ZWE'] 30 | 31 | Others = Asia_Pacific+Arabic+American_Region+African_Region 32 | 33 | # For representing the countries 34 | Arabic_countries = NAR.loc[NAR['ISO Code'].isin(Arabic)] 35 | Asian_countries = NAR.loc[NAR['ISO Code'].isin(Asia_Pacific)] 36 | African_countries = NAR.loc[NAR['ISO Code'].isin(African_Region)] 37 | American_countries = NAR.loc[NAR['ISO Code'].isin(American_Region)] 38 | Other_countries = NAR.loc[~NAR['ISO Code'].isin(Others)] 39 | 40 | # Taking Total Average 41 | AV_Total_Arabic = Arabic_countries['Total'].mean() 42 | AV_Total_Asian = Asian_countries['Total'].mean() 43 | AV_Total_African = African_countries['Total'].mean() 44 | AV_Total_American = American_countries['Total'].mean() 45 | AV_Total_Other = Other_countries['Total'].mean() 46 | 47 | Arabic_AV = {'AV_Total_Arabic':AV_Total_Arabic} 48 | a_av1 = pd.Series(Arabic_AV) 49 | a_av1 = pd.DataFrame(a_av1) 50 | a_av1.columns = ['Total Average'] 51 | 52 | Asian_AV = {'AV_Total_Asian':AV_Total_Asian} 53 | a_av2 = pd.Series(Asian_AV) 54 | a_av2 = pd.DataFrame(a_av2) 55 | a_av2.columns = ['Total Average'] 56 | 57 | African_AV = {'AV_Total_African':AV_Total_African} 58 | a_av3 = pd.Series(African_AV) 59 | a_av3 = pd.DataFrame(a_av3) 60 | a_av3.columns = ['Total Average'] 61 | 62 | American_AV = {'AV_Total_American':AV_Total_American} 63 | a_av4 = pd.Series(American_AV) 64 | a_av4 = pd.DataFrame(a_av4) 65 | a_av4.columns = ['Total Average'] 66 | 67 | Other_AV = {'AV_Total_Other':AV_Total_Other} 68 | a_av5 = pd.Series(Other_AV) 69 | a_av5 = pd.DataFrame(a_av5) 70 | a_av5.columns = ['Total Average'] 71 | 72 | Total_av = [a_av1,a_av2,a_av3,a_av4,a_av5] 73 | TotalAverage = pd.concat(Total_av) 74 | 
TotalAverage = pd.DataFrame(TotalAverage) 75 | print TotalAverage 76 | dataset =['Arabic_countries','Asian_countries','African_countries','American_countries','Other_countries'] 77 | var = sns.barplot(x=dataset, y='Total Average',data =TotalAverage,color = 'darkblue') 78 | var.axes.set_title('Regions vs Average NAR') 79 | var.set(xlabel='Regions', ylabel='Average NAR') 80 | plt.show() 81 | -------------------------------------------------------------------------------- /VARUN.SANDEEP.PPT.odp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/varunkashyapks/Mini-Project/2d80c89d95f965c42b8461febaed35a32fcceeef/VARUN.SANDEEP.PPT.odp -------------------------------------------------------------------------------- /World_map_Adult_literacy_rate.py: -------------------------------------------------------------------------------- 1 | import plotly.plotly as py 2 | import pandas as pd 3 | import plotly 4 | #from IPython.display import Image 5 | #from IPython.core.display import display, HTML 6 | #from nvd3 import 7 | plotly.tools.set_credentials_file(username='sandeepbabgond', api_key='vcpfBduhzWqlk04B7Tc8') 8 | 9 | df = pd.read_excel('/home/sandeep/project2/cleaneddata/Adult_literacy_rate_cleaned.xlsx') 10 | 11 | data2 = dict( 12 | type = 'choropleth', 13 | locations = df['ISO Code'], 14 | z = df['Total'], 15 | text = df['Countries'], 16 | colorscale = [[0,'rgb(5, 10, 172)'],[0.35,"rgb(106, 137, 247)"],[0.5,"rgb(190,190,190)"],\ 17 | [0.6,"rgb(220, 170, 132)"],[0.7,"rgb(230, 145, 90)"],[1,"rgb(178, 10, 28)"]], 18 | autocolorscale = False, 19 | reversescale = True, 20 | name = 'Total', 21 | marker = dict( 22 | line = dict ( 23 | color = 'rgb(220,220,0)', 24 | width = 1.0 25 | ) ), 26 | 27 | ) 28 | fig = dict( data=[data2] ) 29 | py.iplot( fig, validate=False, filename='Adult_literacy_rate_map' ) 30 | -------------------------------------------------------------------------------- /_config.yml: 
-------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman -------------------------------------------------------------------------------- /admin_one_sheet.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | def prepare_sheet(): 4 | df = pd.ExcelFile(r"/home/varun/Desktop/Project/Admin.xlsx") 5 | df = df.parse('Pre-primary GER',skiprows = 10 ,na_values = ['NA']) 6 | df.columns=['ISO Code','Countries','Year','Total','S5','Male','S7','Feamle','S9','S10'] 7 | df = df.iloc[:-18,:] 8 | df = df.drop('S10', axis = 1) 9 | df = df.dropna(axis='columns',how='all') 10 | 11 | df = df.convert_objects(convert_numeric=True) 12 | df = df.dropna(axis='rows',how='any') 13 | 14 | df.to_excel('FIRST_cleaned_admin.xlsx',index = False) 15 | prepare_sheet() -------------------------------------------------------------------------------- /african.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/varunkashyapks/Mini-Project/2d80c89d95f965c42b8461febaed35a32fcceeef/african.png -------------------------------------------------------------------------------- /american.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/varunkashyapks/Mini-Project/2d80c89d95f965c42b8461febaed35a32fcceeef/american.png -------------------------------------------------------------------------------- /arabic_literacy rate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/varunkashyapks/Mini-Project/2d80c89d95f965c42b8461febaed35a32fcceeef/arabic_literacy rate.png -------------------------------------------------------------------------------- /cleaned_Primary_NAR.py: -------------------------------------------------------------------------------- 1 | #importing file and cleaning Primary net 
attendance ratio 2 | import pandas as pd 3 | def prepare_sheet(): 4 | df = pd.ExcelFile(r"/home/varun/Desktop/Project/survey.xlsx") 5 | df = df.parse('Primary NAR',skiprows = 10 ,na_values = ['NA']) 6 | df = df.iloc[11:197,:] 7 | df.columns = ['S1','S2','S3','S4','S5','S6','S7','S8','S9','S10','S11', 8 | 'S12','S13','S14','S15','S16','S17','S18','S19','S20','S21','S22','S23','S24'] 9 | df = df.drop(['S5','S7','S9','S11','S13','S15','S17','S19','S21','S23','S24'],axis = 1) 10 | df = df.dropna(axis='columns',how='all') 11 | df = df.convert_objects(convert_numeric=True) 12 | df.to_excel('PrimaryNAR_cleaned',index = False) 13 | 14 | prepare_sheet() 15 | -------------------------------------------------------------------------------- /figure_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/varunkashyapks/Mini-Project/2d80c89d95f965c42b8461febaed35a32fcceeef/figure_1.png -------------------------------------------------------------------------------- /final_adult_literacyrate_graph.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib 4 | from matplotlib import pyplot as plt 5 | from IPython.core.display import display, HTML 6 | import seaborn as sns 7 | 8 | ALR = pd.ExcelFile(r"/home/varun/Adult_literacy_rate_cleaned.xlsx") 9 | ALR= pd.read_excel("/home/varun/Adult_literacy_rate_cleaned.xlsx",index_col = 1) 10 | 11 | # Dividing of countries into region 12 | Arabic = ['ARM','ARE','GEO','IRQ','BHR','JOR','KAZ','KGZ','IRN','SAU','OMN','PSE','SYR','TJK','TUR','TKM','UZB','YEM','UKR','QAT'] 13 | 14 | Asia_Pacific = ['AFG','AZE','BGD','BTN','CHN','KHM','IND','IDN','JPN','MDV','MNG','MMR','NPL','PAK','LKA', 15 | 'PHL','THA','SGP','VNM'] 16 | 17 | American_Region = ['ATG','BHS','BRB','BRA','CHL','BLZ','CAN','COL','CYM','CRI','CUB','CUW','DMA','ECU','DOM','SLV','GRL','GRD', 18 | 
'GLP','GTM','HTI','HND','JAM','MTQ','MEX','SPM','MSR','ANT','KNA','NIC','PAN','PER','PRI','BES','BES','SXM', 19 | 'KNA','LCA','SPM','VCT','TTO','TCA','USA','VIR','VGB','URY'] 20 | 21 | 22 | African_Region = ['DZA','AGO','SHN','BEN','BWA','BFA','BDI','CMR','CPV','CAF','TCD','COM','COG', 23 | 'COD','DJI','EGY','GNQ','ERI','ETH','GAB','GMB','GHA','GIN','GNB','CIV','KEN','LSO','LBR','LBY', 24 | 'MDG','MWI','MLI','MRT','MUS','MYT','MAR','MOZ','NAM','NER','NGA','STP','REU','RWA','STP','SEN','SYC', 25 | 'SLE','SOM','ZAF','SSD','SHN','SDN','SWZ','TZA','TGO','TUN','UGA','COD','ZMB','TZA','ZWE'] 26 | 27 | Others = Asia_Pacific+Arabic+American_Region+African_Region 28 | 29 | # For representing the countries 30 | Arabic_countries = ALR.loc[ALR['ISO Code'].isin(Arabic)] 31 | Asian_countries = ALR.loc[ALR['ISO Code'].isin(Asia_Pacific)] 32 | African_countries = ALR.loc[ALR['ISO Code'].isin(African_Region)] 33 | American_countries = ALR.loc[ALR['ISO Code'].isin(American_Region)] 34 | Others_countries = ALR.loc[~ALR['ISO Code'].isin(Others)] 35 | # for Arabic countries 36 | var = sns.barplot(x='ISO Code', y='Total',data =Arabic_countries) 37 | var.set(xlabel='Countries Name', ylabel='Total literacy rate') 38 | plt.show() 39 | # for asian 40 | var = sns.barplot(x='ISO Code', y='Total',data =Asian_countries) 41 | var.set(xlabel='Countries Name', ylabel='Total literacy rate') 42 | plt.show() 43 | # for African 44 | var = sns.barplot(x='ISO Code', y='Total',data =African_countries) 45 | var.set(xlabel='Countries Name', ylabel='Total literacy rate') 46 | plt.show() 47 | # for American 48 | var = sns.barplot(x='ISO Code', y='Total',data =American_countries) 49 | var.set(xlabel='Countries Name', ylabel='Total literacy rate') 50 | plt.show() 51 | # for other 52 | var = sns.barplot(x='ISO Code', y='Total',data =Others_countries) 53 | var.set(xlabel='Countries Name', ylabel='Total literacy rate') 54 | plt.show() 55 | 
--------------------------------------------------------------------------------