├── README.md
├── Intro-Prgrm.py
├── 06.Read_Write.py
├── 18.hc.py
├── 05.DataSummarization.py
├── 11.Simple Linear Regression.py
├── 03.Apply_Functions.py
├── 12.Multiple Linear Regression.py
├── 17.kmeans.py
├── 07.Joins.py
├── 08.Index_Select_Filter.py
├── 16.RF.py
├── 15.DecisionTree.py
├── 09.MissingValues.py
├── 04.Loops.py
├── 02.Functions_Basics.py
├── 01.DataStructures.py
├── 19.MarketBasketAnalysis_AprioriAlgo.py
├── 13.multiple_linear_regression_BackwardElimination.py
├── 10.Graphs.py
├── 14.logistic_regression.py
├── 31.Reading Files into Python.ipynb
├── 32.Min_Max_Range_Updated.ipynb
└── 33.Mean_Variance.ipynb
/README.md:
--------------------------------------------------------------------------------
1 | # Data Science with Python
2 |
--------------------------------------------------------------------------------
/Intro-Prgrm.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Sun Apr 25 16:45:56 2021
4 |
5 | @author: pc
6 | """
7 |
8 | V = [1,2,3,4,5]
9 | print(V)
10 |
11 | import matplotlib.pyplot as plt
12 | plt.plot(V)
13 |
--------------------------------------------------------------------------------
/06.Read_Write.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Sat Aug 28 15:58:59 2021
4 |
5 | @author: Admin
6 | """
7 | #-------------------------Reading & Writing data in Files----------------------
8 |
9 | import pandas
10 |
11 | # Reading CSV Files with Pandas:
12 | df = pandas.read_csv('F:/WORK/pyWork/AnalyticsEdge_Python/pyData/User_Data.csv')
13 | print(df)
14 |
15 | # Writing CSV Files with Pandas:
16 | df.to_csv('F:/WORK/pyWork/AnalyticsEdge_Python/pyData/IIT-B.csv')
17 |
18 | # Reading Excel Files with Pandas
19 | df1 = pandas.read_excel('F:/WORK/pyWork/AnalyticsEdge_Python/pyData/User_Data.xlsx')
20 |
21 | df1 = pandas.read_excel('User_Data.xlsx')
22 | print(df1)
23 |
24 | # Writing Excel Files with Pandas
25 | df1.to_excel('IIT-B.xlsx')
26 | df2 = pandas.DataFrame(df1)
27 | print (df2)
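# An extra illustration (an added sketch; assumes the same files as above): by default
# to_csv() also writes the row index as an extra column, and read_csv() can limit how
# much of a file is loaded. 'IIT-B_no_index.csv' is just a hypothetical output name.
df.to_csv('IIT-B_no_index.csv', index=False)          # write without the index column
df_head = pandas.read_csv('User_Data.csv', nrows=5)   # read only the first 5 rows
print(df_head)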
28 |
--------------------------------------------------------------------------------
/18.hc.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Tue Oct 19 19:56:10 2021
4 |
5 | @author: Admin
6 | """
7 | # Hierarchical Clustering
8 |
9 | # Importing the libraries
10 | import matplotlib.pyplot as plt
11 | import pandas as pd
12 |
13 | # Importing the dataset
14 | dataset = pd.read_csv('F:/WORK/pyWork/pyData/Mall_Customers.csv')
15 | X = dataset.iloc[:, [3, 4]].values
16 |
17 | # Using the dendrogram to find the optimal number of clusters
18 | import scipy.cluster.hierarchy as sch
19 | dendrogram = sch.dendrogram(sch.linkage(X, method = 'ward'))
20 | plt.title('Dendrogram')
21 | plt.xlabel('Customers')
22 | plt.ylabel('Euclidean distances')
23 |
24 | #Cut the dendrogram with a horizontal line at a height where the line can pass without
25 | #intersecting a merge point. Here the ideal number of clusters is 5.
26 |
27 | # Fitting Hierarchical Clustering to the dataset
28 | from sklearn.cluster import AgglomerativeClustering
29 | hc = AgglomerativeClustering(n_clusters = 5, affinity = 'euclidean', linkage = 'ward')   #in scikit-learn >= 1.2 the 'affinity' parameter is renamed 'metric'
30 | y_hc = hc.fit_predict(X)
31 |
32 | # Visualising the clusters
33 | plt.scatter(X[y_hc == 0, 0], X[y_hc == 0, 1], s = 100, c = 'red', label = 'Cluster 1')
34 | plt.scatter(X[y_hc == 1, 0], X[y_hc == 1, 1], s = 100, c = 'blue', label = 'Cluster 2')
35 | plt.scatter(X[y_hc == 2, 0], X[y_hc == 2, 1], s = 100, c = 'green', label = 'Cluster 3')
36 | plt.scatter(X[y_hc == 3, 0], X[y_hc == 3, 1], s = 100, c = 'cyan', label = 'Cluster 4')
37 | plt.scatter(X[y_hc == 4, 0], X[y_hc == 4, 1], s = 100, c = 'magenta', label = 'Cluster 5')
38 |
39 | plt.title('Clusters of customers')
40 | plt.xlabel('Annual Income (k$)')
41 | plt.ylabel('Spending Score (1-100)')
42 | plt.legend()
43 |
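# Optional check (an added sketch, not part of the original script): the silhouette
# score gives a numeric sanity check for the choice of 5 clusters read off the
# dendrogram; values closer to 1 mean better-separated clusters.
from sklearn.metrics import silhouette_score
print(silhouette_score(X, y_hc))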
--------------------------------------------------------------------------------
/05.DataSummarization.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Sat Aug 28 15:58:01 2021
4 |
5 | @author: Admin
6 | """
7 | #-------------------------------Data Summary-------------------------------
8 | #describe() - used to get summary statistics in Python.
9 | #For numeric data it gives the count, mean, std, min, quartiles (IQR) and max values.
10 | #It analyzes both numeric and object Series, as well as DataFrame column sets of mixed data types.
11 | # creation of DataFrame
12 | import pandas as pd
13 | import numpy as np
14 |
15 | #Example 1:
16 | a1 = pd.Series([1, 2, 3,4])
17 | a1
18 | a1.describe()
19 |
20 | a2 = pd.Series(['q', 'r', 'r', 'r','q','s','p'])
21 | a2
22 | a2.describe()
23 |
24 | info = pd.DataFrame({'numeric': [1, 2, 3, 4],
25 | 'object': ['p', 'q', 'r','e']
26 | })
27 | info
28 |
29 | info.describe(include=[np.number])
30 | info.describe(include=[object])   #np.object was removed from NumPy; the built-in object works
31 | info.describe()
32 |
33 | #Example 2:
34 | #Create a Dictionary of series
35 | d = {'Name':['Cathrine','Alisa','Bobby','Madonna','Rocky','Sebastian','Jaqluine',
36 | 'Rahul','David','Andrew','Ajay','Teresa'],
37 | 'Age':[26,27,25,24,31,27,25,33,42,32,51,47],
38 | 'Score':[89,87,67,55,47,72,76,79,44,92,99,69]}
39 |
40 | #Create a DataFrame
41 | df = pd.DataFrame(d)
42 | df
43 |
44 | #Descriptive or Summary Statistic of the numeric columns:
45 | #Summary statistics
46 | print(df.describe())
47 |
48 | #Descriptive or Summary Statistic of the character columns:
49 | #Summary statistics of character column
50 | print(df.describe(include='object'))
51 |
52 | #Descriptive or Summary Statistic of all the columns
53 | #Summary statistics of both - character & numerical columns
54 | print(df.describe(include='all'))
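#A further illustration (an added sketch, using the same df as above): describe() also
#accepts a custom list of percentiles, and value_counts() summarizes a single
#categorical column.
print(df.describe(percentiles=[0.1, 0.5, 0.9]))
print(df['Name'].value_counts())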
55 | #---------------------------------------------------------------------------------------------------------------
56 |
--------------------------------------------------------------------------------
/11.Simple Linear Regression.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Sat Sep 18 19:04:28 2021
4 |
5 | @author: Admin
6 | """
7 | # Simple Linear Regression
8 |
9 | # Importing the libraries
10 | import matplotlib.pyplot as plt
11 | import pandas as pd
12 |
13 | # Importing the dataset
14 | dataset = pd.read_csv('F:/pyWork/pyData/stud_reg.csv')
15 | print(type(dataset))
16 |
17 | X = dataset.iloc[:,:-1].values
18 | y = dataset.iloc[:, 1].values
19 |
20 | # Splitting the dataset into the Training set and Test set
21 | from sklearn.model_selection import train_test_split
22 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 1/3, random_state = 0)
23 |
24 | #Note: The parameter 'random_state' seeds the random split of the dataset into training
25 | #and testing sets, so the same split (and therefore the same results) can be reproduced
26 | #on every run. It is not a knob to be tuned by hit & trial for maximum accuracy.
27 |
28 | # Fitting Simple Linear Regression to the Training set
29 | from sklearn.linear_model import LinearRegression
30 | regressor = LinearRegression()
31 | regressor.fit(X_train, y_train)
32 |
33 | #Calculating the coefficients:
34 | print(regressor.coef_)
35 |
36 | #Calculating the intercept:
37 | print(regressor.intercept_)
38 |
39 | # Predicting the Test set results
40 | y_pred = regressor.predict(X_test)
41 |
42 | # Accuracy of the model
43 |
44 | #Calculating the r squared value:
45 | from sklearn.metrics import r2_score
46 | r2_score(y_test,y_pred)
47 |
48 | #Create a DataFrame
49 | df1 = {'Actual Applicants':y_test,
50 | 'Predicted Applicants':y_pred}
51 | df1 = pd.DataFrame(df1,columns=['Actual Applicants','Predicted Applicants'])
52 | print(df1)
53 |
54 | # Visualising the predicted results
55 | line_chart1 = plt.plot(X_test,y_pred, '--', c ='red')
56 | line_chart2 = plt.plot(X_test,y_test, ':', c='blue')
57 |
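# An alternative visualisation (an added sketch; assumes X holds the single predictor
# used above): scatter the actual test points and draw the fitted regression line.
plt.scatter(X_test, y_test, c='blue', label='Actual')
plt.plot(X_test, y_pred, c='red', label='Fitted line')
plt.legend()
plt.show()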
58 | #--------------------------------------------------------
59 |
--------------------------------------------------------------------------------
/03.Apply_Functions.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Sat Aug 21 15:33:29 2021
4 |
5 | @author: Admin
6 | """
7 | #-----------------Apply Family of Functions--------------------------------
8 | #To apply our own functions to a dataset, pandas provides three functions from the
9 | #apply family of functions: pipe(), apply(), applymap()
10 |
11 | # pipe():Table wise Function Application.
12 | # It performs the custom operation for the entire dataframe.
13 | import pandas as pd
14 | # own function
15 | def adder(adder1,adder2):return adder1+adder2
16 |
17 | #Create a Dictionary of series
18 | d = {'Score_Math':pd.Series([66,57,75,44,31,67,85,33,42,62,51,47]),
19 | 'Score_Science':pd.Series([89,87,67,55,47,72,76,79,44,92,93,69])}
20 |
21 | print(type(d))
22 | print(d)
23 | df = pd.DataFrame(d)
24 | print (df)
25 | print (df.pipe(adder,2))
26 |
27 | # apply():Row or Column Wise Function Application.
28 | # It performs the custom operation for either row wise or column wise.
29 | import numpy as np
30 | #Create a DataFrame
31 | d = {'Score_Math':pd.Series([66,57,75,44,31,67,85,33,42,62,51,47]),
32 | 'Score_Science':pd.Series([89,87,67,55,47,72,76,79,44,92,93,69])}
33 |
34 | df = pd.DataFrame(d)
35 | print (df)
36 | #Row Wise Fxn Application:
37 | #row wise mean
38 | print (df.apply(np.mean,axis=1))
39 |
40 | #Column Wise Fxn Application:
41 | #column wise mean
42 | print (df.apply(np.mean,axis=0))
43 |
44 | # applymap(): Element-wise Function Application.
45 | # It performs the specified operation on every element of the dataframe.
46 |
47 |
48 |
49 | #Create a DataFrame
50 | d = {'Score_Math':pd.Series([66,57,75,44,31,67,85,33,42,62,51,47]),
51 | 'Score_Science':pd.Series([89,87,67,55,47,72,76,79,44,92,93,69])}
52 |
53 | df = pd.DataFrame(d)
54 | print (df)
55 |
56 | #Example 1:
57 | print (df.applymap(lambda x:x*2))
58 | #Example2:
59 | import math as m
60 | print (df.applymap(lambda x:m.sqrt(x)))
61 |
--------------------------------------------------------------------------------
/12.Multiple Linear Regression.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Sun Sep 19 15:26:33 2021
4 |
5 | @author: Admin
6 | """
7 | #Multiple Regression
8 |
9 | # Importing the libraries
10 | import pandas as pd
11 | import seaborn as sns
12 |
13 | # Importing the dataset
14 | dataset = pd.read_csv('F:/WORK/pyWork/AnalyticsEdge_Python/pyData/stud_reg_2.csv')
15 | print(type(dataset))
16 |
17 | #Data Visualization: heatmap of the correlation matrix
18 | sns.heatmap(dataset.corr(), annot=True)
19 |
20 | X = dataset.iloc[:, :-1].values
21 | y = dataset.iloc[:,2].values
22 |
23 | # Splitting the dataset into the Training set and Test set
24 | from sklearn.model_selection import train_test_split
25 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 1)
26 |
27 | #Note: The parameter 'random_state' seeds the random split of the dataset into training
28 | #and testing sets, so the same split (and therefore the same results) can be reproduced
29 | #on every run. It is not a knob to be tuned by hit & trial for maximum accuracy.
30 |
31 | # Fitting Linear Regression to the Training set
32 | from sklearn.linear_model import LinearRegression
33 | regressor = LinearRegression()
34 | regressor.fit(X_train, y_train)
35 |
36 | #Calculating the coefficients:
37 | print(regressor.coef_)
38 |
39 | #Calculating the intercept:
40 | print(regressor.intercept_)
41 |
42 | # Predicting the Test set results
43 | y_pred = regressor.predict(X_test)
44 |
45 | # Accuracy of the model
46 |
47 | #Calculating the r squared value:
48 | from sklearn.metrics import r2_score
49 | r2_score(y_test,y_pred)
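#Optional addition (a sketch, not in the original): adjusted R-squared penalises the
#extra predictors, which is more informative than plain R-squared when there is more
#than one independent variable.
r2 = r2_score(y_test, y_pred)
n, k = X_test.shape                       # observations and predictors in the test set
adj_r2 = 1 - (1 - r2) * (n - 1) / (n - k - 1)
print(adj_r2)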
50 |
51 | #Create a DataFrame
52 | df1 = {'Actual Applicants':y_test,
53 | 'Predicted Applicants':y_pred}
54 | df1 = pd.DataFrame(df1,columns=['Actual Applicants','Predicted Applicants'])
55 | print(df1)
56 |
57 | # Visualising the predicted results
58 | import matplotlib.pyplot as plt
59 | line_chart1 = plt.plot(y_pred,X_test, '--',c='green')
60 | line_chart2 = plt.plot(y_test,X_test, ':', c='red')
61 | plt.show()
62 | #------------------------------
63 |
--------------------------------------------------------------------------------
/17.kmeans.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Sun Oct 17 09:41:39 2021
4 |
5 | @author: Admin
6 | """
7 | # K-Means Clustering
8 | #Projects: Customer Segmentation
9 | #A company wants to identify segments of customers for targeted marketing.
10 |
11 | # Importing the libraries
12 | import matplotlib.pyplot as plt
13 | import pandas as pd
14 |
15 | # Importing the dataset
16 | dataset = pd.read_csv('D:/SkillEdge/Python/Final/Codes/pyData/Mall_Customers.csv')
17 | X = dataset.iloc[:, [3,4]].values
18 |
19 | # Using the elbow method to find the optimal number of clusters
20 | from sklearn.cluster import KMeans
21 | help(KMeans())
22 | wcss = []
23 | for i in range(1, 11):
24 | kmeans = KMeans(n_clusters = i, init = 'k-means++', random_state = 0)
25 | kmeans.fit(X)
26 | wcss.append(kmeans.inertia_)
27 | plt.plot(range(1, 11), wcss)
28 | plt.title('The Elbow Method')
29 | plt.xlabel('Number of clusters')
30 | plt.ylabel('WCSS')
31 | #if you want to save the figure, use plt.savefig (or the savefig method of a figure object).
32 | plt.savefig('output.png')
33 |
34 | # Fitting K-Means to the dataset
35 | kmeans = KMeans(n_clusters = 5, init = 'k-means++', random_state = 42)
36 | y_kmeans = kmeans.fit_predict(X)
37 |
38 | clusters = pd.DataFrame(y_kmeans, columns=['Cluster'])   #use a separate name so the fitted kmeans model is not overwritten
39 | dataset_1 = pd.concat([dataset, clusters], axis=1)
40 |
41 | # Visualising the clusters
42 | plt.scatter(X[y_kmeans == 0, 0], X[y_kmeans == 0, 1], s = 100, c = 'red', label = 'Cluster 1')
43 | plt.scatter(X[y_kmeans == 1, 0], X[y_kmeans == 1, 1], s = 100, c = 'blue', label = 'Cluster 2')
44 | plt.scatter(X[y_kmeans == 2, 0], X[y_kmeans == 2, 1], s = 100, c = 'green', label = 'Cluster 3')
45 | plt.scatter(X[y_kmeans == 3, 0], X[y_kmeans == 3, 1], s = 100, c = 'cyan', label = 'Cluster 4')
46 | plt.scatter(X[y_kmeans == 4, 0], X[y_kmeans == 4, 1], s = 100, c = 'magenta', label = 'Cluster 5')
47 | #plt.scatter(X[y_kmeans == 5, 0], X[y_kmeans == 5, 1], s = 100, c = 'yellow', label = 'Cluster 3')
48 | #plt.scatter(X[y_kmeans == 6, 0], X[y_kmeans == 6, 1], s = 100, c = 'black', label = 'Cluster 4')
49 | #plt.scatter(X[y_kmeans == 7, 0], X[y_kmeans == 7, 1], s = 100, c = 'orange', label = 'Cluster 5')
50 |
51 |
52 |
53 |
54 | #plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s = 300, c = 'yellow', label = 'Centroids')
55 | plt.title('Clusters of customers')
56 | plt.xlabel('Annual Income (k$)')
57 | plt.ylabel('Spending Score (1-100)')
58 | plt.legend()
59 | plt.show()
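# Optional check (an added sketch; uses y_kmeans and the fitted kmeans model from above):
# how many customers fall in each segment, and where the segment centroids lie.
print(pd.Series(y_kmeans).value_counts())
print(kmeans.cluster_centers_)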
60 |
--------------------------------------------------------------------------------
/07.Joins.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Tue Aug 31 18:44:17 2021
4 |
5 | @author: Admin
6 | """
7 | #---------------------------------Joins----------------------------------------
8 | #We can merge two data frames in python by using the merge() function of pandas
9 | #Create dataframe:
10 | import pandas as pd
11 |
12 | # Example 1:
13 |
14 | # data frame 1
15 | d1 = {'Customer_id':pd.Series([1,2,3,4,5,6]),
16 | 'Product':pd.Series(['Oven','Oven','Oven','Television','Television','Television'])}
17 | df1 = pd.DataFrame(d1)
18 | print(df1)
19 |
20 | # data frame 2
21 | d2 = {'Customer_id':pd.Series([2,4,6]),
22 | 'State':pd.Series(['California','California','Texas'])}
23 | df2 = pd.DataFrame(d2)
24 | print(df2)
25 |
26 | #Inner join using pandas:
27 | #Returns only the rows whose keys appear in both tables.
28 | print (pd.merge(df1, df2, on='Customer_id', how='inner'))
29 |
30 | #Full join using pandas
31 | #Returns all rows from both tables.
32 |
33 | print (pd.merge(df1, df2, on='Customer_id', how='outer'))
34 | #Keys present in only one table appear with NaN in the other table's columns.
35 |
36 | #Left Join using pandas
37 | #Returns all rows from left table and any rows with matching keys from right table.
38 | print (pd.merge(df1, df2, on='Customer_id', how='left'))
39 |
40 | #Right Join using pandas
41 | #Returns all rows from right table and any rows with matching keys from left table.
42 | print (pd.merge(df1, df2, on='Customer_id', how='right'))
43 |
44 | #Example 2:
45 |
46 | # Dataset 1
47 | emp_1 = {"Name": ["Penn", "Smith", "William", "Parker"],
48 | "Age": [21, 32, 29, 28]}
49 | EmpList_1 = pd.DataFrame(emp_1)
50 | print(EmpList_1)
51 |
52 | # Dataset 2
53 | emp_2 = {"Name": ["Penn", "Suzzane", "William"],
54 | "Education-Level": ["Under-Grad", "PG", "Grad"]}
55 | EmpList_2 = pd.DataFrame(emp_2)
56 | print(EmpList_2)
57 |
58 | #Inner join using pandas:
59 | print (pd.merge(EmpList_1, EmpList_2, on='Name', how='inner'))
60 |
61 | #Full join using pandas
62 | print (pd.merge(EmpList_1, EmpList_2, on='Name', how='outer'))
63 | #Keys present in only one table appear with NaN in the other table's columns.
64 |
65 | #Left Join using pandas
66 | print (pd.merge(EmpList_1, EmpList_2, on='Name', how='left'))
67 |
68 | #Right Join using pandas
69 | #Returns all rows from right table and any rows with matching keys from left table.
70 | print (pd.merge(EmpList_1, EmpList_2, on='Name', how='right'))
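#A small extra illustration (an added sketch): passing indicator=True to merge() adds a
#'_merge' column that records whether each row came from the left table, the right
#table, or both.
print(pd.merge(EmpList_1, EmpList_2, on='Name', how='outer', indicator=True))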
71 |
--------------------------------------------------------------------------------
/08.Index_Select_Filter.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Tue Aug 31 18:48:19 2021
4 |
5 | @author: Admin
6 | """
7 | #------------------------Index, Select & Filter--------------------------------
8 | #Create dataframe :
9 | import pandas as pd
10 |
11 | #Create a DataFrame
12 | d = {'Name':['Alisa','Bobby','Cathrine','Alisa','Bobby','Cathrine',
13 | 'Alisa','Bobby','Cathrine','Alisa','Bobby','Cathrine'],
14 | 'Exam':['Semester 1','Semester 1','Semester 1','Semester 1','Semester 1','Semester 1',
15 | 'Semester 2','Semester 2','Semester 2','Semester 2','Semester 2','Semester 2'],
16 | 'Subject':['Mathematics','Mathematics','Mathematics','Science','Science','Science',
17 | 'Mathematics','Mathematics','Mathematics','Science','Science','Science'],
18 | 'Score':[62,47,55,74,31,77,85,63,42,67,89,81]}
19 |
20 | df = pd.DataFrame(d,columns=['Name','Exam','Subject','Score'])
21 | df
22 |
23 | #View a column of the dataframe in pandas:
24 | df['Name']
25 |
26 | #View selected columns of the dataframe in pandas:
27 | df[['Name','Score','Exam']]
28 |
29 | #View first two rows of the dataframe in pandas:
30 | df[0:2]
31 |
32 | #-------Filter in Pandas dataframe:--------------
33 | #View all rows where score greater than 70
34 | df['Score'] > 70
35 | df[df['Score'] > 70]
36 |
37 | #View all the rows where score greater than 70 and less than 85
38 | df[(df['Score'] > 70) & (df['Score'] < 85)]
39 |
40 |
41 | #-----------------Select in Pandas dataframe-----------------------------------
42 | #select rows by using row numbers in pandas with .iloc
43 | #.iloc[row positions, column positions] selects by integer position (0-based),
44 | #e.g. df.iloc[0:m, 0:n] gives the first m rows and the first n columns
45 |
46 | # select first 2 rows
47 | df.iloc[:2]
48 | # or
49 | df.iloc[:2,]
50 |
51 | #select 3rd to 5th rows
52 | df.iloc[2:5]
53 | # or
54 | df.iloc[2:5,]
55 |
56 | #select all rows starting from third row
57 | df.iloc[2:]
58 | # or
59 | df.iloc[2:,]
60 |
61 | #Select column by using column number in pandas with .iloc
62 | # select first 2 columns
63 | df.iloc[:,:2]
64 | #select the 3rd and 5th rows together with the 1st and 4th columns
65 | df.iloc[[2,4],[0,3]]
66 |
67 | #Select value by using row name and column name in pandas with .loc:
68 | #.loc [[Row_names],[ column_names]] –used to select or index rows or columns based on their name
69 |
70 | #select value by row label and column label using loc
71 | df.loc[[1,2,4,8,11],['Name','Score']]
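#A couple of further filtering sketches (added; same df as above): isin() keeps rows
#whose value is in a given list, and .loc can combine a boolean condition with a
#column selection in one step.
df[df['Subject'].isin(['Science'])]
df.loc[df['Score'] > 70, ['Name', 'Score']]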
72 |
--------------------------------------------------------------------------------
/16.RF.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Tue Oct 12 19:15:52 2021
4 |
5 | @author: Admin
6 | """
7 | #------------------------------Random Forest--------------------------------
8 | # Random Forest Classification
9 |
10 | # Importing the libraries
11 | import pandas as pd
12 |
13 | # Importing the dataset
14 | dataset = pd.read_csv('Purchase_History.csv')
15 | X = dataset.iloc[:, [2, 3]].values
16 | y = dataset.iloc[:, 4].values
17 |
18 | # Splitting the dataset into the Training set and Test set
19 | from sklearn.model_selection import train_test_split
20 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 0)
21 |
22 |
23 | # Fitting Random Forest Classification to the Training set
24 | from sklearn.ensemble import RandomForestClassifier
25 | classifier = RandomForestClassifier(n_estimators = 10, criterion = 'entropy',max_depth = 3, min_samples_leaf=5)
26 | classifier.fit(X_train, y_train)
27 |
28 | #To see no. of decision trees created
29 | len(classifier.estimators_)
30 |
31 | #To see the decision trees created
32 | classifier.estimators_
33 |
34 | #To access a particular decision tree, we can use indexing
35 | classifier.estimators_[0]
36 |
37 | # Predicting the Test set results
38 | y_pred = classifier.predict(X_test)
39 |
40 | # Making the Confusion Matrix
41 | from sklearn.metrics import confusion_matrix
42 | cm = confusion_matrix(y_test, y_pred)
43 | cm
44 | #Accuracy = 96%
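#Optional (an added sketch): the same accuracy figure can be computed directly from the
#predictions with sklearn's accuracy_score.
from sklearn.metrics import accuracy_score
print(accuracy_score(y_test, y_pred))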
45 |
46 | # Random Forest visualization
47 |
48 | #Since a Random Forest is quite big and clumsy to draw due to its large number of decision trees,
49 | #it is not practical to visualize an entire RF on a small system like our laptop.
50 | #Hence, we visualize individual DTs from this RF.
51 |
52 | # Decision Tree -1 visualization-----------------
53 | from sklearn import tree
54 | #Lets create a blank chart of desired size using matplotlib library and place our Decision tree there.
55 | import matplotlib.pyplot as plt
56 | fig, axes= plt.subplots(nrows = 1,ncols = 1,figsize = (4,4), dpi=300)
57 | cn=['0','1']
58 | tree.plot_tree(classifier.estimators_[0],class_names=cn,filled = True)
59 |
60 | #if you want save figure, use savefig method in returned figure object.
61 | fig.savefig('RF-DT-1.png')
62 |
63 | # Decision Tree-2 visualization-----------------
64 | from sklearn import tree
65 | #Lets create a blank chart of desired size using matplotlib library and place our Decision tree there.
66 | import matplotlib.pyplot as plt
67 | fig, axes= plt.subplots(nrows = 1,ncols = 1,figsize = (4,4), dpi=300)
68 | cn=['0','1']
69 | tree.plot_tree(classifier.estimators_[1],class_names=cn,filled = True)
70 |
71 | #if you want save figure, use savefig method in returned figure object.
72 | fig.savefig('RF-DT-2.png')
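# Optional addition (a sketch): feature_importances_ shows how much each of the two
# input columns contributed to the forest's splits (the values sum to 1).
print(classifier.feature_importances_)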
73 |
74 | #-----------
75 |
--------------------------------------------------------------------------------
/15.DecisionTree.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Sat Oct 9 16:48:32 2021
4 |
5 | @author: Admin
6 | """
7 |
8 | # Importing the libraries
9 | import pandas as pd
10 |
11 | # Importing the dataset
12 | dataset = pd.read_csv('F:/WORK/pyWork/AnalyticsEdge_Python/pyData/Purchase_History.csv')
13 |
14 | #Method-1 (Handling Categorical Variables)
15 | pd.get_dummies(dataset["Gender"])
16 | pd.get_dummies(dataset["Gender"],drop_first=True)
17 | S_Dummy = pd.get_dummies(dataset["Gender"],drop_first=True)
18 | S_Dummy.head(5)
19 | #Now, lets concatenate these dummy var columns in our dataset.
20 | dataset = pd.concat([dataset,S_Dummy],axis=1)
21 | dataset.head(5)
22 | dataset.tail(2)
23 | #dropping the columns whose dummy var have been created
24 | dataset.drop(["Gender",],axis=1,inplace=True)
25 | dataset.head(5)
26 | #------------------------------------------------------------------------------
27 |
28 | #Obtaining DV & IV from the dataset
29 | X = dataset.iloc[:, [1,2,4]].values
30 | y = dataset.iloc[:, 3].values
31 |
32 | # Splitting the dataset into the Training set and Test set
33 | from sklearn.model_selection import train_test_split
34 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 2)
35 |
36 |
37 | # Fitting Decision Tree Classification to the Training set
38 | from sklearn.tree import DecisionTreeClassifier
39 | #classifier = DecisionTreeClassifier(criterion = 'entropy')
40 | #If desired we can supply extra parameters to decision trees fxn, but
41 | #it may or may not give better accuracy.
42 | classifier = DecisionTreeClassifier(criterion = 'entropy',max_depth = 3, min_samples_leaf=5)
43 |
44 | classifier.fit(X_train, y_train)
45 |
46 | # Predicting the Test set results
47 | y_pred = classifier.predict(X_test)
48 |
49 | # Making the Confusion Matrix
50 | from sklearn.metrics import confusion_matrix
51 | cm = confusion_matrix(y_test, y_pred)
52 | print(cm)
53 | #Accuracy = 91%
54 |
55 | # Decision Tree visualization-----------------
56 | from sklearn import tree
57 |
58 | #Simple Decision Tree
59 | tree.plot_tree(classifier)
60 | #image is quite blurred
61 |
62 | #Lets try to make decision tree more interpretable by adding filling colors.
63 | tree.plot_tree(classifier,filled = True)
64 | #Although the decision tree now shows class names and the leaves are colored, its view is still blurred.
65 |
66 | #Lets create a blank chart of desired size using matplotlib library and place our Decision tree there.
67 | import matplotlib.pyplot as plt
68 | fig, axes = plt.subplots(nrows = 1,ncols = 1,figsize = (4,4), dpi=300)
69 | #The above line is used to set the pixels of the Decision Trees nodes so that
70 | #the content mentioned in each node of Decision tree is visible.
71 | cn=['0','1']
72 | tree.plot_tree(classifier,class_names=cn,filled = True)
73 |
74 | #if you want save figure, use savefig method in returned figure object.
75 | fig.savefig('Skilledge-Python-April-batch.png')
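#Optional addition (a sketch): export_text() prints the same tree as plain text, which
#is handy when the rendered image is too blurred to read.
from sklearn.tree import export_text
print(export_text(classifier))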
76 |
--------------------------------------------------------------------------------
/09.MissingValues.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Thu Sep 2 20:05:47 2021
4 |
5 | @author: Admin
6 | """
7 | #--------------------------Handling Missing Values----------------------------
8 |
9 | #Counting the Missing Values---------------------------
10 | import pandas as pd
11 | import numpy as np
12 |
13 | #Create a DataFrame
14 | df1 = {'Subject':['semester1','semester2','semester3','semester4','semester1',
15 | 'semester2','semester3'],
16 | 'Score':[62,47,np.nan,74,np.nan,77,85]}
17 |
18 | df1 = pd.DataFrame(df1,columns=['Subject','Score'])
19 | print(df1)
20 |
21 | '''Is there any missing values in dataframe '''
22 | df1.isnull()
23 | df1.notnull()
24 |
25 | '''Is there any missing values across columns'''
26 | df1.isnull().any()
27 |
28 | '''How many missing values are there across each column'''
29 | df1.isnull().sum()
30 |
31 | #Dropping rows with Missing Values-----------------------
32 |
33 | #Create a DataFrame
34 | df1 = {'Name':['George','Andrea','micheal','maggie','Ravi','Xien','Jalpa',np.nan],
35 | 'State':['Arizona','Georgia','Newyork','Indiana','Florida','California',np.nan,np.nan],
36 | 'Gender':["M","F","M","F","M","M",np.nan,np.nan],
37 | 'Score':[63,48,56,75,np.nan,77,np.nan,np.nan]}
38 |
39 | df1 = pd.DataFrame(df1,columns=['Name','State','Gender','Score'])
40 | print(df1)
41 |
42 | #Drop all rows that have any NaN (missing) values
43 | df1.dropna()
44 |
45 | #Drop only if entire row has NaN values
46 | df1.dropna(how='all')
47 |
48 | #Keep only rows that have at least 2 non-NaN values (thresh counts the required non-missing values)
49 | df1.dropna(thresh=2)
50 |
51 | #Drop NaN in a specific column
52 | df1.dropna(subset=['Gender'])
53 | df2 = df1.dropna(subset=['Gender','Score'])
54 | df2
55 | #Dropping rows using axis values:
56 | df1
57 | df1.dropna(axis=0)
58 |
59 | #Dropping columns using axis values:
60 | df1.dropna(axis=1)
61 |
62 | #------------------Creating Data Frame Again-----------------------------------
63 | df1 = {'Name':['George','Andrea','micheal','maggie','Ravi','Xien','Jalpa',np.nan],
64 | 'State':['Arizona','Georgia','Newyork','Indiana','Florida','California',np.nan,np.nan],
65 | 'Gender':["M","F","M","F","M","M",np.nan,np.nan],
66 | 'Score':[63,48,56,75,np.nan,77,np.nan,np.nan]}
67 |
68 | df1 = pd.DataFrame(df1,columns=['Name','State','Gender','Score'])
69 | print(df1)
70 | #------------------Replacing Missing Values with Zero--------------------------
71 |
72 | df1
73 | df1.fillna(0)
74 |
75 | #-----------------Replacing Missing Values with Mean of the column-------------
76 |
77 | df1
78 | df1["Score"].fillna(df1["Score"].mean(),inplace=True)
79 | print(df1)
80 |
81 | #----------------Replacing Missing Value with Median of the column-------------
82 | df1["Score"].fillna(df1["Score"].median(), inplace=True)
83 | print(df1)
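#A further sketch (added; assumes df1 from above): gaps in a categorical column such as
#Gender are usually filled with the most frequent value (the mode) rather than a mean.
df1["Gender"] = df1["Gender"].fillna(df1["Gender"].mode()[0])
print(df1)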
84 |
85 | #Replace Missing (or) Generic Values using replace() method
86 | #Many times, we have to replace a generic value with some specific value.
87 | #We can achieve this by applying the replace method.
88 | df = pd.DataFrame({'one':[10,20,30,40,50,2000], 'two':[1000,0,30,40,50,60]})
89 | print(df)
90 |
91 | print (df.replace({1000:10,2000:60}))
92 |
93 | #------------------Handling Duplicate Values--------------------------------
94 |
95 | #The drop_duplicates() function performs common data cleaning task that deals with duplicate values
96 | #in the DataFrame. This method helps in removing duplicate values from the DataFrame.
97 |
98 | emp = {"Name": ["Parker", "Smith", "William", "Parker"],
99 | "Age": [21, 32, 29, 21]}
100 | info = pd.DataFrame(emp)
101 | print(info)
102 | info = info.drop_duplicates()
103 | print(info)
104 |
105 |
106 | emp = {"Name": ["Parker", "Smith", "William", "Parker"],
107 | "Age": [21, 32, 29, 22]}
108 | info = pd.DataFrame(emp)
109 | print(info)
110 | info = info.drop_duplicates()
111 | print(info)
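#Optional addition (a sketch): drop_duplicates(subset=['Name']) treats rows as
#duplicates when only the Name matches, keeping the first occurrence by default.
print(info.drop_duplicates(subset=['Name']))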
112 |
--------------------------------------------------------------------------------
/04.Loops.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Tue Aug 24 18:51:52 2021
4 |
5 | @author:
6 | """
7 | #--------------------------------Loops----------------------------------------
8 | #-------------For Loop---------------
9 | # A for loop executes a block of code once for each item in a sequence (or range).
10 | # It is better to use a for loop when the number of iterations is known in advance.
11 | #It is frequently used to traverse data structures like lists, tuples, or dictionaries.
12 | #Example1:
13 | i=0
14 | for i in range(0,10):
15 | print(i,end =',')
16 |
17 | #Example2:printing the table of the given number
18 | i=1
19 | num = int(input("Enter a number:"))
20 | for i in range(1,11):
21 | print("%a X %a = %a" %(num,i,num*i))
22 |
23 | #Example3:Nested For loop
24 | n = int(input("Enter the number of rows you want to print?"))
25 | i,j=0,0
26 | for i in range(0,n):
27 | print()
28 | for j in range(0,i+1):
29 | print("*",end="")
30 |
31 | #Example4: Else statement with For loop
32 | for i in range(0,5):
33 | print(i)
34 | else:print("for loop completely exhausted, since there is no break.")
35 |
36 | #------------While Loop-------------
37 | # A while loop is used when we don't know the number of iterations in advance.
38 | #The block of statements inside the while loop is executed repeatedly as long as the
39 | #condition specified in the while loop remains true.
40 | #Example1:
41 | i=1;
42 | while i<=10:
43 | print(i);
44 | i=i+1;
45 |
46 | #Example2:
47 | i=1
48 | number=0
49 |
50 | number = int(input("Enter the number?"))
51 | while i<=10:
52 | print("%a X %a = %a \n"%(number,i,number*i));
53 | i = i+1;
54 |
55 | #Example3:Infinite while loop
56 | var = 1
57 | while var != 2:
58 | i = int(input("Enter the number?"))
59 | print ("Entered value is %d"%(i))
60 |
61 | while (1):
62 | print("Hi! we are inside the infinite while loop");
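#Added example (a sketch): 'break' is the usual way to leave an otherwise infinite loop.
while True:
    i = int(input("Enter a number (0 to stop)?"))
    if i == 0:
        break
    print("Entered value is %d" % (i))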
63 |
64 | # A for loop runs a finite number of times, even if the range contains only one value
65 | for i in range(0,1):
66 | print("Hi! we are inside the finite for loop");
67 |
68 | #Example4: Using else with while loop
69 | i=1;
70 | while i<=5:
71 | print(i)
72 | i=i+1;
73 | else:print("The while loop exhausted");
74 |
75 | #-------------If Statement----------------
76 | #The if statement is used to test a specific condition.
77 | #If the condition is true, a block of code (if-block) will be executed.
78 | #Example1:
79 | num = int(input("enter the number?"))
80 | if num%2 == 0:
81 | print("Number is even")
82 |
83 | #Example2:
84 | a = int(input("Enter a? "));
85 | b = int(input("Enter b? "));
86 | c = int(input("Enter c? "));
87 | if a>b and a>c:
88 | print("a is largest");
89 |
90 | if b>a and b>c:
91 | print("b is largest");
92 |
93 | if c>a and c>b:
94 | print("c is largest");
95 |
96 | #-----------If Else Statement-------------
97 | #If the condition provided in the if statement is false, then the else statement will be executed.
98 | #Example1:
99 | age = int (input("Enter your age? "))
100 | if age>=18:
101 | print("You are eligible to vote !!");
102 | else:
103 | print("Sorry! you have to wait !!");
104 |
105 | #Example2:
106 | num = int (input("enter the number?"))
107 | if num%2 == 0:
108 | print("Number is even...")
109 | else:
110 | print("Number is odd...")
111 |
112 | #-------Elif Statement------------------
113 | #The elif statement enables us to check multiple conditions and execute the specific block of
114 | #statements depending upon the true condition among them.It works like if-else-if ladder statement.
115 | #Example:
116 | number = int(input("Enter the number?"))
117 | if number==10:
118 | print("number is equals to 10")
119 | elif number==50:
120 | print("number is equal to 50");
121 | elif number==100:
122 | print("number is equal to 100");
123 | else:
124 | print("number is not equal to 10, 50 or 100");
125 |
--------------------------------------------------------------------------------
/02.Functions_Basics.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Sat Aug 21 15:32:07 2021
4 |
5 | @author: Admin
6 | """
7 | #----------------------Python Built-in Functions-------------------------------
8 |
9 |
10 | #abs(): Returns the absolute value of a number.
11 | # integer number
12 |
13 | integer = -20
14 | abs(integer)
15 | print('Absolute value of -20 is:', abs(integer))
16 |
17 | # floating number
18 |
19 | floating = -20.83
20 | print('Absolute value of -20.83 is:', abs(floating))
21 |
22 | #all(): It returns true if all items passed in iterable object are true.
23 | #Otherwise, it returns False.
24 | #This fxn accepts an iterable object (such as list, dictionary, etc.).
25 | # all values true
26 |
27 | k = [1, 3, 4, 6]
28 | print(all(k))
29 |
30 | # all values false
31 |
32 | k = [0, False]
33 | print(all(k))
34 |
35 | # one false value
36 | k = [1, 3, 7, 0]
37 | print(all(k))
38 |
39 | # empty iterable
40 | k = []
41 | print(all(k))
42 |
43 | #------------------------------------------------------------------------------------
44 |
45 | #bool(): Converts a value to boolean(True or False)
46 | test1 = []
47 | print(test1,'is',bool(test1))
48 |
49 | test1 = [0]
50 | print(test1,'is',bool(test1))
51 |
52 | test1 = None
53 | print(test1,'is',bool(test1))
54 |
55 | test1 = 'Easy string'
56 | print(test1,'is',bool(test1))
57 |
58 | #sum(): Used to get the sum of numbers of an iterable, i.e., list.
59 |
60 | list_1 = [1,2,4]
61 | s = sum(list_1)
62 | print(s)
63 |
64 | s = sum(list_1, 10)
65 | print(s)
66 |
67 | #len(): Returns the length (the number of items) of an object.
68 |
69 | strA = 'Python'
70 | print(len(strA))
71 |
72 | #list() creates a list in python.
73 | # empty list
74 |
75 | Gaurav = list()
76 | print(Gaurav)
77 |
78 | #Converting string to list
79 | String = 'abcde'
80 | print(list(String))
81 |
82 | #divmod(): Used to get quotient and remainder of two numbers.
83 | #This function takes two numeric arguments and returns a tuple.
84 | #Both arguments are required and numeric
85 | # Calling function
86 | result = divmod(10,2)
87 | # Displaying result
88 | print(result)
89 |
90 | #dict(): Its a constructor which creates a dictionary.
91 | # Calling function
92 | result = dict() # returns an empty dictionary
93 | print(result)
94 |
95 | result2 = dict(a=1,b=2)
96 | # Displaying result
97 | print(result2)
98 |
99 | #set(): It is used to create a new set using elements passed during the call.
100 | #It takes an iterable object as an argument and returns a new set object.
101 | # Calling function
102 | result = set() # empty set
103 | result2 = set('12')
104 | result3 = set('javatpoint')
105 | result4 = {1,2}
106 | print (result4)
107 | # Displaying result
108 | print(result)
109 | print(result2)
110 | print(result3)
111 |
112 | #pow(): Used to compute the power of a number.
113 | # positive x, positive y (x**y)
114 | print(pow(4, 2))
115 |
116 | # negative x, positive y
117 | print(pow(-4, 2))
118 |
119 | #tuple(): Used to create a tuple object.
120 | t1 = tuple()
121 | print('t1=', t1)
122 |
123 | # creating a tuple from a list
124 | l = [1, 6, 9]
125 | t2 = tuple(l)
126 | print('t2=', t2)
127 |
128 | # creating a tuple from a string
129 | t1 = tuple('Java')
130 | print('t1=',t1)
131 |
132 | #----------------------------------------------------------------------
133 | #lambda()- Helps creating anonymous functions.
134 | #Lambda functions can accept any number of arguments,
135 | #but they can return only one value in the form of expression.
136 |
137 | #Multiple arguments to Lambda function
138 | x = lambda a,b:a+b
139 | # a and b are the arguments and a+b is the expression which gets evaluated and returned.
140 | print("Addition = ",x(20,10))
141 |
142 | #Program to filter out the list which contains numbers divisible by 3.
143 | List = [1,2,3,4,10,123,22]
144 | Oddlist = list(filter(lambda x:(x%3 == 0),List))
145 | # the list contains all the items of the list for which the lambda function evaluates to true
146 | print(Oddlist)
147 |
148 | #program to triple each number of the list using map
149 | List = [1,2,3,4,10,123,22]
150 | new_list = list(map(lambda x:x*3,List))
151 | # this will return the triple of each item of the list and add it to new_list
152 | print(new_list)
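#One more illustration (an added sketch): a lambda is also handy as the key of sorted().
pairs = [(2, 'b'), (1, 'c'), (3, 'a')]
print(sorted(pairs, key=lambda p: p[1]))   #sorts the tuples by their second element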
153 |
--------------------------------------------------------------------------------
/01.DataStructures.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Sat April 4 01:46:06 2020
4 |
5 | @author: Admin
6 | """
7 |
8 | print("hello world")
9 |
10 | #Numbers
11 | #a=3 , b=5 #a and b are number objects
12 |
13 | #String
14 | str1 = 'Hello Students' #string str1
15 | str2 = ' how are you' #string str2
16 | str1
17 | str2
18 | print (str1[0:5]) #printing the first five characters using the slice operator
19 | (str1[0:5])
20 | print (str1[4]) #printing 5th character of the string
21 | print (str1*2) #printing the string twice
22 | print (str1 + str2) #printing the concatenation of str1 and str2
23 |
24 | #Lists
25 | l = [1, "hi", "python", True]
26 | print (l[3:])
27 | print (l[0:2])
28 | print (l)
29 | print (l + l)
30 | print (l * 3)
31 | print (type(l))
32 | #Lets try mutation
33 | l[1] = "Bye"
34 | print (l)
35 |
36 | #Tuple
37 | t = ('hi', 'python', 2, 4)
38 | t
39 | print (t[1:]);
40 | print (t[0:3]);
41 | print (t);
42 | print (t + t)
43 | print (t * 3)
44 | print (type(t))
45 | #Lets try mutation - tuples are immutable, so the next line raises a TypeError
46 | t[1] = "Bye"
47 | print (t)
48 |
49 | #Dictionary
50 | d = {1:"Jimmy", 2:'Alex', 3:'john', 4:'mike'}
51 | d
52 | print("1st name is "+d[1])
53 | print("2nd name is "+ d[4])
54 | print (d);
55 | print (d.keys());
56 | print (d.values());
57 |
58 | #----ADVANCED----
59 | #list
60 | #ordered collection of items; sequence of items in a list
61 | shoplist =['apple','carrot','mango', 'banana']
62 | shoplist
63 | len(shoplist)
64 | print(shoplist)
65 |
66 | #add item to list
67 | shoplist.append('rice')
68 | shoplist
69 |
70 | #sort
71 | shoplist.sort() #inplace sort
72 | shoplist
73 |
74 | #index/select
75 | shoplist[0]
76 | shoplist[0:4]
77 |
78 | #delete item
79 | del shoplist[0]
80 | shoplist
81 |
82 | #Tuple
83 | #Used to hold multiple object; similar to lists; less functionality than list
84 | #immutable - cannot modify- fast ; ( )
85 | zoo = ('python','lion','elephant','bird')
86 | zoo
87 | len(zoo)
88 | languages = 'c', 'java', 'php' , 1 #better to put (), this also works
89 | languages
90 | type(languages)
91 |
92 | #Dictionary - like an addressbook. use of associate keys with values
93 | #key-value pairs { 'key1':value1, 'key2':value2} ; { } bracket, :colon
94 |
95 | student = {'A101': 'Abhinav', 'A102': 'Ravi', 'A103':'Prafull', 'A104': 'Karan'}
96 | student
97 | student['A103']
98 | print('Name of rollno A103 is ' +student['A103'])
99 | del student['A104']
100 | student
101 | len(student)
102 |
103 | #for rollno, name in student.items():
104 | #    print('name of {} is {}'.format(rollno, name))
105 |
106 | #Lets test Mutation:
107 | #adding a value
108 | student['A104'] = 'Hitesh'
109 | student
110 |
111 | #Set
112 | Anubhav = {1,2,3,4,5}
113 | Anubhav
114 | Aman_1 = set()
115 | Aman_1
116 |
117 | #Sets are unordered collections of unique objects; set([ ... ]) builds one from a list
118 | teamA = set(['india','england','australia','sri lanka','ireland'])
119 | teamA
120 | teamB = set(['pakistan', 'south africa','bangladesh','ireland'])
121 | teamB
122 |
123 | #Checking whether a data value exists in a set or not.
124 | 'india' in teamA
125 | 'india' in teamB
126 |
127 | #Adding values in a set
128 | teamA.add('China')
129 | teamA #note: sets are unordered, so the displayed order is arbitrary
130 | teamA.add('india')
131 | teamA #no duplicates
132 | teamA.remove('australia')
133 | teamA
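#Set operations (an added sketch using the two teams defined above):
print(teamA | teamB)   #union - countries appearing in either set
print(teamA & teamB)   #intersection - countries common to both sets
print(teamA - teamB)   #difference - countries only in teamA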
134 |
135 | #Create dataframe :
136 | import pandas as pd
137 |
138 | #Create a DataFrame
139 | d = {'Name':['Alisa','Bobby','Cathrine','Alisa','Bobby','Cathrine',
140 | 'Alisa','Bobby','Cathrine','Alisa','Bobby','Cathrine'],
141 | 'Exam':['Semester 1','Semester 1','Semester 1','Semester 1','Semester 1','Semester 1',
142 | 'Semester 2','Semester 2','Semester 2','Semester 2','Semester 2','Semester 2'],
143 | 'Subject':['Mathematics','Mathematics','Mathematics','Science','Science','Science',
144 | 'Mathematics','Mathematics','Mathematics','Science','Science','Science'],
145 | 'Score':[62,47,55,74,31,77,85,63,42,67,89,81]}
146 |
147 | d
148 |
149 | df = pd.DataFrame(d,columns=['Name','Exam','Subject','Score'])
150 | df
151 |
152 | #View a column of the dataframe in pandas:
153 | df['Name']
154 |
155 | #View selected columns of the dataframe in pandas:
156 | df[['Name','Score','Exam']]
157 |
158 | #View first two rows of the dataframe in pandas:
159 | df[0:2]
160 |
161 |
162 |
163 |
164 |
165 |
--------------------------------------------------------------------------------
/19.MarketBasketAnalysis_AprioriAlgo.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Tue Oct 26 14:46:14 2021
4 |
5 | @author: Admin
6 | """
7 | #Import Libraries----------
8 | import numpy as np
9 | import pandas as pd
10 | from mlxtend.frequent_patterns import apriori, association_rules
11 |
12 | #Loading and exploring the data-----------------
13 | #Loading the Data
14 | data = pd.read_excel('Online_Retail_Store.xlsx')
15 | data.head()
16 | data.info()
17 | # Exploring the columns of the data
18 | data.columns
19 | # Exploring the different regions of transactions
20 | data.Country.unique()
21 |
22 | #Cleaning the Data-----------------
23 | #Identifying missing values:
24 | '''Is there any missing values across columns'''
25 | data.isnull().any()
26 |
27 | '''How many missing values are there across each column'''
28 | data.isnull().sum()
29 |
30 | # Dropping the rows without any invoice number
31 | data.dropna(axis = 0, subset =['InvoiceNo'], inplace = True)
32 | data.isnull().sum()
33 |
34 | # Dropping all transactions which were done on credit ('C')
35 | data.info()
36 | #data = data[~data['InvoiceNo'].str.contains('C')]   #this fails while 'InvoiceNo' still contains non-string values
37 | #For the above cmd to work, we need to ensure that we convert the column "InvoiceNo" to string form first.
38 | data['InvoiceNo'] = data['InvoiceNo'].astype('str')
39 | data = data[~data['InvoiceNo'].str.contains('C')]
40 | #Hence, now we have been able to remove the rows with credit (C) type billing.
41 |
42 | # Stripping extra spaces in the description
43 | data['Description'] = data['Description'].str.strip()
44 |
45 | #Splitting the data according to the region of transaction-------
46 | # Transactions done in France
47 | basket_France = (data[data['Country'] =="France"]
48 | .groupby(['InvoiceNo', 'Description'])['Quantity']
49 | .sum().unstack().reset_index()
50 | .fillna(0)
51 | .set_index('InvoiceNo'))
52 |
53 | # Transactions done in the United Kingdom
54 | basket_UK = (data[data['Country'] =="United Kingdom"]
55 | .groupby(['InvoiceNo', 'Description'])['Quantity']
56 | .sum().unstack().reset_index().fillna(0)
57 | .set_index('InvoiceNo'))
58 |
59 | # Transactions done in Portugal
60 | basket_Por = (data[data['Country'] =="Portugal"]
61 | .groupby(['InvoiceNo', 'Description'])['Quantity']
62 | .sum().unstack().reset_index().fillna(0)
63 | .set_index('InvoiceNo'))
64 |
65 | basket_Sweden = (data[data['Country'] =="Sweden"]
66 | .groupby(['InvoiceNo', 'Description'])['Quantity']
67 | .sum().unstack().reset_index().fillna(0)
68 | .set_index('InvoiceNo'))
69 |
70 | #Hot encoding the Data------------
71 | # Defining the hot encoding function to make the data suitable
72 | # for the concerned libraries
73 | def hot_encode(x):
74 | if(x<= 0):
75 | return 0
76 | if(x>= 1):
77 | return 1
78 |
79 | # Encoding the datasets
80 | basket_encoded = basket_France.applymap(hot_encode)
81 | basket_France = basket_encoded
82 |
83 | basket_encoded = basket_UK.applymap(hot_encode)
84 | basket_UK = basket_encoded
85 |
86 | basket_encoded = basket_Por.applymap(hot_encode)
87 | basket_Por = basket_encoded
88 |
89 | basket_encoded = basket_Sweden.applymap(hot_encode)
90 | basket_Sweden = basket_encoded
91 |
92 | #Building the models and analyzing the results-----------------
93 |
94 | #France:
95 | # Building the model
96 | frq_items = apriori(basket_France, min_support = 0.15, use_colnames = True)
97 | frq_items
98 |
99 | # Collecting the inferred rules in a dataframe
100 | rules = association_rules(frq_items, metric ="lift", min_threshold = 1)
101 | print(rules.head())
102 | France_rules=pd.DataFrame(rules)
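# A common next step (an added sketch; 'confidence' and 'lift' are columns produced by
# mlxtend's association_rules): sort the rules so the strongest ones appear first.
print(France_rules.sort_values(['confidence', 'lift'], ascending=False).head())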
103 |
104 | #Portugal
105 | frq_items = apriori(basket_Por, min_support = 0.15, use_colnames = True)
106 | rules = association_rules(frq_items, metric ="lift", min_threshold = 1)
107 | print(rules.head())
108 | Portugal_rules=pd.DataFrame(rules)
109 |
110 | #Sweden
111 | frq_items = apriori(basket_Sweden, min_support = 0.10, use_colnames = True)
112 | rules = association_rules(frq_items, metric ="lift", min_threshold = 1)
113 | print(rules.head())
114 | Sweden_rules=pd.DataFrame(rules)
115 |
116 | #UK
117 | frq_items = apriori(basket_UK, min_support = 0.09, use_colnames = True)
118 | rules = association_rules(frq_items, metric ="lift", min_threshold = 1)
119 | print(rules.head())
120 | UK_rules=pd.DataFrame(rules)
121 |
122 | #An empty DataFrame here signifies that none of the rules for the UK satisfy the support and
123 | #lift thresholds used for the frequent itemsets above.
--------------------------------------------------------------------------------
/13.multiple_linear_regression_BackwardElimination.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Tue Sep 21 18:49:36 2021
4 |
5 | @author: Admin
6 | """
7 | # Multiple Linear Regression
8 |
9 | # Importing the libraries
10 |
11 | import matplotlib.pyplot as plt
12 | import pandas as pd
13 |
14 | # Importing the dataset
15 | dataset = pd.read_csv('D:/SkillEdge/Python/Final/Codes/pyData/50_Startups.csv')
16 |
17 | #Method-1 (Handling Categorical Variables)
18 | pd.get_dummies(dataset["State"])
19 | pd.get_dummies(dataset["State"],drop_first=True)
20 | S_Dummy = pd.get_dummies(dataset["State"],drop_first=True)
21 | S_Dummy.head(5)
22 | #Now, lets concatenate these dummy var columns in our dataset.
23 | dataset = pd.concat([dataset,S_Dummy],axis=1)
24 | dataset.head(5)
25 | #dropping the columns whose dummy var have been created
26 | dataset.drop(["State",],axis=1,inplace=True)
27 | dataset.head(5)
28 | #------------------------------------------------------------------------------
29 |
30 | #Obtaining DV & IV from the dataset
31 | X = dataset.iloc[:,[0,1,2,4,5]].values
32 | y = dataset.iloc[:,3].values
33 |
34 | # Splitting the dataset into the Training set and Test set
35 | from sklearn.model_selection import train_test_split
36 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
37 |
38 |
39 | # Fitting Multiple Linear Regression to the Training set
40 | from sklearn.linear_model import LinearRegression
41 | regressor = LinearRegression()
42 | regressor.fit(X_train, y_train)
43 |
44 | # Predicting the Test set results
45 | y_pred = regressor.predict(X_test)
46 |
47 | # Accuracy of the model
48 |
49 | #Calculating the r squared value:
50 | from sklearn.metrics import r2_score
51 | r2_score(y_test,y_pred)
52 |
53 | #Coefficient
54 | regressor.coef_
55 |
56 | # Intercept
57 | regressor.intercept_
58 |
59 | #The above score tells us that the model explains about 93% of the variance in the test dataset.
60 |
61 | #--------------------------Backward Elimination--------------------------------
62 | #Backward elimination is a feature selection technique used while building a machine learning model.
63 | #It removes features that do not have a significant effect on the dependent variable or on the prediction of the output.
64 |
65 | #Step: 1- Preparation of Backward Elimination:
66 |
67 | #Importing the library:
68 | import statsmodels.api as sm
69 |
70 | #Adding a column in matrix of features:
71 | import numpy as nm
72 | X = nm.append(arr = nm.ones((50,1)).astype(int), values=X, axis=1)
73 |
74 | #Applying backward elimination process now
75 | #Firstly we will create a new feature vector x_opt, which will only contain a set of
76 | #independent features that are significantly affecting the dependent variable.
77 | x_opt=X[:, [ 0,1,2,3,4,5]]
78 |
79 | #for fitting the model, we will create a regressor_OLS object of new class OLS of
80 | #statsmodels library. Then we will fit it by using the fit() method.
81 | regressor_OLS=sm.OLS(endog = y, exog=x_opt).fit()
82 |
83 | #We will use summary() method to get the summary table of all the variables.
84 | regressor_OLS.summary()
85 |
86 | #In the above summary table, we can clearly see the p-values of all the variables.
87 | #Here x1, x2 are dummy variables, x3 is R&D spend, x4 is Administration spend, and x5 is Marketing spend.
88 |
89 | #Now, since x5 has the highest p-value (greater than 0.05), we remove the x5 variable
90 | #from the feature set and refit the model.
91 | x_opt= X[:, [0,1,2,3,4]]
92 | regressor_OLS=sm.OLS(endog = y, exog=x_opt).fit()
93 | regressor_OLS.summary()
94 |
95 | #Now x4 has the highest p-value greater than 0.05, so we remove the x4 variable
96 | #from the feature set and refit the model.
97 | x_opt= X[:, [0,1,2,3]]
98 | regressor_OLS=sm.OLS(endog = y, exog=x_opt).fit()
99 | regressor_OLS.summary()
100 |
101 | #Next we remove the Administration spend variable (x2), which has a p-value of 0.602,
102 | #and refit the model again.
103 | x_opt= X[:, [0,1,3]]
104 | regressor_OLS=sm.OLS(endog = y, exog=x_opt).fit()
105 | regressor_OLS.summary()
106 |
107 | #Finally, we remove one more variable - Marketing spend - whose p-value of about 0.60
108 | #is still above the significance level of 0.05.
109 | x_opt= X[:, [0,1]]
110 | regressor_OLS=sm.OLS(endog = y, exog=x_opt).fit()
111 | regressor_OLS.summary()
112 |
113 | #Hence,only R&D independent variable is a significant variable for the prediction.
114 | #So we can now predict efficiently using this variable.
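#The manual steps above can also be automated (an added sketch, not part of the original
#walk-through): repeatedly drop the column with the largest p-value until every
#remaining p-value is at or below 0.05.
cols = list(range(X.shape[1]))           #column 0 is the intercept added earlier
while True:
    ols = sm.OLS(endog=y, exog=X[:, cols]).fit()
    worst = int(nm.argmax(ols.pvalues))
    if ols.pvalues[worst] <= 0.05:
        break
    del cols[worst]
print(cols)            #indices of the retained columns
print(ols.summary())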
115 |
116 | #----------Building Multiple Regression model by only using R&D spend:-----------------
117 | #importing datasets
118 | data_set= pd.read_csv('F:/WORK/pyWork/pyData/50_Startups.csv')
119 | #Extracting Independent and dependent Variable
120 | x_BE= data_set.iloc[:,:-4].values
121 | y_BE= data_set.iloc[:,4].values
122 | # Splitting the dataset into training and test set.
123 | from sklearn.model_selection import train_test_split
124 | x_BE_train, x_BE_test, y_BE_train, y_BE_test= train_test_split(x_BE, y_BE, test_size= 0.2, random_state=0)
125 |
126 | #Fitting the MLR model to the training set:
127 | from sklearn.linear_model import LinearRegression
128 | regressor= LinearRegression()
129 | regressor.fit(x_BE_train, y_BE_train)
130 |
131 | #Predicting the Test set result;
132 | y_pred= regressor.predict(x_BE_test)
133 |
134 | #Cheking the score
135 | #Calculating the r squared value:
136 | from sklearn.metrics import r2_score
137 | r2_score(y_BE_test,y_pred)
138 | #The above score tells us that the model is now more accurate on the test dataset, with
139 | #an R-squared of about 95%.
140 |
141 | #Calculating the coefficients:
142 | print(regressor.coef_)
143 |
144 | #Calculating the intercept:
145 | print(regressor.intercept_)
146 |
147 | #Regression Eq'n: Profit = 48416 + 0.85*R&D_Spend
148 |
149 |
--------------------------------------------------------------------------------
/10.Graphs.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Sun Sep 5 17:32:15 2021
4 |
5 | @author: Admin
6 | """
7 | import matplotlib.pyplot as plt
8 | #-----------------------------------GRAPHS---------------------------------
9 |
10 | #--------------------------Bar Chart------------------------------------------
11 | #Vertical Bar Chart
12 | import numpy as np
13 |
14 | city=['Delhi','Beijing','Washington','Tokyo','Moscow']
15 | Happiness_Index=[60,40,70,65,85]
16 |
17 | plt.bar(city,Happiness_Index,color='pink',edgecolor='red')
18 | plt.xlabel('City', fontsize=16)
19 | plt.ylabel('Happiness_Index', fontsize=16)
20 | plt.title('Barchart - Happiness index across cities',fontsize=20)
21 |
22 | #Horizontal Bar Chart
23 |
24 | city=['Delhi','Beijing','Washington','Tokyo','Moscow']
25 | Happiness_Index=[60,40,70,65,85]
26 |
27 | plt.barh(city,Happiness_Index,color='blue',edgecolor='black')
28 | plt.xlabel('Happiness_Index', fontsize=16)
29 | plt.ylabel('City', fontsize=16)
30 | plt.title('Horizontal Barchart - Happiness index across cities',fontsize=20)
31 |
32 | #Stacked Bar Chart in Python with legends:
33 |
34 | city=['Delhi','Beijing','Washington','Tokyo','Moscow']
35 | Gender=['Male','Female']
36 | Happiness_Index_Male=[60,40,70,65,85]
37 | Happiness_Index_Female=[30,60,70,55,75]
38 |
39 | plt.bar(city,Happiness_Index_Male,color='blue',edgecolor='black')
40 | plt.bar(city,Happiness_Index_Female,color='pink',edgecolor='black',bottom=Happiness_Index_Male)
41 | #bar() function plots the Happiness_Index_Female on top of Happiness_Index_Male with the help of
42 | #argument bottom=Happiness_Index_Male.
43 | plt.xlabel('City', fontsize=16)
44 | plt.ylabel('Happiness_Index', fontsize=16)
45 | plt.title('Stacked Barchart - Happiness index across cities',fontsize=18)
46 | plt.legend(Gender,loc=2)
47 |
48 | #--------------------------Histogram-------------------------------------------
49 | #Histogram with no Fills:
50 |
51 | values = [82,76,24,40,67,62,75,78,71,32,98,89,78,67,72,82,87,66,56,52]
52 | plt.hist(values,5, histtype='step', align='mid', color='green', label='Test Score Data')
53 | #Here, the second argument is the number of bins;
54 | #histtype='step' plots the histogram in step format,
55 | #aligned to mid, with the color chosen as green.
56 | plt.legend(loc=2)
57 | #argument loc=2 plots the legend on the top left corner.
58 | plt.title('Histogram of score')
59 |
60 | #Histogram with bar Filled:
61 |
62 | values = [82,76,24,40,67,62,75,78,71,32,98,89,78,67,72,82,87,66,56,52]
63 | plt.hist(values,10, histtype='bar', color='cyan', label='Test score Data',edgecolor='black')
64 | #Argument histtype='bar' plots the histogram with filled bars.
65 | plt.legend()
66 | plt.title('Histogram of score')
67 |
68 | #----------------------------Box Plot------------------------------------------
69 | #Box Plot
70 |
71 | value1=[82,76,24,40,67,62,75,78,71,32,98,89,78,67,72,82,87,66,56,52]
72 | value2=[62,5,91,25,36,32,96,95,3,90,95,32,27,55,100,15,71,11,37,21]
73 | value3=[23,89,12,78,72,89,25,69,68,86,19,49,15,16,16,75,65,31,25,52]
74 | value4=[59,73,70,16,81,61,88,98,10,87,29,72,16,23,72,88,78,99,75,30]
75 |
76 | box_plot_data=[value1,value2,value3,value4]
77 | plt.boxplot(box_plot_data)
78 |
79 | #Box plot with fills and labels:
80 |
81 | value1 = [82,76,24,40,67,62,75,78,71,32,98,89,78,67,72,82,87,66,56,52]
82 | value2=[62,5,91,25,36,32,96,95,3,90,95,32,27,55,100,15,71,11,37,21]
83 | value3=[23,89,12,78,72,89,25,69,68,86,19,49,15,16,16,75,65,31,25,52]
84 | value4=[59,73,70,16,81,61,88,98,10,87,29,72,16,23,72,88,78,99,75,30]
85 |
86 | box_plot_data=[value1,value2,value3,value4]
87 | plt.boxplot(box_plot_data,patch_artist=True,labels=['course1','course2','course3','course4'])
88 | #The argument patch_artist=True fills the boxes, and the labels argument supplies the labels to be plotted.
89 |
90 | #Horizontal box plot in python with different colors:
91 |
92 | value1 = [82,76,24,40,67,62,75,78,71,32,98,89,78,67,72,82,87,66,56,52]
93 | value2=[62,5,91,25,36,32,96,95,3,90,95,32,27,55,100,15,71,11,37,21]
94 | value3=[23,89,12,78,72,89,25,69,68,86,19,49,15,16,16,75,65,31,25,52]
95 | value4=[59,73,70,16,81,61,88,98,10,87,29,72,16,23,72,88,78,99,75,30]
96 |
97 | box_plot_data=[value1,value2,value3,value4]
98 | box=plt.boxplot(box_plot_data,vert=0,patch_artist=True,
99 | labels=['course1','course2','course3','course4'],)
100 | #Adding argument vert =0 plots the horizontal box plot.
101 | colors = ['cyan', 'lightblue', 'lightgreen', 'tan']
102 | for patch, color in zip(box['boxes'], colors):
103 | patch.set_facecolor(color)
104 | #The colors list holds four different colors, which are applied to the four boxes of the boxplot
105 | #using the patch.set_facecolor() function.
106 | #-------------------Line plot or Line chart --------------------
107 |
108 | values = [1, 5, 8, 9, 7, 11, 8, 12, 14, 9]
109 | plt.plot(values)
110 |
111 |
112 | #Multiple Line charts with legends and Labels:
113 | #Let's take an example of units sold in 2016 and 2017 to demonstrate line charts.
114 |
115 | sales1 = [1, 5, 8, 9, 7, 11, 8, 12, 14, 9, 5]
116 | sales2 = [3, 7, 9, 6, 4, 5, 14, 7, 6, 16, 12]
117 | line_chart1 = plt.plot( sales1,range(1,12))
118 | line_chart2 = plt.plot( sales2,range(1,12))
119 | plt.title('Monthly sales of 2016 and 2017')
120 | plt.xlabel('Sales')
121 | plt.ylabel('Month')
122 | plt.legend(['year 2016', 'year 2017'], loc=4)
123 |
124 |
125 | #Charts with different line styles:
126 |
127 | sales1 = [1, 5, 8, 9, 7, 11, 8, 12, 14, 9, 5]
128 | sales2 = [3, 7, 9, 6, 4, 5, 14, 7, 6, 16, 12]
129 | line_chart1 = plt.plot(range(1,12), sales1,'--')
130 | line_chart2 = plt.plot(range(1,12), sales2,':')
131 | plt.title('Monthly sales of 2016 and 2017')
132 |
133 |
134 | #---------------------Pie Chart--------------------------------------------
135 | #Pie chart in Python with legends:
136 |
137 | values = [60, 80, 90, 55, 10, 30]
138 | Col = ['b', 'g', 'r', 'c', 'm', 'y']
139 | labels = ['US', 'UK', 'India', 'Germany', 'Australia', 'South Korea']
140 | Exp = (0.5, 0, 0, 0, 0, 0)
141 | plt.pie(values, colors=Col, labels= values,explode=Exp,counterclock=False, shadow=True)
142 | plt.title('Population Density Index')
143 | plt.legend(labels,loc=3)
144 |
145 | #Pie chart in Python with percentage values:
146 |
147 | values = [60, 80, 90, 55, 10, 30]
148 | colors = ['b', 'g', 'r', 'c', 'm', 'y']
149 | labels = ['US', 'UK', 'India', 'Germany', 'Australia', 'South Korea']
150 | explode = (0.2, 0, 0, 0, 0, 0)
151 | plt.pie(values, colors=colors, labels=labels,
152 | explode=explode, autopct='%1.1f%%', shadow=True)
153 | plt.title('Population Density Index')
154 |
155 | #-------------------------------Scatter Plot----------------------------------
156 | # Scatter plot in Python:
157 |
158 | weight1=[63.3,57,64.3,63,71,61.8,62.9,65.6,64.8,63.1,68.3,69.7,65.4,66.3,60.7]
159 | height1=[156.3,100.7,114.8,156.3,237.1,123.9,151.8,164.7,105.4,136.1,175.2,137.4,164.2,151,124.3]
160 | plt.scatter(weight1,height1,c='r',marker='*')
161 | plt.xlabel('weight', fontsize=16)
162 | plt.ylabel('height', fontsize=16)
163 | plt.title('scatter plot - height vs weight',fontsize=20)
164 |
165 | #Scatter plot for three different groups
166 |
167 | weight1=[57,58.2,58.6,59.6,59.8,60.2,60.5,60.6,60.7,61.3,61.3,61.4,61.8,61.9,62.3]
168 | height1=[100.7,195.6,94.3,127.1,111.7,159.7,135,149.9,124.3,112.9,176.7,110.2,123.9,161.9,107.8]
169 |
170 | weight2=[62.9,63,63.1,63.2,63.3,63.4,63.4,63.4,63.5,63.6,63.7,64.1,64.3,64.3,64.7,64.8,65]
171 | height2=[151.8,156.3,136.1,124.2,156.3,130,181.2,255.9,163.1,123.1,119.5,179.9,114.8,174.1,108.8,105.4,141.4]
172 |
173 |
174 | weight3=[69.2,69.2,69.4,69.7,70,70.3,70.8,71,71.1,71.7,71.9,72.4,73,73.1,76.2]
175 | height3=[166.8,172.9,193.8,137.4,162.4,137.1,169.1,237.1,189.1,179.3,174.8,213.3,198,191.1,220.6]
176 |
177 | import numpy as np
178 | weight=np.concatenate((weight1,weight2,weight3))
179 | height=np.concatenate((height1,height2,height3))
180 |
181 | color_array = ['b'] * 15 + ['g'] * 17 + ['r'] * 15
182 |
183 | plt.scatter(weight, height, marker='*', c=color_array)
184 |
185 | plt.xlabel('weight', fontsize=16)
186 | plt.ylabel('height', fontsize=16)
187 | plt.title('grouped scatter plot - height vs weight',fontsize=20)
188 |
--------------------------------------------------------------------------------
/14.logistic_regression.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Thu Sep 30 19:49:04 2021
4 |
5 | @author: Admin
6 | """
7 | # Logistic Regression
8 |
9 | #-------------Logistic Regression------------------------------
10 | #Import Libraries
11 | import pandas as pd
12 | import seaborn as sns
13 | import matplotlib.pyplot as plt
14 |
15 |
16 | #Import data
17 | titanic_data = pd.read_csv("D:/SkillEdge/Python/Final/Codes/pyData/titanic.csv")
18 | titanic_data.head(5)
19 | titanic_data.tail(5)
20 |
21 | print("No. of passengers in original dataset:" +str(len(titanic_data.index)))
22 |
23 | #Analyzing Data
24 | sns.countplot(x="survived",data=titanic_data)
25 |
26 | sns.countplot(x="survived",hue="sex",data=titanic_data)
27 |
28 | sns.countplot(x="survived",hue="pclass",data=titanic_data)
29 |
30 | #CHECKING DATA TYPE OF A VARIABLE AND CONVERTING IT INTO ANOTHER TYPE-----
31 | titanic_data.info()
32 | titanic_data["age"].plot.hist()
33 | plt.hist(titanic_data["age"])
34 |
35 |
36 | #Converting var "age" from object type to float type
37 | titanic_data["age"] = pd.to_numeric(titanic_data.age, errors='coerce')
38 | titanic_data.info()
39 | #The parameter errors='coerce' in the above function replaces non-numeric entries (like "?"),
40 | #if any, in the "age" column with NaN values.
41 |
42 | titanic_data["age"].plot.hist()
43 |
44 | #Converting var "fare" from object type to float type
45 | titanic_data["fare"] = pd.to_numeric(titanic_data.fare, errors='coerce')
46 | titanic_data.info()
47 | #The parameter errors='coerce' in the above function replaces non-numeric entries (like "?"),
48 | #if any, in the "fare" column with NaN values.
49 |
50 | titanic_data["fare"].plot.hist()
51 |
52 | #Identifying/Finding missing values if any----
53 | titanic_data.isnull()
54 | titanic_data.isnull().sum()
55 |
56 | sns.heatmap(titanic_data.isnull(),yticklabels=False, cmap="viridis")
57 |
58 | #Note:
59 | #Since the missing values in "fare" are quite few, we can delete those rows.
60 | #Since the missing values in "age" are many, it is better to impute them.
61 |
62 | sns.boxplot(x="age",data=titanic_data)
63 | sns.boxplot(x="fare",data=titanic_data)
64 |
65 | #From the boxplot we observe that the number of outliers in "age" is quite small; hence,
66 | #if we plan to impute "age", mean imputation is a reasonable choice.
67 |
68 | #Handling Missing Values------------
69 | titanic_data.head(5)
70 |
71 | #Dropping all the rows which have a missing value in the "fare" column
72 | #Drop NaN in a specific column
73 | titanic_data.dropna(subset=['fare'],inplace=True)
74 | sns.heatmap(titanic_data.isnull(),yticklabels=False)
75 |
76 | #Imputing missing values in column (Age) with mean imputation
77 | titanic_data["age"].fillna(titanic_data["age"].mean(), inplace=True)
78 | sns.heatmap(titanic_data.isnull(),yticklabels=False)
79 |
80 | #Hence, we do not have any missing values in the dataset now.
81 | titanic_data.isnull().sum()
82 |
83 | #Note:
84 | #A heat map is usually drawn for either continuous or categorical variables.
85 | #Let's take a few continuous variable columns and draw the heat map.
86 | #Cont = titanic_data[:,[5,6,7]]
87 | #sns.heatmap(Cont)
88 |
89 | #There are a lot of string-valued variables in the dataset which have to be converted to
90 | #numerical values before applying a machine learning algorithm. Hence, we will now convert
91 | #the string variables to numerical (dummy) variables.
92 | titanic_data.info()
93 | pd.get_dummies(titanic_data["sex"])
94 |
95 | pd.get_dummies(titanic_data["sex"],drop_first=True)
96 |
97 | Sex_Dummy = pd.get_dummies(titanic_data["sex"],drop_first=True)
98 | Sex_Dummy.head(5)
99 |
100 | pd.get_dummies(titanic_data["embarked"])
101 | Embarked_Dummy = pd.get_dummies(titanic_data["embarked"],drop_first=True)
102 | Embarked_Dummy.head(5)
103 |
104 | pd.get_dummies(titanic_data["pclass"])
105 | PClass_Dummy = pd.get_dummies(titanic_data["pclass"],drop_first=True)
106 | PClass_Dummy.head(5)
107 |
108 | #Now, lets concatenate these dummy var columns in our dataset.
109 | titanic_data = pd.concat([titanic_data,Sex_Dummy,PClass_Dummy,Embarked_Dummy],axis=1)
110 | titanic_data.head(5)
111 |
112 | #dropping the columns whose dummy var have been created
113 | titanic_data.drop(["sex","embarked","pclass","Passenger_id","name","ticket"],axis=1,inplace=True)
114 | titanic_data.head(5)
115 |
116 | #Splitting the dataset into Train & Test dataset
117 | x=titanic_data.drop("survived",axis=1)
118 | y=titanic_data["survived"]
119 |
120 | from sklearn.model_selection import train_test_split
121 | X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.25, random_state = 0)
122 |
123 | # Fitting Logistic Regression to the Training set
124 | from sklearn.linear_model import LogisticRegression
125 | help(LogisticRegression())
126 | logmodel = LogisticRegression(solver='liblinear') #It is the default solver for Scikit-learn versions earlier than 0.22.0.
127 | logmodel.fit(X_train, y_train)
128 |
129 | predictions = logmodel.predict(X_test)
130 |
131 | from sklearn.metrics import confusion_matrix
132 | confusion_matrix(y_test,predictions)
133 |
134 | confusion_matrix(predictions,y_test)
135 |
136 | #Hence, accuracy = (165+84)/(165+84+30+44) = 249/323, which is roughly 77%
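# A minimal sketch (not part of the original script): the same figure can be obtained
# directly with scikit-learn's accuracy_score instead of computing it by hand.
from sklearn.metrics import accuracy_score
print(accuracy_score(y_test, predictions))   # ~0.77 for this split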
137 |
138 | #Calculating the coefficients:
139 | print(logmodel.coef_)
140 |
141 | #Calculating the intercept:
142 | print(logmodel.intercept_)
143 |
144 | #----To improve the accuracy of the model, let's go with the Backward Elimination method &
145 | # rebuild the logistic model again with fewer independent variables--------
146 | titanic_data_1 = titanic_data
147 | titanic_data_1.head(5)
148 |
149 | #--------------------------Backward Elimination--------------------------------
150 | #Backward elimination is a feature selection technique used while building a machine learning model. It
151 | #removes the features that do not have a significant effect on the dependent variable or the predicted output.
152 |
153 | #Step 1: Preparation for Backward Elimination:
154 | #Importing the library:
155 | import statsmodels.api as sm
156 |
157 | #Adding a constant (ones) column to the matrix of features for the intercept term:
158 | x1=titanic_data_1.drop("survived",axis=1)
159 | y1=titanic_data_1["survived"]
160 | import numpy as nm
161 | x1 = nm.append(arr = nm.ones((x1.shape[0],1)).astype(int), values=x1, axis=1)
162 |
163 | #Applying backward elimination process now
164 | #Firstly we will create a new feature vector x_opt, which will only contain a set of
165 | #independent features that are significantly affecting the dependent variable.
166 | x_opt= x1[:, [0,1,2,3,4,5,6,7,8,9,10]]
167 |
168 | #for fitting the model, we will create a regressor_OLS object of new class OLS of statsmodels library.
169 | #Then we will fit it by using the fit() method.
170 | regressor_OLS=sm.OLS(endog = y1, exog=x_opt).fit()
171 |
172 | #We will use summary() method to get the summary table of all the variables.
173 | regressor_OLS.summary()
174 |
175 | #In the above summary table, we can clearly see the p-values of all the variables.
176 | #And remove the ind var with p-value greater than 0.05
177 | x_opt= x1[:, [0,1,2,4,5,6,7,8,9,10]]
178 | regressor_OLS=sm.OLS(endog = y1, exog=x_opt).fit()
179 | regressor_OLS.summary()
180 |
181 | x_opt= x1[:, [0,1,2,4,5,6,7,9,10]]
182 | regressor_OLS=sm.OLS(endog = y1, exog=x_opt).fit()
183 | regressor_OLS.summary()
184 |
185 | x_opt= x1[:, [0,1,2,5,6,7,9,10]]
186 | regressor_OLS=sm.OLS(endog = y1, exog=x_opt).fit()
187 | regressor_OLS.summary()
188 |
189 | x_opt= x1[:, [0,1,2,5,6,7,10]]
190 | regressor_OLS=sm.OLS(endog = y1, exog=x_opt).fit()
191 | regressor_OLS.summary()
192 | #Hence, the independent variables age, sibsp, sex, pclass & embarked are significant
193 | #for predicting the value of the dependent variable "survived".
194 | #So we can now predict efficiently using these variables.
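# A minimal sketch (not part of the original script): the manual steps above can be
# automated by repeatedly dropping the column with the highest p-value until every
# remaining p-value is at or below 0.05 (in practice the constant column is usually kept).
cols = list(range(x1.shape[1]))
while True:
    ols = sm.OLS(endog=y1, exog=x1[:, cols]).fit()
    pvals = nm.asarray(ols.pvalues)
    worst = int(pvals.argmax())
    if pvals[worst] <= 0.05:
        break
    del cols[worst]
print(cols)   # indices of the retained columns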
195 |
196 | #-------Building the Logistic Regression model using the variables: age, sibsp, sex, pclass & embarked--------
197 | # Splitting the dataset into training and test set.
198 | from sklearn.model_selection import train_test_split
199 | x_BE_train, x_BE_test, y_BE_train, y_BE_test= train_test_split(x_opt, y1, test_size= 0.25, random_state=0)
200 |
201 | # Fitting Logistic Regression to the Training set
202 | from sklearn.linear_model import LogisticRegression
203 | logmodel = LogisticRegression(solver='liblinear')
204 | logmodel.fit(x_BE_train, y_BE_train)
205 |
206 | predictions = logmodel.predict(x_BE_test)
207 |
208 | from sklearn.metrics import confusion_matrix
209 | confusion_matrix(y_BE_test,predictions)
210 |
211 | #Accuracy = (170+87)/(170+87+25+41) = 80%
212 |
213 | #Calculating the coefficients:
214 | print(logmodel.coef_)
215 |
216 | #Calculating the intercept:
217 | print(logmodel.intercept_)
218 |
219 | #So, your final predictive modelling equation becomes:
220 | #P(Survived = 1) = exp(z) / (exp(z) + 1), where
221 | #z = 3.74 -0.03*age -0.27*sibsp -2.52*sex(male) -1.03*pclass(2) -2.1*pclass(3) -0.33*embd(S)
222 | #
223 | #(i.e., the logistic/sigmoid function applied to the linear combination above)
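# A minimal sketch (not part of the original script): rather than plugging numbers into the
# equation by hand, the fitted model can return the same probabilities directly.
probs = logmodel.predict_proba(x_BE_test)   # column 1 holds P(survived = 1)
print(probs[:5, 1])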
224 |
--------------------------------------------------------------------------------
/31.Reading Files into Python.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"2. Reading Files into Python.ipynb","provenance":[],"authorship_tag":"ABX9TyM/mIl9ka9uA5pMkM+Ll0AK"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"1FzYccB24irQ","colab_type":"text"},"source":["## Reading Files into Python"]},{"cell_type":"code","metadata":{"id":"zDWO5w4jIiWL","colab_type":"code","colab":{}},"source":["# importing libraries\n","import pandas as pd"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"LFxGyLx3L5IS","colab_type":"code","colab":{}},"source":["#importing data\n","data = pd.read_csv('churn_prediction.csv')"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"qPcIwT47NQX6","colab_type":"code","outputId":"dd64ac4c-6cd5-4f12-c536-2336c75475b9","executionInfo":{"status":"ok","timestamp":1580377658648,"user_tz":-330,"elapsed":1247,"user":{"displayName":"Sharoon Saxena","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64","userId":"01167841530696023488"}},"colab":{"base_uri":"https://localhost:8080/","height":224}},"source":["#first 5 instances using \"head()\" function\n","data.head()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","
\n"," \n"," \n"," | \n"," customer_id | \n"," vintage | \n"," age | \n"," gender | \n"," dependents | \n"," occupation | \n"," city | \n"," customer_nw_category | \n"," branch_code | \n"," current_balance | \n"," previous_month_end_balance | \n"," average_monthly_balance_prevQ | \n"," average_monthly_balance_prevQ2 | \n"," current_month_credit | \n"," previous_month_credit | \n"," current_month_debit | \n"," previous_month_debit | \n"," current_month_balance | \n"," previous_month_balance | \n"," churn | \n"," last_transaction | \n","
\n"," \n"," \n"," \n"," | 0 | \n"," 1 | \n"," 3135 | \n"," 66 | \n"," Male | \n"," 0.0 | \n"," self_employed | \n"," 187.0 | \n"," 2 | \n"," 755 | \n"," 1458.71 | \n"," 1458.71 | \n"," 1458.71 | \n"," 1449.07 | \n"," 0.20 | \n"," 0.20 | \n"," 0.20 | \n"," 0.20 | \n"," 1458.71 | \n"," 1458.71 | \n"," 0 | \n"," 2019-05-21 | \n","
\n"," \n"," | 1 | \n"," 2 | \n"," 310 | \n"," 35 | \n"," Male | \n"," 0.0 | \n"," self_employed | \n"," NaN | \n"," 2 | \n"," 3214 | \n"," 5390.37 | \n"," 8704.66 | \n"," 7799.26 | \n"," 12419.41 | \n"," 0.56 | \n"," 0.56 | \n"," 5486.27 | \n"," 100.56 | \n"," 6496.78 | \n"," 8787.61 | \n"," 0 | \n"," 2019-11-01 | \n","
\n"," \n"," | 2 | \n"," 4 | \n"," 2356 | \n"," 31 | \n"," Male | \n"," 0.0 | \n"," salaried | \n"," 146.0 | \n"," 2 | \n"," 41 | \n"," 3913.16 | \n"," 5815.29 | \n"," 4910.17 | \n"," 2815.94 | \n"," 0.61 | \n"," 0.61 | \n"," 6046.73 | \n"," 259.23 | \n"," 5006.28 | \n"," 5070.14 | \n"," 0 | \n"," NaT | \n","
\n"," \n"," | 3 | \n"," 5 | \n"," 478 | \n"," 90 | \n"," NaN | \n"," NaN | \n"," self_employed | \n"," 1020.0 | \n"," 2 | \n"," 582 | \n"," 2291.91 | \n"," 2291.91 | \n"," 2084.54 | \n"," 1006.54 | \n"," 0.47 | \n"," 0.47 | \n"," 0.47 | \n"," 2143.33 | \n"," 2291.91 | \n"," 1669.79 | \n"," 1 | \n"," 2019-08-06 | \n","
\n"," \n"," | 4 | \n"," 6 | \n"," 2531 | \n"," 42 | \n"," Male | \n"," 2.0 | \n"," self_employed | \n"," 1494.0 | \n"," 3 | \n"," 388 | \n"," 927.72 | \n"," 1401.72 | \n"," 1643.31 | \n"," 1871.12 | \n"," 0.33 | \n"," 714.61 | \n"," 588.62 | \n"," 1538.06 | \n"," 1157.15 | \n"," 1677.16 | \n"," 1 | \n"," 2019-11-03 | \n","
\n"," \n","
\n","
"],"text/plain":[" customer_id vintage age ... previous_month_balance churn last_transaction\n","0 1 3135 66 ... 1458.71 0 2019-05-21\n","1 2 310 35 ... 8787.61 0 2019-11-01\n","2 4 2356 31 ... 5070.14 0 NaT\n","3 5 478 90 ... 1669.79 1 2019-08-06\n","4 6 2531 42 ... 1677.16 1 2019-11-03\n","\n","[5 rows x 21 columns]"]},"metadata":{"tags":[]},"execution_count":12}]},{"cell_type":"code","metadata":{"id":"_G5CwMDl2B_8","colab_type":"code","outputId":"14298801-9bf5-4aeb-a582-cfb9b03192d5","executionInfo":{"status":"ok","timestamp":1580377658649,"user_tz":-330,"elapsed":1035,"user":{"displayName":"Sharoon Saxena","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64","userId":"01167841530696023488"}},"colab":{"base_uri":"https://localhost:8080/","height":224}},"source":["#last 5 instances using \"tail()\" function\n","data.tail()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["\n","\n","
\n"," \n"," \n"," | \n"," customer_id | \n"," vintage | \n"," age | \n"," gender | \n"," dependents | \n"," occupation | \n"," city | \n"," customer_nw_category | \n"," branch_code | \n"," current_balance | \n"," previous_month_end_balance | \n"," average_monthly_balance_prevQ | \n"," average_monthly_balance_prevQ2 | \n"," current_month_credit | \n"," previous_month_credit | \n"," current_month_debit | \n"," previous_month_debit | \n"," current_month_balance | \n"," previous_month_balance | \n"," churn | \n"," last_transaction | \n","
\n"," \n"," \n"," \n"," | 28377 | \n"," 30297 | \n"," 1845 | \n"," 10 | \n"," Female | \n"," 0.0 | \n"," student | \n"," 1020.0 | \n"," 2 | \n"," 1207 | \n"," 1076.43 | \n"," 1076.43 | \n"," 2282.19 | \n"," 2787.70 | \n"," 0.30 | \n"," 0.30 | \n"," 0.30 | \n"," 0.30 | \n"," 1076.43 | \n"," 1076.43 | \n"," 0 | \n"," 2019-10-22 | \n","
\n"," \n"," | 28378 | \n"," 30298 | \n"," 4919 | \n"," 34 | \n"," Female | \n"," 0.0 | \n"," self_employed | \n"," 1046.0 | \n"," 2 | \n"," 223 | \n"," 3844.10 | \n"," 4069.21 | \n"," 3668.83 | \n"," 3865.55 | \n"," 1.71 | \n"," 2.29 | \n"," 901.00 | \n"," 1014.07 | \n"," 3738.54 | \n"," 3690.32 | \n"," 0 | \n"," 2019-12-17 | \n","
\n"," \n"," | 28379 | \n"," 30299 | \n"," 297 | \n"," 47 | \n"," Male | \n"," 0.0 | \n"," salaried | \n"," 1096.0 | \n"," 2 | \n"," 588 | \n"," 65511.97 | \n"," 61017.55 | \n"," 53444.81 | \n"," 21925.81 | \n"," 4666.84 | \n"," 3883.06 | \n"," 168.23 | \n"," 71.80 | \n"," 61078.50 | \n"," 57564.24 | \n"," 1 | \n"," 2019-12-31 | \n","
\n"," \n"," | 28380 | \n"," 30300 | \n"," 2585 | \n"," 50 | \n"," Male | \n"," 3.0 | \n"," self_employed | \n"," 1219.0 | \n"," 3 | \n"," 274 | \n"," 1625.55 | \n"," 1625.55 | \n"," 1683.20 | \n"," 1857.42 | \n"," 0.20 | \n"," 0.20 | \n"," 0.20 | \n"," 0.20 | \n"," 1625.55 | \n"," 1625.55 | \n"," 0 | \n"," NaT | \n","
\n"," \n"," | 28381 | \n"," 30301 | \n"," 2349 | \n"," 18 | \n"," Male | \n"," 0.0 | \n"," student | \n"," 1232.0 | \n"," 2 | \n"," 474 | \n"," 2107.05 | \n"," 2821.34 | \n"," 3213.44 | \n"," 4447.45 | \n"," 0.11 | \n"," 7.44 | \n"," 714.40 | \n"," 1094.09 | \n"," 2402.62 | \n"," 3260.58 | \n"," 1 | \n"," 2019-11-02 | \n","
\n"," \n","
\n","
"],"text/plain":[" customer_id vintage ... churn last_transaction\n","28377 30297 1845 ... 0 2019-10-22\n","28378 30298 4919 ... 0 2019-12-17\n","28379 30299 297 ... 1 2019-12-31\n","28380 30300 2585 ... 0 NaT\n","28381 30301 2349 ... 1 2019-11-02\n","\n","[5 rows x 21 columns]"]},"metadata":{"tags":[]},"execution_count":13}]},{"cell_type":"code","metadata":{"id":"5Iy0lfDNNZ8U","colab_type":"code","outputId":"5e2a976c-90ad-4dc4-af21-e2d551a8516a","executionInfo":{"status":"ok","timestamp":1580377659085,"user_tz":-330,"elapsed":1033,"user":{"displayName":"Sharoon Saxena","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64","userId":"01167841530696023488"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"source":["#finding out the shape of the data using \"shape\" variable: Output (rows, columns)\n","data.shape"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(28382, 21)"]},"metadata":{"tags":[]},"execution_count":14}]},{"cell_type":"code","metadata":{"id":"b75gSeumN50y","colab_type":"code","outputId":"0276bf06-6ea1-4415-edd5-264c0c8d96a4","executionInfo":{"status":"ok","timestamp":1580377659609,"user_tz":-330,"elapsed":1118,"user":{"displayName":"Sharoon Saxena","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64","userId":"01167841530696023488"}},"colab":{"base_uri":"https://localhost:8080/","height":153}},"source":["#Printing all the columns present in data\n","data.columns"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Index(['customer_id', 'vintage', 'age', 'gender', 'dependents', 'occupation',\n"," 'city', 'customer_nw_category', 'branch_code', 'current_balance',\n"," 'previous_month_end_balance', 'average_monthly_balance_prevQ',\n"," 'average_monthly_balance_prevQ2', 'current_month_credit',\n"," 'previous_month_credit', 'current_month_debit', 'previous_month_debit',\n"," 'current_month_balance', 'previous_month_balance', 'churn',\n"," 'last_transaction'],\n"," dtype='object')"]},"metadata":{"tags":[]},"execution_count":15}]},{"cell_type":"code","metadata":{"id":"kKIUtLmZX5tg","colab_type":"code","outputId":"71c127fd-08af-4241-a936-e43dbf7ee0eb","executionInfo":{"status":"ok","timestamp":1580377660322,"user_tz":-330,"elapsed":820,"user":{"displayName":"Sharoon Saxena","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64","userId":"01167841530696023488"}},"colab":{"base_uri":"https://localhost:8080/","height":391}},"source":["data.dtypes"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["customer_id int64\n","vintage int64\n","age int64\n","gender object\n","dependents float64\n","occupation object\n","city float64\n","customer_nw_category int64\n","branch_code int64\n","current_balance float64\n","previous_month_end_balance float64\n","average_monthly_balance_prevQ float64\n","average_monthly_balance_prevQ2 float64\n","current_month_credit float64\n","previous_month_credit float64\n","current_month_debit float64\n","previous_month_debit float64\n","current_month_balance float64\n","previous_month_balance float64\n","churn int64\n","last_transaction object\n","dtype: object"]},"metadata":{"tags":[]},"execution_count":16}]}]}
2 |
--------------------------------------------------------------------------------
/32.Min_Max_Range_Updated.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "colab_type": "text",
7 | "id": "LIOaq8QL8CDm"
8 | },
9 | "source": [
10 | "## Min, Max and Range of Data"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 1,
16 | "metadata": {
17 | "colab": {},
18 | "colab_type": "code",
19 | "id": "Jn7K0qBlR_Kn"
20 | },
21 | "outputs": [],
22 | "source": [
23 | "#import libraries\n",
24 | "import pandas as pd\n",
25 | "import numpy as np"
26 | ]
27 | },
28 | {
29 | "cell_type": "markdown",
30 | "metadata": {
31 | "colab_type": "text",
32 | "id": "NxC7Fy8fOuRz"
33 | },
34 | "source": [
35 | "This is the dataset for __Customer Churn Problem__. \n"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": 2,
41 | "metadata": {
42 | "colab": {},
43 | "colab_type": "code",
44 | "id": "jtGI5XRpR_Ku"
45 | },
46 | "outputs": [],
47 | "source": [
48 | "# importing dataset\n",
49 | "data = pd.read_csv('churn_prediction.csv')"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": 3,
55 | "metadata": {},
56 | "outputs": [
57 | {
58 | "data": {
59 | "text/html": [
60 | "\n",
61 | "\n",
74 | "
\n",
75 | " \n",
76 | " \n",
77 | " | \n",
78 | " customer_id | \n",
79 | " vintage | \n",
80 | " age | \n",
81 | " gender | \n",
82 | " dependents | \n",
83 | " occupation | \n",
84 | " city | \n",
85 | " customer_nw_category | \n",
86 | " branch_code | \n",
87 | " current_balance | \n",
88 | " ... | \n",
89 | " average_monthly_balance_prevQ | \n",
90 | " average_monthly_balance_prevQ2 | \n",
91 | " current_month_credit | \n",
92 | " previous_month_credit | \n",
93 | " current_month_debit | \n",
94 | " previous_month_debit | \n",
95 | " current_month_balance | \n",
96 | " previous_month_balance | \n",
97 | " churn | \n",
98 | " last_transaction | \n",
99 | "
\n",
100 | " \n",
101 | " \n",
102 | " \n",
103 | " | 0 | \n",
104 | " 1 | \n",
105 | " 2101 | \n",
106 | " 66 | \n",
107 | " Male | \n",
108 | " 0.0 | \n",
109 | " self_employed | \n",
110 | " 187.0 | \n",
111 | " 2 | \n",
112 | " 755 | \n",
113 | " 1458.71 | \n",
114 | " ... | \n",
115 | " 1458.71 | \n",
116 | " 1449.07 | \n",
117 | " 0.20 | \n",
118 | " 0.20 | \n",
119 | " 0.20 | \n",
120 | " 0.20 | \n",
121 | " 1458.71 | \n",
122 | " 1458.71 | \n",
123 | " 0 | \n",
124 | " 2019-05-21 | \n",
125 | "
\n",
126 | " \n",
127 | " | 1 | \n",
128 | " 2 | \n",
129 | " 2348 | \n",
130 | " 35 | \n",
131 | " Male | \n",
132 | " 0.0 | \n",
133 | " self_employed | \n",
134 | " NaN | \n",
135 | " 2 | \n",
136 | " 3214 | \n",
137 | " 5390.37 | \n",
138 | " ... | \n",
139 | " 7799.26 | \n",
140 | " 12419.41 | \n",
141 | " 0.56 | \n",
142 | " 0.56 | \n",
143 | " 5486.27 | \n",
144 | " 100.56 | \n",
145 | " 6496.78 | \n",
146 | " 8787.61 | \n",
147 | " 0 | \n",
148 | " 2019-11-01 | \n",
149 | "
\n",
150 | " \n",
151 | " | 2 | \n",
152 | " 4 | \n",
153 | " 2194 | \n",
154 | " 31 | \n",
155 | " Male | \n",
156 | " 0.0 | \n",
157 | " salaried | \n",
158 | " 146.0 | \n",
159 | " 2 | \n",
160 | " 41 | \n",
161 | " 3913.16 | \n",
162 | " ... | \n",
163 | " 4910.17 | \n",
164 | " 2815.94 | \n",
165 | " 0.61 | \n",
166 | " 0.61 | \n",
167 | " 6046.73 | \n",
168 | " 259.23 | \n",
169 | " 5006.28 | \n",
170 | " 5070.14 | \n",
171 | " 0 | \n",
172 | " NaT | \n",
173 | "
\n",
174 | " \n",
175 | " | 3 | \n",
176 | " 5 | \n",
177 | " 2329 | \n",
178 | " 90 | \n",
179 | " NaN | \n",
180 | " NaN | \n",
181 | " self_employed | \n",
182 | " 1020.0 | \n",
183 | " 2 | \n",
184 | " 582 | \n",
185 | " 2291.91 | \n",
186 | " ... | \n",
187 | " 2084.54 | \n",
188 | " 1006.54 | \n",
189 | " 0.47 | \n",
190 | " 0.47 | \n",
191 | " 0.47 | \n",
192 | " 2143.33 | \n",
193 | " 2291.91 | \n",
194 | " 1669.79 | \n",
195 | " 1 | \n",
196 | " 2019-08-06 | \n",
197 | "
\n",
198 | " \n",
199 | " | 4 | \n",
200 | " 6 | \n",
201 | " 1579 | \n",
202 | " 42 | \n",
203 | " Male | \n",
204 | " 2.0 | \n",
205 | " self_employed | \n",
206 | " 1494.0 | \n",
207 | " 3 | \n",
208 | " 388 | \n",
209 | " 927.72 | \n",
210 | " ... | \n",
211 | " 1643.31 | \n",
212 | " 1871.12 | \n",
213 | " 0.33 | \n",
214 | " 714.61 | \n",
215 | " 588.62 | \n",
216 | " 1538.06 | \n",
217 | " 1157.15 | \n",
218 | " 1677.16 | \n",
219 | " 1 | \n",
220 | " 2019-11-03 | \n",
221 | "
\n",
222 | " \n",
223 | "
\n",
224 | "
5 rows × 21 columns
\n",
225 | "
"
226 | ],
227 | "text/plain": [
228 | " customer_id vintage age gender dependents occupation city \\\n",
229 | "0 1 2101 66 Male 0.0 self_employed 187.0 \n",
230 | "1 2 2348 35 Male 0.0 self_employed NaN \n",
231 | "2 4 2194 31 Male 0.0 salaried 146.0 \n",
232 | "3 5 2329 90 NaN NaN self_employed 1020.0 \n",
233 | "4 6 1579 42 Male 2.0 self_employed 1494.0 \n",
234 | "\n",
235 | " customer_nw_category branch_code current_balance ... \\\n",
236 | "0 2 755 1458.71 ... \n",
237 | "1 2 3214 5390.37 ... \n",
238 | "2 2 41 3913.16 ... \n",
239 | "3 2 582 2291.91 ... \n",
240 | "4 3 388 927.72 ... \n",
241 | "\n",
242 | " average_monthly_balance_prevQ average_monthly_balance_prevQ2 \\\n",
243 | "0 1458.71 1449.07 \n",
244 | "1 7799.26 12419.41 \n",
245 | "2 4910.17 2815.94 \n",
246 | "3 2084.54 1006.54 \n",
247 | "4 1643.31 1871.12 \n",
248 | "\n",
249 | " current_month_credit previous_month_credit current_month_debit \\\n",
250 | "0 0.20 0.20 0.20 \n",
251 | "1 0.56 0.56 5486.27 \n",
252 | "2 0.61 0.61 6046.73 \n",
253 | "3 0.47 0.47 0.47 \n",
254 | "4 0.33 714.61 588.62 \n",
255 | "\n",
256 | " previous_month_debit current_month_balance previous_month_balance churn \\\n",
257 | "0 0.20 1458.71 1458.71 0 \n",
258 | "1 100.56 6496.78 8787.61 0 \n",
259 | "2 259.23 5006.28 5070.14 0 \n",
260 | "3 2143.33 2291.91 1669.79 1 \n",
261 | "4 1538.06 1157.15 1677.16 1 \n",
262 | "\n",
263 | " last_transaction \n",
264 | "0 2019-05-21 \n",
265 | "1 2019-11-01 \n",
266 | "2 NaT \n",
267 | "3 2019-08-06 \n",
268 | "4 2019-11-03 \n",
269 | "\n",
270 | "[5 rows x 21 columns]"
271 | ]
272 | },
273 | "execution_count": 3,
274 | "metadata": {},
275 | "output_type": "execute_result"
276 | }
277 | ],
278 | "source": [
279 | "data.head()"
280 | ]
281 | },
282 | {
283 | "cell_type": "code",
284 | "execution_count": 4,
285 | "metadata": {},
286 | "outputs": [
287 | {
288 | "data": {
289 | "text/plain": [
290 | "(28382, 21)"
291 | ]
292 | },
293 | "execution_count": 4,
294 | "metadata": {},
295 | "output_type": "execute_result"
296 | }
297 | ],
298 | "source": [
299 | "data.shape"
300 | ]
301 | },
302 | {
303 | "cell_type": "markdown",
304 | "metadata": {
305 | "colab_type": "text",
306 | "id": "TkE9qtAvOuSD"
307 | },
308 | "source": [
309 | "Identification of __Datatypes__"
310 | ]
311 | },
312 | {
313 | "cell_type": "code",
314 | "execution_count": 0,
315 | "metadata": {
316 | "colab": {},
317 | "colab_type": "code",
318 | "id": "D1eopfppOuSG",
319 | "outputId": "7b2b8784-bce5-4424-e2a1-7e649fc2ce62"
320 | },
321 | "outputs": [
322 | {
323 | "data": {
324 | "text/plain": [
325 | "customer_id int64\n",
326 | "vintage int64\n",
327 | "age int64\n",
328 | "gender object\n",
329 | "dependents float64\n",
330 | "occupation object\n",
331 | "city float64\n",
332 | "customer_nw_category int64\n",
333 | "branch_code int64\n",
334 | "days_since_last_transaction float64\n",
335 | "current_balance float64\n",
336 | "previous_month_end_balance float64\n",
337 | "average_monthly_balance_prevQ float64\n",
338 | "average_monthly_balance_prevQ2 float64\n",
339 | "current_month_credit float64\n",
340 | "previous_month_credit float64\n",
341 | "current_month_debit float64\n",
342 | "previous_month_debit float64\n",
343 | "current_month_balance float64\n",
344 | "previous_month_balance float64\n",
345 | "churn int64\n",
346 | "dtype: object"
347 | ]
348 | },
349 | "execution_count": 4,
350 | "metadata": {
351 | "tags": []
352 | },
353 | "output_type": "execute_result"
354 | }
355 | ],
356 | "source": [
357 | "data.dtypes"
358 | ]
359 | },
360 | {
361 | "cell_type": "markdown",
362 | "metadata": {
363 | "colab_type": "text",
364 | "id": "s3iJ7r43Ac0W"
365 | },
366 | "source": [
367 | "## Isolating numerical columns"
368 | ]
369 | },
370 | {
371 | "cell_type": "markdown",
372 | "metadata": {
373 | "colab_type": "text",
374 | "id": "-l0diipkOuSb"
375 | },
376 | "source": [
377 | "Storing indices of __Integer and Float__ in numercial_cols because we are dealing with __numerical variables__"
378 | ]
379 | },
380 | {
381 | "cell_type": "code",
382 | "execution_count": 5,
383 | "metadata": {
384 | "colab": {
385 | "base_uri": "https://localhost:8080/",
386 | "height": 153
387 | },
388 | "colab_type": "code",
389 | "executionInfo": {
390 | "elapsed": 1441,
391 | "status": "ok",
392 | "timestamp": 1581504105970,
393 | "user": {
394 | "displayName": "Sharoon Saxena",
395 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64",
396 | "userId": "01167841530696023488"
397 | },
398 | "user_tz": -330
399 | },
400 | "id": "0PrPd53w-rp6",
401 | "outputId": "6d46988f-e384-43d7-9622-353cd5cfc487"
402 | },
403 | "outputs": [
404 | {
405 | "data": {
406 | "text/plain": [
407 | "Index(['customer_id', 'vintage', 'age', 'dependents', 'city',\n",
408 | " 'customer_nw_category', 'branch_code', 'current_balance',\n",
409 | " 'previous_month_end_balance', 'average_monthly_balance_prevQ',\n",
410 | " 'average_monthly_balance_prevQ2', 'current_month_credit',\n",
411 | " 'previous_month_credit', 'current_month_debit', 'previous_month_debit',\n",
412 | " 'current_month_balance', 'previous_month_balance', 'churn'],\n",
413 | " dtype='object')"
414 | ]
415 | },
416 | "execution_count": 5,
417 | "metadata": {},
418 | "output_type": "execute_result"
419 | }
420 | ],
421 | "source": [
422 | "# storing indices of all numerical data types in numerical_cols\n",
423 | "numerical_cols = data.select_dtypes(include=['int64', 'float64']).columns\n",
424 | "\n",
425 | "# checking\n",
426 | "numerical_cols"
427 | ]
428 | },
429 | {
430 | "cell_type": "markdown",
431 | "metadata": {
432 | "colab_type": "text",
433 | "id": "Vnm7rEvEAbel"
434 | },
435 | "source": [
436 | "### Min obseravtion"
437 | ]
438 | },
439 | {
440 | "cell_type": "code",
441 | "execution_count": 0,
442 | "metadata": {
443 | "colab": {},
444 | "colab_type": "code",
445 | "id": "6oGzqFxqR_K9",
446 | "outputId": "f19a1072-9be5-403c-9337-6dc22cce9394"
447 | },
448 | "outputs": [
449 | {
450 | "data": {
451 | "text/html": [
452 | "\n",
453 | "\n",
466 | "
\n",
467 | " \n",
468 | " \n",
469 | " | \n",
470 | " customer_id | \n",
471 | " vintage | \n",
472 | " age | \n",
473 | " gender | \n",
474 | " dependents | \n",
475 | " occupation | \n",
476 | " city | \n",
477 | " customer_nw_category | \n",
478 | " branch_code | \n",
479 | " days_since_last_transaction | \n",
480 | " ... | \n",
481 | " previous_month_end_balance | \n",
482 | " average_monthly_balance_prevQ | \n",
483 | " average_monthly_balance_prevQ2 | \n",
484 | " current_month_credit | \n",
485 | " previous_month_credit | \n",
486 | " current_month_debit | \n",
487 | " previous_month_debit | \n",
488 | " current_month_balance | \n",
489 | " previous_month_balance | \n",
490 | " churn | \n",
491 | "
\n",
492 | " \n",
493 | " \n",
494 | " \n",
495 | " | 12608 | \n",
496 | " 13467 | \n",
497 | " 3082 | \n",
498 | " 80 | \n",
499 | " Male | \n",
500 | " 0.0 | \n",
501 | " retired | \n",
502 | " 1096.0 | \n",
503 | " 1 | \n",
504 | " 27 | \n",
505 | " 5.0 | \n",
506 | " ... | \n",
507 | " 423.06 | \n",
508 | " 1694.57 | \n",
509 | " 868.26 | \n",
510 | " 9471.01 | \n",
511 | " 2680.04 | \n",
512 | " 15229.44 | \n",
513 | " 7859.37 | \n",
514 | " 1050.17 | \n",
515 | " 2002.97 | \n",
516 | " 1 | \n",
517 | "
\n",
518 | " \n",
519 | "
\n",
520 | "
1 rows × 21 columns
\n",
521 | "
"
522 | ],
523 | "text/plain": [
524 | " customer_id vintage age gender dependents occupation city \\\n",
525 | "12608 13467 3082 80 Male 0.0 retired 1096.0 \n",
526 | "\n",
527 | " customer_nw_category branch_code days_since_last_transaction ... \\\n",
528 | "12608 1 27 5.0 ... \n",
529 | "\n",
530 | " previous_month_end_balance average_monthly_balance_prevQ \\\n",
531 | "12608 423.06 1694.57 \n",
532 | "\n",
533 | " average_monthly_balance_prevQ2 current_month_credit \\\n",
534 | "12608 868.26 9471.01 \n",
535 | "\n",
536 | " previous_month_credit current_month_debit previous_month_debit \\\n",
537 | "12608 2680.04 15229.44 7859.37 \n",
538 | "\n",
539 | " current_month_balance previous_month_balance churn \n",
540 | "12608 1050.17 2002.97 1 \n",
541 | "\n",
542 | "[1 rows x 21 columns]"
543 | ]
544 | },
545 | "execution_count": 6,
546 | "metadata": {
547 | "tags": []
548 | },
549 | "output_type": "execute_result"
550 | }
551 | ],
552 | "source": [
553 | "# observation with minimum current balance\n",
554 | "data[data['current_balance'] == data['current_balance'].min()]"
555 | ]
556 | },
557 | {
558 | "cell_type": "markdown",
559 | "metadata": {
560 | "colab_type": "text",
561 | "id": "yDJjIU9POuS9"
562 | },
563 | "source": [
564 | "* Customer's id is 13467\n",
565 | "* Customer has __minimum current balance__ is -5503.96 \n"
566 | ]
567 | },
568 | {
569 | "cell_type": "markdown",
570 | "metadata": {
571 | "colab_type": "text",
572 | "id": "SCHZvc0XOuTC"
573 | },
574 | "source": [
575 | "### Max observation"
576 | ]
577 | },
578 | {
579 | "cell_type": "code",
580 | "execution_count": 0,
581 | "metadata": {
582 | "colab": {},
583 | "colab_type": "code",
584 | "id": "YqXn9lcyR_LA",
585 | "outputId": "c48e8ca4-f9d6-471c-dd0d-9f8c14c8af28"
586 | },
587 | "outputs": [
588 | {
589 | "data": {
590 | "text/html": [
591 | "\n",
592 | "\n",
605 | "
\n",
606 | " \n",
607 | " \n",
608 | " | \n",
609 | " customer_id | \n",
610 | " vintage | \n",
611 | " age | \n",
612 | " gender | \n",
613 | " dependents | \n",
614 | " occupation | \n",
615 | " city | \n",
616 | " customer_nw_category | \n",
617 | " branch_code | \n",
618 | " days_since_last_transaction | \n",
619 | " ... | \n",
620 | " previous_month_end_balance | \n",
621 | " average_monthly_balance_prevQ | \n",
622 | " average_monthly_balance_prevQ2 | \n",
623 | " current_month_credit | \n",
624 | " previous_month_credit | \n",
625 | " current_month_debit | \n",
626 | " previous_month_debit | \n",
627 | " current_month_balance | \n",
628 | " previous_month_balance | \n",
629 | " churn | \n",
630 | "
\n",
631 | " \n",
632 | " \n",
633 | " \n",
634 | " | 24095 | \n",
635 | " 25712 | \n",
636 | " 1192 | \n",
637 | " 90 | \n",
638 | " Male | \n",
639 | " 0.0 | \n",
640 | " retired | \n",
641 | " 1020.0 | \n",
642 | " 2 | \n",
643 | " 5 | \n",
644 | " 18.0 | \n",
645 | " ... | \n",
646 | " 24270.54 | \n",
647 | " 11728.39 | \n",
648 | " 111617.41 | \n",
649 | " 12269845.39 | \n",
650 | " 0.21 | \n",
651 | " 7637857.36 | \n",
652 | " 0.21 | \n",
653 | " 8399.62 | \n",
654 | " 24270.54 | \n",
655 | " 1 | \n",
656 | "
\n",
657 | " \n",
658 | "
\n",
659 | "
1 rows × 21 columns
\n",
660 | "
"
661 | ],
662 | "text/plain": [
663 | " customer_id vintage age gender dependents occupation city \\\n",
664 | "24095 25712 1192 90 Male 0.0 retired 1020.0 \n",
665 | "\n",
666 | " customer_nw_category branch_code days_since_last_transaction ... \\\n",
667 | "24095 2 5 18.0 ... \n",
668 | "\n",
669 | " previous_month_end_balance average_monthly_balance_prevQ \\\n",
670 | "24095 24270.54 11728.39 \n",
671 | "\n",
672 | " average_monthly_balance_prevQ2 current_month_credit \\\n",
673 | "24095 111617.41 12269845.39 \n",
674 | "\n",
675 | " previous_month_credit current_month_debit previous_month_debit \\\n",
676 | "24095 0.21 7637857.36 0.21 \n",
677 | "\n",
678 | " current_month_balance previous_month_balance churn \n",
679 | "24095 8399.62 24270.54 1 \n",
680 | "\n",
681 | "[1 rows x 21 columns]"
682 | ]
683 | },
684 | "execution_count": 7,
685 | "metadata": {
686 | "tags": []
687 | },
688 | "output_type": "execute_result"
689 | }
690 | ],
691 | "source": [
692 | "# obseravtion with maxximum current month debit\n",
693 | "data[data['current_month_debit'] == data['current_month_debit'].max()]"
694 | ]
695 | },
696 | {
697 | "cell_type": "markdown",
698 | "metadata": {
699 | "colab_type": "text",
700 | "id": "qKH3ErymOuTU"
701 | },
702 | "source": [
703 | "* Customer's id is 25712\n",
704 | "* Customer has __maximum current month debit__ is 7637857.36 \n"
705 | ]
706 | },
707 | {
708 | "cell_type": "markdown",
709 | "metadata": {
710 | "colab_type": "text",
711 | "id": "X0UJT_w3OuTW"
712 | },
713 | "source": [
714 | "### Range "
715 | ]
716 | },
717 | {
718 | "cell_type": "markdown",
719 | "metadata": {
720 | "colab_type": "text",
721 | "id": "-RO9lwQeOuTa"
722 | },
723 | "source": [
724 | " __Range of Age__ in our datase indicating the difference of Age between the oldest and youngest customers"
725 | ]
726 | },
727 | {
728 | "cell_type": "code",
729 | "execution_count": 0,
730 | "metadata": {
731 | "colab": {
732 | "base_uri": "https://localhost:8080/",
733 | "height": 34
734 | },
735 | "colab_type": "code",
736 | "executionInfo": {
737 | "elapsed": 1044,
738 | "status": "ok",
739 | "timestamp": 1581505193463,
740 | "user": {
741 | "displayName": "Sharoon Saxena",
742 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64",
743 | "userId": "01167841530696023488"
744 | },
745 | "user_tz": -330
746 | },
747 | "id": "kkHzHmLrR_LF",
748 | "outputId": "223f34f6-1355-49b8-86a5-33a50d4b7d24"
749 | },
750 | "outputs": [
751 | {
752 | "name": "stdout",
753 | "output_type": "stream",
754 | "text": [
755 | "1 90\n"
756 | ]
757 | }
758 | ],
759 | "source": [
760 | "# Range of Age \n",
761 | "\n",
762 | "print(data['age'].min(), data['age'].max())"
763 | ]
764 | },
765 | {
766 | "cell_type": "markdown",
767 | "metadata": {
768 | "colab_type": "text",
769 | "id": "QRiwOGUTOuTp"
770 | },
771 | "source": [
772 | "* Oldest Customer Age is 90\n",
773 | "* Youngest Customer Age is 1\n",
774 | "* Range is [1,90]"
775 | ]
776 | },
777 | {
778 | "cell_type": "markdown",
779 | "metadata": {
780 | "colab_type": "text",
781 | "id": "ObUYzPs5OuTt"
782 | },
783 | "source": [
784 | "### Max, Min, Range for each column"
785 | ]
786 | },
787 | {
788 | "cell_type": "code",
789 | "execution_count": 0,
790 | "metadata": {
791 | "colab": {
792 | "base_uri": "https://localhost:8080/",
793 | "height": 408
794 | },
795 | "colab_type": "code",
796 | "executionInfo": {
797 | "elapsed": 1055,
798 | "status": "ok",
799 | "timestamp": 1581505222868,
800 | "user": {
801 | "displayName": "Sharoon Saxena",
802 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64",
803 | "userId": "01167841530696023488"
804 | },
805 | "user_tz": -330
806 | },
807 | "id": "VusaDbjqR_LH",
808 | "outputId": "b045a934-0606-4298-d0c6-edb1df53eae1"
809 | },
810 | "outputs": [
811 | {
812 | "data": {
813 | "text/plain": [
814 | "customer_id 30301.00\n",
815 | "vintage 12899.00\n",
816 | "age 90.00\n",
817 | "dependents 52.00\n",
818 | "city 1649.00\n",
819 | "customer_nw_category 3.00\n",
820 | "branch_code 4782.00\n",
821 | "days_since_last_transaction 365.00\n",
822 | "current_balance 5905904.03\n",
823 | "previous_month_end_balance 5740438.63\n",
824 | "average_monthly_balance_prevQ 5700289.57\n",
825 | "average_monthly_balance_prevQ2 5010170.10\n",
826 | "current_month_credit 12269845.39\n",
827 | "previous_month_credit 2361808.29\n",
828 | "current_month_debit 7637857.36\n",
829 | "previous_month_debit 1414168.06\n",
830 | "current_month_balance 5778184.77\n",
831 | "previous_month_balance 5720144.50\n",
832 | "churn 1.00\n",
833 | "dtype: float64"
834 | ]
835 | },
836 | "execution_count": 9,
837 | "metadata": {
838 | "tags": []
839 | },
840 | "output_type": "execute_result"
841 | }
842 | ],
843 | "source": [
844 | "# Printing Max of evey numerical column\n",
845 | "data[numerical_cols].max()"
846 | ]
847 | },
848 | {
849 | "cell_type": "markdown",
850 | "metadata": {
851 | "colab_type": "text",
852 | "id": "MM_lBwUHYXa2"
853 | },
854 | "source": [
855 | "* Maximum value of vintage for a customer is 12899.\n",
856 | "* Maximum age of a customer in our dataset is 90\n",
857 | "* Maximum number of dependents in our dataset is 52\n",
858 | "* Maximum day since last transaction is 365\n",
859 | "* Maximum values for __current_balance, previous_month_end_balance,average_monthly_balance_prevQ, current_month_balance, previous_month_balance__ are close to 57 lakhs.\n",
860 | "* Maximum value for current_month_credit is 12269845.39\n",
861 | "* Maximum value for previous_month_credit is 2361808.29\n",
862 | "* maximum value for current_month_debit and previous_month debit is respectively 7637857.36 and 1414168.06.\n",
863 | "* The features like __customer_id, city, customer_nw_category, branch_code, churn__ are required to be treated as categorcial variable so their maximum value don't represent numerical significance.\n"
864 | ]
865 | },
866 | {
867 | "cell_type": "code",
868 | "execution_count": 0,
869 | "metadata": {
870 | "colab": {
871 | "base_uri": "https://localhost:8080/",
872 | "height": 408
873 | },
874 | "colab_type": "code",
875 | "executionInfo": {
876 | "elapsed": 1084,
877 | "status": "ok",
878 | "timestamp": 1581505242279,
879 | "user": {
880 | "displayName": "Sharoon Saxena",
881 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64",
882 | "userId": "01167841530696023488"
883 | },
884 | "user_tz": -330
885 | },
886 | "id": "8OSHRZvfR_LK",
887 | "outputId": "a6e584fc-e627-4ca4-bdf6-e02d6348760b"
888 | },
889 | "outputs": [
890 | {
891 | "data": {
892 | "text/plain": [
893 | "customer_id 1.00\n",
894 | "vintage 180.00\n",
895 | "age 1.00\n",
896 | "dependents 0.00\n",
897 | "city 0.00\n",
898 | "customer_nw_category 1.00\n",
899 | "branch_code 1.00\n",
900 | "days_since_last_transaction 0.00\n",
901 | "current_balance -5503.96\n",
902 | "previous_month_end_balance -3149.57\n",
903 | "average_monthly_balance_prevQ 1428.69\n",
904 | "average_monthly_balance_prevQ2 -16506.10\n",
905 | "current_month_credit 0.01\n",
906 | "previous_month_credit 0.01\n",
907 | "current_month_debit 0.01\n",
908 | "previous_month_debit 0.01\n",
909 | "current_month_balance -3374.18\n",
910 | "previous_month_balance -5171.92\n",
911 | "churn 0.00\n",
912 | "dtype: float64"
913 | ]
914 | },
915 | "execution_count": 10,
916 | "metadata": {
917 | "tags": []
918 | },
919 | "output_type": "execute_result"
920 | }
921 | ],
922 | "source": [
923 | "# printing min of every numercial column\n",
924 | "data[numerical_cols].min()"
925 | ]
926 | },
927 | {
928 | "cell_type": "code",
929 | "execution_count": 0,
930 | "metadata": {
931 | "colab": {},
932 | "colab_type": "code",
933 | "id": "ZMnoIdSmYXa5",
934 | "outputId": "08e05724-a51a-4a81-9815-c2f869813b45"
935 | },
936 | "outputs": [
937 | {
938 | "name": "stdout",
939 | "output_type": "stream",
940 | "text": [
941 | "range of customer_id: [1, 30301]\n",
942 | "range of vintage: [180, 12899]\n",
943 | "range of age: [1, 90]\n",
944 | "range of dependents: [0.0, 52.0]\n",
945 | "range of city: [0.0, 1649.0]\n",
946 | "range of customer_nw_category: [1, 3]\n",
947 | "range of branch_code: [1, 4782]\n",
948 | "range of days_since_last_transaction: [0.0, 365.0]\n",
949 | "range of current_balance: [-5503.96, 5905904.03]\n",
950 | "range of previous_month_end_balance: [-3149.57, 5740438.63]\n",
951 | "range of average_monthly_balance_prevQ: [1428.69, 5700289.57]\n",
952 | "range of average_monthly_balance_prevQ2: [-16506.1, 5010170.1]\n",
953 | "range of current_month_credit: [0.01, 12269845.39]\n",
954 | "range of previous_month_credit: [0.01, 2361808.29]\n",
955 | "range of current_month_debit: [0.01, 7637857.36]\n",
956 | "range of previous_month_debit: [0.01, 1414168.06]\n",
957 | "range of current_month_balance: [-3374.18, 5778184.77]\n",
958 | "range of previous_month_balance: [-5171.92, 5720144.5]\n",
959 | "range of churn: [0, 1]\n"
960 | ]
961 | }
962 | ],
963 | "source": [
964 | "for col in numerical_cols:\n",
965 | " print(\"range of {}{}{}{}{}{}{}{}\".format(col,\":\",\" \",\"[\",data[col].min(), \", \",data[col].max(),\"]\"))"
966 | ]
967 | },
968 | {
969 | "cell_type": "markdown",
970 | "metadata": {
971 | "colab_type": "text",
972 | "id": "TzIxUenlYXa8"
973 | },
974 | "source": [
975 | "* Range of current_month_credit is highest among all features.\n",
976 | "* Range of days_since_last_transaction is 1 year."
977 | ]
978 | },
979 | {
980 | "cell_type": "code",
981 | "execution_count": 0,
982 | "metadata": {
983 | "colab": {},
984 | "colab_type": "code",
985 | "id": "rZ-87c8xYXa8"
986 | },
987 | "outputs": [],
988 | "source": []
989 | }
990 | ],
991 | "metadata": {
992 | "colab": {
993 | "collapsed_sections": [],
994 | "name": "Min_Max_Range_Updated.ipynb",
995 | "provenance": []
996 | },
997 | "kernelspec": {
998 | "display_name": "Python 3",
999 | "language": "python",
1000 | "name": "python3"
1001 | },
1002 | "language_info": {
1003 | "codemirror_mode": {
1004 | "name": "ipython",
1005 | "version": 3
1006 | },
1007 | "file_extension": ".py",
1008 | "mimetype": "text/x-python",
1009 | "name": "python",
1010 | "nbconvert_exporter": "python",
1011 | "pygments_lexer": "ipython3",
1012 | "version": "3.8.5"
1013 | }
1014 | },
1015 | "nbformat": 4,
1016 | "nbformat_minor": 1
1017 | }
1018 |
--------------------------------------------------------------------------------
/33.Mean_Variance.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 14,
6 | "metadata": {
7 | "colab": {},
8 | "colab_type": "code",
9 | "id": "UtrBkZutQ_nz"
10 | },
11 | "outputs": [],
12 | "source": [
13 | "# importing libraries\n",
14 | "import pandas as pd\n",
15 | "import numpy as np\n",
16 | "import matplotlib.pyplot as plt"
17 | ]
18 | },
19 | {
20 | "cell_type": "markdown",
21 | "metadata": {},
22 | "source": [
23 | "### This is our dataset of Customer Churn Prediction."
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": 15,
29 | "metadata": {
30 | "colab": {},
31 | "colab_type": "code",
32 | "id": "BSs0Y24MQ_n6"
33 | },
34 | "outputs": [],
35 | "source": [
36 | "# importing data\n",
37 | "data = pd.read_csv('churn_prediction.csv')"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": 16,
43 | "metadata": {
44 | "colab": {
45 | "base_uri": "https://localhost:8080/",
46 | "height": 211
47 | },
48 | "colab_type": "code",
49 | "executionInfo": {
50 | "elapsed": 1240,
51 | "status": "ok",
52 | "timestamp": 1581056281581,
53 | "user": {
54 | "displayName": "Sharoon Saxena",
55 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64",
56 | "userId": "01167841530696023488"
57 | },
58 | "user_tz": -330
59 | },
60 | "id": "36nlgrybQ_n9",
61 | "outputId": "3903402a-332e-44c7-9d45-7511ca730b3c"
62 | },
63 | "outputs": [
64 | {
65 | "data": {
66 | "text/html": [
67 | "\n",
68 | "\n",
81 | "
\n",
82 | " \n",
83 | " \n",
84 | " | \n",
85 | " customer_id | \n",
86 | " vintage | \n",
87 | " age | \n",
88 | " gender | \n",
89 | " dependents | \n",
90 | " occupation | \n",
91 | " city | \n",
92 | " customer_nw_category | \n",
93 | " branch_code | \n",
94 | " current_balance | \n",
95 | " ... | \n",
96 | " previous_month_credit | \n",
97 | " current_month_debit | \n",
98 | " previous_month_debit | \n",
99 | " current_month_balance | \n",
100 | " previous_month_balance | \n",
101 | " churn | \n",
102 | " doy_ls_tran | \n",
103 | " woy_ls_tran | \n",
104 | " moy_ls_tran | \n",
105 | " dow_ls_tran | \n",
106 | "
\n",
107 | " \n",
108 | " \n",
109 | " \n",
110 | " | 0 | \n",
111 | " 1 | \n",
112 | " 2101 | \n",
113 | " 66 | \n",
114 | " Male | \n",
115 | " 0.0 | \n",
116 | " self_employed | \n",
117 | " 187.0 | \n",
118 | " 2 | \n",
119 | " 755 | \n",
120 | " 1458.71 | \n",
121 | " ... | \n",
122 | " 0.20 | \n",
123 | " 0.20 | \n",
124 | " 0.20 | \n",
125 | " 1458.71 | \n",
126 | " 1458.71 | \n",
127 | " 0 | \n",
128 | " 141.0 | \n",
129 | " 21.0 | \n",
130 | " 5.0 | \n",
131 | " 1.0 | \n",
132 | "
\n",
133 | " \n",
134 | " | 1 | \n",
135 | " 2 | \n",
136 | " 2348 | \n",
137 | " 35 | \n",
138 | " Male | \n",
139 | " 0.0 | \n",
140 | " self_employed | \n",
141 | " NaN | \n",
142 | " 2 | \n",
143 | " 3214 | \n",
144 | " 5390.37 | \n",
145 | " ... | \n",
146 | " 0.56 | \n",
147 | " 5486.27 | \n",
148 | " 100.56 | \n",
149 | " 6496.78 | \n",
150 | " 8787.61 | \n",
151 | " 0 | \n",
152 | " 305.0 | \n",
153 | " 44.0 | \n",
154 | " 11.0 | \n",
155 | " 4.0 | \n",
156 | "
\n",
157 | " \n",
158 | " | 2 | \n",
159 | " 4 | \n",
160 | " 2194 | \n",
161 | " 31 | \n",
162 | " Male | \n",
163 | " 0.0 | \n",
164 | " salaried | \n",
165 | " 146.0 | \n",
166 | " 2 | \n",
167 | " 41 | \n",
168 | " 3913.16 | \n",
169 | " ... | \n",
170 | " 0.61 | \n",
171 | " 6046.73 | \n",
172 | " 259.23 | \n",
173 | " 5006.28 | \n",
174 | " 5070.14 | \n",
175 | " 0 | \n",
176 | " NaN | \n",
177 | " NaN | \n",
178 | " NaN | \n",
179 | " NaN | \n",
180 | "
\n",
181 | " \n",
182 | " | 3 | \n",
183 | " 5 | \n",
184 | " 2329 | \n",
185 | " 90 | \n",
186 | " NaN | \n",
187 | " NaN | \n",
188 | " self_employed | \n",
189 | " 1020.0 | \n",
190 | " 2 | \n",
191 | " 582 | \n",
192 | " 2291.91 | \n",
193 | " ... | \n",
194 | " 0.47 | \n",
195 | " 0.47 | \n",
196 | " 2143.33 | \n",
197 | " 2291.91 | \n",
198 | " 1669.79 | \n",
199 | " 1 | \n",
200 | " 218.0 | \n",
201 | " 32.0 | \n",
202 | " 8.0 | \n",
203 | " 1.0 | \n",
204 | "
\n",
205 | " \n",
206 | " | 4 | \n",
207 | " 6 | \n",
208 | " 1579 | \n",
209 | " 42 | \n",
210 | " Male | \n",
211 | " 2.0 | \n",
212 | " self_employed | \n",
213 | " 1494.0 | \n",
214 | " 3 | \n",
215 | " 388 | \n",
216 | " 927.72 | \n",
217 | " ... | \n",
218 | " 714.61 | \n",
219 | " 588.62 | \n",
220 | " 1538.06 | \n",
221 | " 1157.15 | \n",
222 | " 1677.16 | \n",
223 | " 1 | \n",
224 | " 307.0 | \n",
225 | " 44.0 | \n",
226 | " 11.0 | \n",
227 | " 6.0 | \n",
228 | "
\n",
229 | " \n",
230 | "
\n",
231 | "
5 rows × 24 columns
\n",
232 | "
"
233 | ],
234 | "text/plain": [
235 | " customer_id vintage age gender dependents occupation city \\\n",
236 | "0 1 2101 66 Male 0.0 self_employed 187.0 \n",
237 | "1 2 2348 35 Male 0.0 self_employed NaN \n",
238 | "2 4 2194 31 Male 0.0 salaried 146.0 \n",
239 | "3 5 2329 90 NaN NaN self_employed 1020.0 \n",
240 | "4 6 1579 42 Male 2.0 self_employed 1494.0 \n",
241 | "\n",
242 | " customer_nw_category branch_code current_balance ... \\\n",
243 | "0 2 755 1458.71 ... \n",
244 | "1 2 3214 5390.37 ... \n",
245 | "2 2 41 3913.16 ... \n",
246 | "3 2 582 2291.91 ... \n",
247 | "4 3 388 927.72 ... \n",
248 | "\n",
249 | " previous_month_credit current_month_debit previous_month_debit \\\n",
250 | "0 0.20 0.20 0.20 \n",
251 | "1 0.56 5486.27 100.56 \n",
252 | "2 0.61 6046.73 259.23 \n",
253 | "3 0.47 0.47 2143.33 \n",
254 | "4 714.61 588.62 1538.06 \n",
255 | "\n",
256 | " current_month_balance previous_month_balance churn doy_ls_tran \\\n",
257 | "0 1458.71 1458.71 0 141.0 \n",
258 | "1 6496.78 8787.61 0 305.0 \n",
259 | "2 5006.28 5070.14 0 NaN \n",
260 | "3 2291.91 1669.79 1 218.0 \n",
261 | "4 1157.15 1677.16 1 307.0 \n",
262 | "\n",
263 | " woy_ls_tran moy_ls_tran dow_ls_tran \n",
264 | "0 21.0 5.0 1.0 \n",
265 | "1 44.0 11.0 4.0 \n",
266 | "2 NaN NaN NaN \n",
267 | "3 32.0 8.0 1.0 \n",
268 | "4 44.0 11.0 6.0 \n",
269 | "\n",
270 | "[5 rows x 24 columns]"
271 | ]
272 | },
273 | "execution_count": 16,
274 | "metadata": {},
275 | "output_type": "execute_result"
276 | }
277 | ],
278 | "source": [
279 | "# First look\n",
280 | "data.head()"
281 | ]
282 | },
283 | {
284 | "cell_type": "code",
285 | "execution_count": 17,
286 | "metadata": {
287 | "colab": {
288 | "base_uri": "https://localhost:8080/",
289 | "height": 33
290 | },
291 | "colab_type": "code",
292 | "executionInfo": {
293 | "elapsed": 1196,
294 | "status": "ok",
295 | "timestamp": 1581056282914,
296 | "user": {
297 | "displayName": "Sharoon Saxena",
298 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64",
299 | "userId": "01167841530696023488"
300 | },
301 | "user_tz": -330
302 | },
303 | "id": "VRCymO1kQ_oA",
304 | "outputId": "1fbba1c9-53da-4e3d-c6fd-edb0a8eed4bf"
305 | },
306 | "outputs": [
307 | {
308 | "data": {
309 | "text/plain": [
310 | "(28382, 24)"
311 | ]
312 | },
313 | "execution_count": 17,
314 | "metadata": {},
315 | "output_type": "execute_result"
316 | }
317 | ],
318 | "source": [
319 | "#shape of the data\n",
320 | "data.shape"
321 | ]
322 | },
323 | {
324 | "cell_type": "code",
325 | "execution_count": 18,
326 | "metadata": {
327 | "colab": {
328 | "base_uri": "https://localhost:8080/",
329 | "height": 163
330 | },
331 | "colab_type": "code",
332 | "executionInfo": {
333 | "elapsed": 1135,
334 | "status": "ok",
335 | "timestamp": 1581056285543,
336 | "user": {
337 | "displayName": "Sharoon Saxena",
338 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64",
339 | "userId": "01167841530696023488"
340 | },
341 | "user_tz": -330
342 | },
343 | "id": "0zQJF63XQ_oD",
344 | "outputId": "de4bdf95-994e-4964-d223-b01626604ea8"
345 | },
346 | "outputs": [
347 | {
348 | "data": {
349 | "text/plain": [
350 | "Index(['customer_id', 'vintage', 'age', 'gender', 'dependents', 'occupation',\n",
351 | " 'city', 'customer_nw_category', 'branch_code', 'current_balance',\n",
352 | " 'previous_month_end_balance', 'average_monthly_balance_prevQ',\n",
353 | " 'average_monthly_balance_prevQ2', 'current_month_credit',\n",
354 | " 'previous_month_credit', 'current_month_debit', 'previous_month_debit',\n",
355 | " 'current_month_balance', 'previous_month_balance', 'churn',\n",
356 | " 'doy_ls_tran', 'woy_ls_tran', 'moy_ls_tran', 'dow_ls_tran'],\n",
357 | " dtype='object')"
358 | ]
359 | },
360 | "execution_count": 18,
361 | "metadata": {},
362 | "output_type": "execute_result"
363 | }
364 | ],
365 | "source": [
366 | "#columns of data\n",
367 | "data.columns"
368 | ]
369 | },
370 | {
371 | "cell_type": "markdown",
372 | "metadata": {},
373 | "source": [
374 | "### Identificaiton of Data types "
375 | ]
376 | },
377 | {
378 | "cell_type": "code",
379 | "execution_count": 19,
380 | "metadata": {
381 | "colab": {
382 | "base_uri": "https://localhost:8080/",
383 | "height": 440
384 | },
385 | "colab_type": "code",
386 | "executionInfo": {
387 | "elapsed": 964,
388 | "status": "ok",
389 | "timestamp": 1581056286919,
390 | "user": {
391 | "displayName": "Sharoon Saxena",
392 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64",
393 | "userId": "01167841530696023488"
394 | },
395 | "user_tz": -330
396 | },
397 | "id": "IoHu82GiQ_oG",
398 | "outputId": "6fed1d1f-4fc8-42a2-ab9e-88dcc4f67557"
399 | },
400 | "outputs": [
401 | {
402 | "data": {
403 | "text/plain": [
404 | "customer_id int64\n",
405 | "vintage int64\n",
406 | "age int64\n",
407 | "gender object\n",
408 | "dependents float64\n",
409 | "occupation object\n",
410 | "city float64\n",
411 | "customer_nw_category int64\n",
412 | "branch_code int64\n",
413 | "current_balance float64\n",
414 | "previous_month_end_balance float64\n",
415 | "average_monthly_balance_prevQ float64\n",
416 | "average_monthly_balance_prevQ2 float64\n",
417 | "current_month_credit float64\n",
418 | "previous_month_credit float64\n",
419 | "current_month_debit float64\n",
420 | "previous_month_debit float64\n",
421 | "current_month_balance float64\n",
422 | "previous_month_balance float64\n",
423 | "churn int64\n",
424 | "doy_ls_tran float64\n",
425 | "woy_ls_tran float64\n",
426 | "moy_ls_tran float64\n",
427 | "dow_ls_tran float64\n",
428 | "dtype: object"
429 | ]
430 | },
431 | "execution_count": 19,
432 | "metadata": {},
433 | "output_type": "execute_result"
434 | }
435 | ],
436 | "source": [
437 | "#different data types\n",
438 | "data.dtypes"
439 | ]
440 | },
441 | {
442 | "cell_type": "markdown",
443 | "metadata": {},
444 | "source": [
445 | "### Mean Value"
446 | ]
447 | },
448 | {
449 | "cell_type": "code",
450 | "execution_count": 20,
451 | "metadata": {
452 | "colab": {
453 | "base_uri": "https://localhost:8080/",
454 | "height": 33
455 | },
456 | "colab_type": "code",
457 | "executionInfo": {
458 | "elapsed": 1018,
459 | "status": "ok",
460 | "timestamp": 1581056294995,
461 | "user": {
462 | "displayName": "Sharoon Saxena",
463 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64",
464 | "userId": "01167841530696023488"
465 | },
466 | "user_tz": -330
467 | },
468 | "id": "5VbQGrVYQ_oJ",
469 | "outputId": "8ef087dd-dc40-4932-fb78-fa71905f2b85"
470 | },
471 | "outputs": [
472 | {
473 | "name": "stdout",
474 | "output_type": "stream",
475 | "text": [
476 | "48.208336269466564\n",
477 | "47.461216730038025\n"
478 | ]
479 | }
480 | ],
481 | "source": [
482 | "#mean of age\n",
483 | "print(data['age'].mean())\n",
484 | "\n",
485 | "#mean of age who are likely to churn\n",
486 | "print(data[data['churn'] == 1]['age'].mean())"
487 | ]
488 | },
489 | {
490 | "cell_type": "markdown",
491 | "metadata": {},
492 | "source": [
493 | "The __mean value of Age of Customers__ and the __mean value of Age of Customers who are likely to churn__ is around 48"
494 | ]
495 | },
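An equivalent one-liner for the same comparison — an illustrative sketch that is not part of the original notebook, assuming `data` is the churn DataFrame loaded above:

# mean age for non-churners (churn == 0) and churners (churn == 1), in a single call
print(data.groupby('churn')['age'].mean())

Grouping on the target column avoids writing one boolean filter per group.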
496 | {
497 | "cell_type": "code",
498 | "execution_count": 21,
499 | "metadata": {
500 | "colab": {
501 | "base_uri": "https://localhost:8080/",
502 | "height": 33
503 | },
504 | "colab_type": "code",
505 | "executionInfo": {
506 | "elapsed": 781,
507 | "status": "ok",
508 | "timestamp": 1581056297085,
509 | "user": {
510 | "displayName": "Sharoon Saxena",
511 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64",
512 | "userId": "01167841530696023488"
513 | },
514 | "user_tz": -330
515 | },
516 | "id": "WvLj2Fo9Q_oR",
517 | "outputId": "05b175ec-f135-471c-f91f-d348d2ecd3a7"
518 | },
519 | "outputs": [
520 | {
521 | "name": "stdout",
522 | "output_type": "stream",
523 | "text": [
524 | "7380.55180360792\n",
525 | "5220.884321292776\n"
526 | ]
527 | }
528 | ],
529 | "source": [
530 | "#current balance mean\n",
531 | "print(data['current_balance'].mean())\n",
532 | "\n",
533 | "#current balance of customers who are likely to churn\n",
534 | "print(data[data['churn']==1]['current_balance'].mean())"
535 | ]
536 | },
537 | {
538 | "cell_type": "markdown",
539 | "metadata": {},
540 | "source": [
541 | "* The __mean value of current balance of Customers___ is __7380.55180360792__ \n",
542 | "* The __mean value of Current Balance of Customers__ who are likely to churn is __5220.884321292776__."
543 | ]
544 | },
545 | {
546 | "cell_type": "markdown",
547 | "metadata": {},
548 | "source": [
549 | "### Median"
550 | ]
551 | },
552 | {
553 | "cell_type": "code",
554 | "execution_count": 22,
555 | "metadata": {
556 | "colab": {
557 | "base_uri": "https://localhost:8080/",
558 | "height": 33
559 | },
560 | "colab_type": "code",
561 | "executionInfo": {
562 | "elapsed": 659,
563 | "status": "ok",
564 | "timestamp": 1581056300650,
565 | "user": {
566 | "displayName": "Sharoon Saxena",
567 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64",
568 | "userId": "01167841530696023488"
569 | },
570 | "user_tz": -330
571 | },
572 | "id": "aHIkif6QQ_oW",
573 | "outputId": "cbf009f7-8780-425e-b2ec-3c703be9759b"
574 | },
575 | "outputs": [
576 | {
577 | "data": {
578 | "text/plain": [
579 | "46.0"
580 | ]
581 | },
582 | "execution_count": 22,
583 | "metadata": {},
584 | "output_type": "execute_result"
585 | }
586 | ],
587 | "source": [
588 | "#median of age\n",
589 | "data['age'].median()"
590 | ]
591 | },
592 | {
593 | "cell_type": "markdown",
594 | "metadata": {},
595 | "source": [
596 | "* __Median__ of age is 46. \n",
597 | "* That means 46 is the __50th percentile__ of the age of customers."
598 | ]
599 | },
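A quick check, added here purely as an illustration (it assumes `data` is the DataFrame above), that the median is the same as the 0.5 quantile:

# the median is by definition the 50th percentile, so the two calls agree
print(data['age'].median())        # 46.0
print(data['age'].quantile(0.5))   # 46.0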
600 | {
601 | "cell_type": "markdown",
602 | "metadata": {},
603 | "source": [
604 | "### Standard Deviation and Variance"
605 | ]
606 | },
607 | {
608 | "cell_type": "code",
609 | "execution_count": 23,
610 | "metadata": {
611 | "colab": {
612 | "base_uri": "https://localhost:8080/",
613 | "height": 33
614 | },
615 | "colab_type": "code",
616 | "executionInfo": {
617 | "elapsed": 1079,
618 | "status": "ok",
619 | "timestamp": 1581056392277,
620 | "user": {
621 | "displayName": "Sharoon Saxena",
622 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64",
623 | "userId": "01167841530696023488"
624 | },
625 | "user_tz": -330
626 | },
627 | "id": "_pRqupeJQ_ok",
628 | "outputId": "309e9107-c905-44ff-8e68-c0705e2907d4"
629 | },
630 | "outputs": [
631 | {
632 | "name": "stdout",
633 | "output_type": "stream",
634 | "text": [
635 | "42598.711923233204\n",
636 | "1814650257.5186107\n"
637 | ]
638 | }
639 | ],
640 | "source": [
641 | "print(data['current_balance'].std())\n",
642 | "print(data['current_balance'].var())"
643 | ]
644 | },
645 | {
646 | "cell_type": "markdown",
647 | "metadata": {},
648 | "source": [
649 | "* __standard deviation__ for current balance of the customers is 42598.711923233204\n",
650 | "* __variance__ for current balance of the customers is 1814650257.5186107"
651 | ]
652 | },
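As a sanity check — an illustrative sketch, not in the original notebook, assuming `data` is the DataFrame above — the variance is just the square of the standard deviation (pandas uses ddof=1 for both by default):

# variance should equal std squared, up to floating-point rounding
std = data['current_balance'].std()
var = data['current_balance'].var()
print(std ** 2)
print(var)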
653 | {
654 | "cell_type": "markdown",
655 | "metadata": {},
656 | "source": [
657 | "### Describe Function"
658 | ]
659 | },
660 | {
661 | "cell_type": "markdown",
662 | "metadata": {
663 | "colab": {
664 | "base_uri": "https://localhost:8080/",
665 | "height": 33
666 | },
667 | "colab_type": "code",
668 | "executionInfo": {
669 | "elapsed": 659,
670 | "status": "ok",
671 | "timestamp": 1581056393920,
672 | "user": {
673 | "displayName": "Sharoon Saxena",
674 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64",
675 | "userId": "01167841530696023488"
676 | },
677 | "user_tz": -330
678 | },
679 | "id": "TZ6WONNhQ_oo",
680 | "outputId": "c73c8bda-6079-4f12-85c1-068f1632537a"
681 | },
682 | "source": [
683 | "describe() function is used to view some basic __descriptive statistical details__ like __percentile, mean, std etc.__ of a data frame."
684 | ]
685 | },
686 | {
687 | "cell_type": "code",
688 | "execution_count": 24,
689 | "metadata": {
690 | "colab": {
691 | "base_uri": "https://localhost:8080/",
692 | "height": 297
693 | },
694 | "colab_type": "code",
695 | "executionInfo": {
696 | "elapsed": 1279,
697 | "status": "ok",
698 | "timestamp": 1581056402110,
699 | "user": {
700 | "displayName": "Sharoon Saxena",
701 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64",
702 | "userId": "01167841530696023488"
703 | },
704 | "user_tz": -330
705 | },
706 | "id": "G2aklW5KQ_ow",
707 | "outputId": "3a64f669-809f-41b6-a077-06cf350af978"
708 | },
709 | "outputs": [
710 | {
711 | "data": {
712 | "text/html": [
713 | "\n",
714 | "\n",
727 | "
\n",
728 | " \n",
729 | " \n",
730 | " | \n",
731 | " customer_id | \n",
732 | " vintage | \n",
733 | " age | \n",
734 | " dependents | \n",
735 | " city | \n",
736 | " customer_nw_category | \n",
737 | " branch_code | \n",
738 | " current_balance | \n",
739 | " previous_month_end_balance | \n",
740 | " average_monthly_balance_prevQ | \n",
741 | " ... | \n",
742 | " previous_month_credit | \n",
743 | " current_month_debit | \n",
744 | " previous_month_debit | \n",
745 | " current_month_balance | \n",
746 | " previous_month_balance | \n",
747 | " churn | \n",
748 | " doy_ls_tran | \n",
749 | " woy_ls_tran | \n",
750 | " moy_ls_tran | \n",
751 | " dow_ls_tran | \n",
752 | "
\n",
753 | " \n",
754 | " \n",
755 | " \n",
756 | " | count | \n",
757 | " 28382.000000 | \n",
758 | " 28382.000000 | \n",
759 | " 28382.000000 | \n",
760 | " 25919.000000 | \n",
761 | " 27579.000000 | \n",
762 | " 28382.000000 | \n",
763 | " 28382.000000 | \n",
764 | " 2.838200e+04 | \n",
765 | " 2.838200e+04 | \n",
766 | " 2.838200e+04 | \n",
767 | " ... | \n",
768 | " 2.838200e+04 | \n",
769 | " 2.838200e+04 | \n",
770 | " 2.838200e+04 | \n",
771 | " 2.838200e+04 | \n",
772 | " 2.838200e+04 | \n",
773 | " 28382.000000 | \n",
774 | " 25159.000000 | \n",
775 | " 25159.000000 | \n",
776 | " 25159.000000 | \n",
777 | " 25159.000000 | \n",
778 | "
\n",
779 | " \n",
780 | " | mean | \n",
781 | " 15143.508667 | \n",
782 | " 2091.144105 | \n",
783 | " 48.208336 | \n",
784 | " 0.347236 | \n",
785 | " 796.109576 | \n",
786 | " 2.225530 | \n",
787 | " 925.975019 | \n",
788 | " 7.380552e+03 | \n",
789 | " 7.495771e+03 | \n",
790 | " 7.496780e+03 | \n",
791 | " ... | \n",
792 | " 3.261694e+03 | \n",
793 | " 3.658745e+03 | \n",
794 | " 3.339761e+03 | \n",
795 | " 7.451133e+03 | \n",
796 | " 7.495177e+03 | \n",
797 | " 0.185329 | \n",
798 | " 295.045709 | \n",
799 | " 39.116300 | \n",
800 | " 10.142255 | \n",
801 | " 3.042728 | \n",
802 | "
\n",
803 | " \n",
804 | " | std | \n",
805 | " 8746.454456 | \n",
806 | " 272.676775 | \n",
807 | " 17.807163 | \n",
808 | " 0.997661 | \n",
809 | " 432.872102 | \n",
810 | " 0.660443 | \n",
811 | " 937.799129 | \n",
812 | " 4.259871e+04 | \n",
813 | " 4.252935e+04 | \n",
814 | " 4.172622e+04 | \n",
815 | " ... | \n",
816 | " 2.968889e+04 | \n",
817 | " 5.198542e+04 | \n",
818 | " 2.430111e+04 | \n",
819 | " 4.203394e+04 | \n",
820 | " 4.243198e+04 | \n",
821 | " 0.388571 | \n",
822 | " 86.284356 | \n",
823 | " 15.889797 | \n",
824 | " 2.788671 | \n",
825 | " 1.712724 | \n",
826 | "
\n",
827 | " \n",
828 | " | min | \n",
829 | " 1.000000 | \n",
830 | " 73.000000 | \n",
831 | " 1.000000 | \n",
832 | " 0.000000 | \n",
833 | " 0.000000 | \n",
834 | " 1.000000 | \n",
835 | " 1.000000 | \n",
836 | " -5.503960e+03 | \n",
837 | " -3.149570e+03 | \n",
838 | " 1.428690e+03 | \n",
839 | " ... | \n",
840 | " 1.000000e-02 | \n",
841 | " 1.000000e-02 | \n",
842 | " 1.000000e-02 | \n",
843 | " -3.374180e+03 | \n",
844 | " -5.171920e+03 | \n",
845 | " 0.000000 | \n",
846 | " 1.000000 | \n",
847 | " 1.000000 | \n",
848 | " 1.000000 | \n",
849 | " 0.000000 | \n",
850 | "
\n",
851 | " \n",
852 | " | 25% | \n",
853 | " 7557.250000 | \n",
854 | " 1958.000000 | \n",
855 | " 36.000000 | \n",
856 | " 0.000000 | \n",
857 | " 409.000000 | \n",
858 | " 2.000000 | \n",
859 | " 176.000000 | \n",
860 | " 1.784470e+03 | \n",
861 | " 1.906000e+03 | \n",
862 | " 2.180945e+03 | \n",
863 | " ... | \n",
864 | " 3.300000e-01 | \n",
865 | " 4.100000e-01 | \n",
866 | " 4.100000e-01 | \n",
867 | " 1.996765e+03 | \n",
868 | " 2.074408e+03 | \n",
869 | " 0.000000 | \n",
870 | " 270.000000 | \n",
871 | " 33.000000 | \n",
872 | " 9.000000 | \n",
873 | " 1.000000 | \n",
874 | "
\n",
875 | " \n",
876 | " | 50% | \n",
877 | " 15150.500000 | \n",
878 | " 2154.000000 | \n",
879 | " 46.000000 | \n",
880 | " 0.000000 | \n",
881 | " 834.000000 | \n",
882 | " 2.000000 | \n",
883 | " 572.000000 | \n",
884 | " 3.281255e+03 | \n",
885 | " 3.379915e+03 | \n",
886 | " 3.542865e+03 | \n",
887 | " ... | \n",
888 | " 6.300000e-01 | \n",
889 | " 9.193000e+01 | \n",
890 | " 1.099600e+02 | \n",
891 | " 3.447995e+03 | \n",
892 | " 3.465235e+03 | \n",
893 | " 0.000000 | \n",
894 | " 335.000000 | \n",
895 | " 47.000000 | \n",
896 | " 12.000000 | \n",
897 | " 3.000000 | \n",
898 | "
\n",
899 | " \n",
900 | " | 75% | \n",
901 | " 22706.750000 | \n",
902 | " 2292.000000 | \n",
903 | " 60.000000 | \n",
904 | " 0.000000 | \n",
905 | " 1096.000000 | \n",
906 | " 3.000000 | \n",
907 | " 1440.000000 | \n",
908 | " 6.635820e+03 | \n",
909 | " 6.656535e+03 | \n",
910 | " 6.666887e+03 | \n",
911 | " ... | \n",
912 | " 7.492350e+02 | \n",
913 | " 1.360435e+03 | \n",
914 | " 1.357553e+03 | \n",
915 | " 6.667958e+03 | \n",
916 | " 6.654693e+03 | \n",
917 | " 0.000000 | \n",
918 | " 354.000000 | \n",
919 | " 50.000000 | \n",
920 | " 12.000000 | \n",
921 | " 5.000000 | \n",
922 | "
\n",
923 | " \n",
924 | " | max | \n",
925 | " 30301.000000 | \n",
926 | " 2476.000000 | \n",
927 | " 90.000000 | \n",
928 | " 52.000000 | \n",
929 | " 1649.000000 | \n",
930 | " 3.000000 | \n",
931 | " 4782.000000 | \n",
932 | " 5.905904e+06 | \n",
933 | " 5.740439e+06 | \n",
934 | " 5.700290e+06 | \n",
935 | " ... | \n",
936 | " 2.361808e+06 | \n",
937 | " 7.637857e+06 | \n",
938 | " 1.414168e+06 | \n",
939 | " 5.778185e+06 | \n",
940 | " 5.720144e+06 | \n",
941 | " 1.000000 | \n",
942 | " 365.000000 | \n",
943 | " 52.000000 | \n",
944 | " 12.000000 | \n",
945 | " 6.000000 | \n",
946 | "
\n",
947 | " \n",
948 | "
\n",
949 | "
8 rows × 22 columns
\n",
950 | "
"
951 | ],
952 | "text/plain": [
953 | " customer_id vintage age dependents city \\\n",
954 | "count 28382.000000 28382.000000 28382.000000 25919.000000 27579.000000 \n",
955 | "mean 15143.508667 2091.144105 48.208336 0.347236 796.109576 \n",
956 | "std 8746.454456 272.676775 17.807163 0.997661 432.872102 \n",
957 | "min 1.000000 73.000000 1.000000 0.000000 0.000000 \n",
958 | "25% 7557.250000 1958.000000 36.000000 0.000000 409.000000 \n",
959 | "50% 15150.500000 2154.000000 46.000000 0.000000 834.000000 \n",
960 | "75% 22706.750000 2292.000000 60.000000 0.000000 1096.000000 \n",
961 | "max 30301.000000 2476.000000 90.000000 52.000000 1649.000000 \n",
962 | "\n",
963 | " customer_nw_category branch_code current_balance \\\n",
964 | "count 28382.000000 28382.000000 2.838200e+04 \n",
965 | "mean 2.225530 925.975019 7.380552e+03 \n",
966 | "std 0.660443 937.799129 4.259871e+04 \n",
967 | "min 1.000000 1.000000 -5.503960e+03 \n",
968 | "25% 2.000000 176.000000 1.784470e+03 \n",
969 | "50% 2.000000 572.000000 3.281255e+03 \n",
970 | "75% 3.000000 1440.000000 6.635820e+03 \n",
971 | "max 3.000000 4782.000000 5.905904e+06 \n",
972 | "\n",
973 | " previous_month_end_balance average_monthly_balance_prevQ ... \\\n",
974 | "count 2.838200e+04 2.838200e+04 ... \n",
975 | "mean 7.495771e+03 7.496780e+03 ... \n",
976 | "std 4.252935e+04 4.172622e+04 ... \n",
977 | "min -3.149570e+03 1.428690e+03 ... \n",
978 | "25% 1.906000e+03 2.180945e+03 ... \n",
979 | "50% 3.379915e+03 3.542865e+03 ... \n",
980 | "75% 6.656535e+03 6.666887e+03 ... \n",
981 | "max 5.740439e+06 5.700290e+06 ... \n",
982 | "\n",
983 | " previous_month_credit current_month_debit previous_month_debit \\\n",
984 | "count 2.838200e+04 2.838200e+04 2.838200e+04 \n",
985 | "mean 3.261694e+03 3.658745e+03 3.339761e+03 \n",
986 | "std 2.968889e+04 5.198542e+04 2.430111e+04 \n",
987 | "min 1.000000e-02 1.000000e-02 1.000000e-02 \n",
988 | "25% 3.300000e-01 4.100000e-01 4.100000e-01 \n",
989 | "50% 6.300000e-01 9.193000e+01 1.099600e+02 \n",
990 | "75% 7.492350e+02 1.360435e+03 1.357553e+03 \n",
991 | "max 2.361808e+06 7.637857e+06 1.414168e+06 \n",
992 | "\n",
993 | " current_month_balance previous_month_balance churn \\\n",
994 | "count 2.838200e+04 2.838200e+04 28382.000000 \n",
995 | "mean 7.451133e+03 7.495177e+03 0.185329 \n",
996 | "std 4.203394e+04 4.243198e+04 0.388571 \n",
997 | "min -3.374180e+03 -5.171920e+03 0.000000 \n",
998 | "25% 1.996765e+03 2.074408e+03 0.000000 \n",
999 | "50% 3.447995e+03 3.465235e+03 0.000000 \n",
1000 | "75% 6.667958e+03 6.654693e+03 0.000000 \n",
1001 | "max 5.778185e+06 5.720144e+06 1.000000 \n",
1002 | "\n",
1003 | " doy_ls_tran woy_ls_tran moy_ls_tran dow_ls_tran \n",
1004 | "count 25159.000000 25159.000000 25159.000000 25159.000000 \n",
1005 | "mean 295.045709 39.116300 10.142255 3.042728 \n",
1006 | "std 86.284356 15.889797 2.788671 1.712724 \n",
1007 | "min 1.000000 1.000000 1.000000 0.000000 \n",
1008 | "25% 270.000000 33.000000 9.000000 1.000000 \n",
1009 | "50% 335.000000 47.000000 12.000000 3.000000 \n",
1010 | "75% 354.000000 50.000000 12.000000 5.000000 \n",
1011 | "max 365.000000 52.000000 12.000000 6.000000 \n",
1012 | "\n",
1013 | "[8 rows x 22 columns]"
1014 | ]
1015 | },
1016 | "execution_count": 24,
1017 | "metadata": {},
1018 | "output_type": "execute_result"
1019 | }
1020 | ],
1021 | "source": [
1022 | "data.describe(include=[int, float])"
1023 | ]
1024 | },
1025 | {
1026 | "cell_type": "markdown",
1027 | "metadata": {},
1028 | "source": [
1029 | "* count represents total number of data points.\n",
1030 | "* mean represents average value\n",
1031 | "* std represents standard deviation\n",
1032 | "* min represents the minimum value of the column\n",
1033 | "* 25% represents 25th percentile that means 25% data fall below this value.\n",
1034 | "* 50% represents 50th percentile that means 50% data fall below this value. This is __Median__\n",
1035 | "* 75% represents the 75th percentile here\n",
1036 | "* max represents the maximum value of the column"
1037 | ]
1038 | },
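A small sketch (not part of the original notebook; it assumes `data` is the DataFrame above) showing that the 25%/50%/75% rows of describe() are just quantiles:

# the same percentiles as in describe(), computed directly for age
print(data['age'].quantile([0.25, 0.5, 0.75]))
# 0.25    36.0
# 0.50    46.0
# 0.75    60.0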
1039 | {
1040 | "cell_type": "code",
1041 | "execution_count": 25,
1042 | "metadata": {
1043 | "colab": {},
1044 | "colab_type": "code",
1045 | "id": "RFier7inQ_o1"
1046 | },
1047 | "outputs": [],
1048 | "source": [
1049 | "churn_age = data[data['churn']==1]['age']"
1050 | ]
1051 | },
1052 | {
1053 | "cell_type": "markdown",
1054 | "metadata": {},
1055 | "source": [
1056 | "A __Histogram__ visualises the distribution of data over a continuous interval. \n",
1057 | "Each bar in a histogram represents the tabulated __frequency__ at each __interval/bin__. \n",
1058 | "In simple words height represents the frequency for respective bin (interval)"
1059 | ]
1060 | },
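A brief sketch of the bin counts behind such a histogram — illustrative only; it assumes `churn_age` from the cell above and uses NumPy, which plt.hist relies on internally:

import numpy as np

# counts per bin and the bin edges used by plt.hist(churn_age, bins=10)
counts, edges = np.histogram(churn_age, bins=10)
print(counts)   # frequency (number of customers) in each of the 10 bins
print(edges)    # the 11 bin edges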
1061 | {
1062 | "cell_type": "code",
1063 | "execution_count": 26,
1064 | "metadata": {
1065 | "colab": {
1066 | "base_uri": "https://localhost:8080/",
1067 | "height": 573
1068 | },
1069 | "colab_type": "code",
1070 | "executionInfo": {
1071 | "elapsed": 1734,
1072 | "status": "ok",
1073 | "timestamp": 1581058880316,
1074 | "user": {
1075 | "displayName": "Sharoon Saxena",
1076 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64",
1077 | "userId": "01167841530696023488"
1078 | },
1079 | "user_tz": -330
1080 | },
1081 | "id": "gi_dN-BNQ_o7",
1082 | "outputId": "97e6261e-6b33-4050-a41b-74a0ec8852c0"
1083 | },
1084 | "outputs": [
1085 | {
1086 | "data": {
1087 | "text/plain": [
1088 | ""
1089 | ]
1090 | },
1091 | "execution_count": 26,
1092 | "metadata": {},
1093 | "output_type": "execute_result"
1094 | },
1095 | {
1096 | "data": {
1097 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAA9kAAAIcCAYAAADi5m1ZAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAVhwAAFYcBshnuugAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAgAElEQVR4nOzde7xWVZ348c8XQThxp0QhVDQtnXC8oCajjpImdtXMUnMqYzCLRglLxelHgWZhhKg5eY0ca7SmHLPRwlsiqeAFxtRSNBVTERIQEEIUWL8/9j7w8PA85wL7nOecw+f9eu3XPmevtfZaez+bw/metddakVJCkiRJkiRtvU61boAkSZIkSR2FQbYkSZIkSQUxyJYkSZIkqSAG2ZIkSZIkFcQgW5IkSZKkghhkS5IkSZJUEINsSZIkSZIKYpAtSZIkSVJBDLIlSZIkSSqIQbYkSZIkSQUxyJYkSZIkqSAG2ZIkSZIkFcQgW5IkSZKkghhkS1IbExEp3wbXui2SWk5EzM//rR9Zdvy0/PiM2rRMkrQ1DLIlSZIkSSqIQbYkSVLbshyYB/y11g2RJDVf51o3QJIkSRullG4Bbql1OyRJW8aebEmSJEmSCmKQLUktLDLHR8StEbEgItZExKKImB0R4yNiQANl94mIX+T510TEvIj4VkRsXyHvkflkSfMbON/1eZ4JDZWNiFMi4r6IWFo6MVNETMi/vz4itouIsRHxeET8PSJej4jbImLolt2pzdq6f0RcEBEPRMTLEfFWRCyJiN9HxOcjIhoo2ysiLo6I5yLizYh4JSJ+HBGDGptUKr+uL0bEPRGxOK/3lYj4r4jYt4hrK6tvQ3vyZ+XfIuKxiFgVEQsj4qcRsXNJ/qMi4o78XqyKiD9ExBGN1HFARPxnPtHWmxGxLCJm5nVv9rtA3o4PR8QVEfF/EfFa/vy9EhG/iojDG6hrw8R9zXl+t0ZbvIcl5Y6MiDvz/G9ExCMRMbKp11Mh7b0RMS4i7o2IF0va8mBEnFXt3kbJv/2IqIuIifnn8Wb++f4iIvZsqF1boux5ODh/fhZGxLrIfw7V8nmLiKGR/WxeEtnPsccj+7nWKX+eUkScVqXsgIj4fkQ8GREr82fp8Yj4dkT03Np7J6kdSym5ubm5ubXQBtQBvwZSvi0GHgH+AryVHzutrEx93i8Bq4E3gEeBV0rSflWhriPztPkNtOf6PM+EamWBqfnXC4GH83qPzPNNyNN+BtyRf/0s8BjwZv7934GDCrh3j+bnWwY8ld+30ntwY5Vy7wL+VJLvz8Dc/H6/BkzMj8+oULYvMLOk7Ct52RX5928BJ1ept77Mac28ztPq2wPcWHJPHy95Rl4A3gmMBtbnn82c/NlIwBrg0CrnPycvk/LreAx4qaS9twDblZXpkaetB/4G/DEvt7Tk+JcbuQ/Nen638llpc/cwLzeqpNyy/BlekH8/lezfWyL/91Xpeiqc81d52hv5NT5ccp76e7B9A//2L8nbv57s38aTwNts/Pm0a8GfTX27vp7XU/88zAO+XcvnDTiu5PlYmX8+z+ff/zK/lxX/TQNH5Z9p/bPzdL6tzY89DQws8l66ubm1n63mDXBzc3PryFvJL7avAycCnUrS6oDPAYeVlan/xfAt4GKgW0naZ0t+aR9eVu5Itj7IXksWLJ8KRJ4WQNf86wklbXsBGFpyjncBD+Tp9xVw7z4LDKlw/CDgmbyezQJe4L/ztBeBfUuODwD+UPJL9YwKZX+bp/2htG6yN7++BqzLf5F/b4WyWxtkvwW8CgwrSds9v88JuJXsDxijSj6b7mz8Y8f9Fc59Usnz9/my5+8gsiAtAePLym1PFrQMLDu+HfAZYFXe3p0buA/Nen638llpi/dwb7LgKwGXsfHfUOT3di0bg9sjq1xPpWf0OODg+vaXHN8LmJWXG9fAv/23yP64sEdJ2m5kQWEC/rOoz6XseVgL/KDseair1fNG9vOgPkj+GdCjJO1YsmC92h9C92DjH96+U1Z2ABt/jtxT5L10c3NrP1vNG+Dm5ubWUTfgH0t+AfxgM8rVl7m7Svpv8vRLyo4fydYH2Qn49wbKTyjJd3iF9APY2PPUuwXv7dF5Pb8rO757yS/Vh1Uo17/kl+MZVc75ItCnSr2X53l+VCHt5Xz7dDOv5bSSe7pZWbKe1/r0/6iQ/v6S9D4lxzuzsYfzk1XqHprfr9ep0PvZQJu/k5/3vKKe3618HtrcPQR+nJebW6XcNSV1HlnlemY0dN0VzrlHXu6pCmnX52lvAu+pkH5Cnr60qM+l7Hm4YyvOUfjzxsY3Wp4GOlco9+WSc59Wlvaz/PhlVersmf8sSMDBRd5PNze39rE5u7gktZwT8v2slNLvt6D8f1Q5Pgv4ONkv1C3huibkeTyl9IcKx/+PrPeuK/Aeslett1hE7AqcTBa8vys/LyX7/cuKjCDrKZyXUrq//Hwppb9FxK/J3iAod1K+vymltKxKk24GziR7VbT83IMauJSmeD2l9MsKx+eUfH1NhXr/FBFvAt3I7nl9/g8AuwILUzZb9WZSSnMi4kVgMFmwOKs0PSIOBj5J1ivbh42rkvTP9+X3v1Qtnt+2dA8/ku9/WKWtlwKnV7+U6iKiP9m/i4OBHfN2l85R8L6IqEspra5Q/I6U0nMVjte3u29E9EspLd2StjXgx41laOXn7dh8f31KaW2Fcv9J9hl1LT0YEV3yNgJcWanClNIbEXEX2R9LjiJ7pV/SNsQgW5Jazj75/sEtLP9MleOL8n1LTKyzOKX0tybkq9i2lFKKiL8BO7OV7YuIs4DJZK+SVvPOsu/fl+8fa6DM/1E5yK6f1OyEiDisStlu+X7nKulbo1LgA9kY1Xp/aSDPLmRjW+vVX09dRGz2B4cS9fdwZ/JAKyI6A9OofJ8qla2kFs9vm7iHEdEb2Ck/9qcqZeaRvULdrN/FIuJE4Cdl7dwsG9CPbFxyucY+F8g+m6KD7Gr3oVbPW4M/K1JKqyNiHtkbSaX2BN6Rf31dVJ9/cdd83xI/KyS1cQbZktRyeuX7ar2ijVlV5fj6fF/1t7utUK3O5uTb6vZFxDCycayQ9VD9J9nY1zdSSusiYneygKr8/7H6wOONBk5fLa1vvt8z3xpS10j6lqh2T9OGL1JqLE/pPa+/nt7AoU2o/x0lX3+DLOB5EzifbMzyX4G/539IGUnWM9mlgfO1pee3te9haUC3qFLG/DleQtYT3SQRMZjsVeWuZHMPXE72uvPylNLafJbzdXn2ap9NxetPKa0vCRhb+2dLLZ63Lf1Z0bfk6+b+u5K0jTDIlqSWsyLf92ml+ioFCeW6t0ZDCvCFfP+rlNK/VUiv1qO1Mt831EtaLa2+7MiU0k8aaV97UH89M1NKRzSz7Gn5/hsppUqv4TbUo9iRbOk9LA3OdiQb57+JiNiO5t/Hk8kC7IeBU1JK68vS2+vnclq+b83nbSXZH0+a+7Oi/plIQJeU0roKeSRt41wnW5JazuP5/p9aqb76npz+De
R5b2s0pAC75fuZVdIPqXJ8Xr5vaD3r/aocfyLfl78e2l7VX8/7G1rHuYotvf8dzRbdw5TScrIlwgD+oUq299H8zo76z+X+CgE2tN/PpRbPW4M/KyKijso/L58hm3cigCEt0C5JHYBBtiS1nJvJejuGRcSRrVDfX/L6ukXEZhMERcShtJ8A8u/5fkB5QkR0I5t8rJL6pZj2iojN/rgREe8Cjq9S9r/z/ecjosmv8LZh95OtyfxO4F+bWbah+78X2URS24KtuYe/y/fVntUxW9Cehj6XIHvtuj2qxfM2Pd9/IR8TXu5zbJyDYYN8Mrnb8m/PaYF2SeoADLIlqYWklJ4kWzYH4OaI+GSUDnqM6BYR/9LAJFvNrW8Z2frOAJdFxIZXLPOg+waydXnbg/vy/eiIOKj+YD6r8q+oMplQSul5oH526f+KiH1Kyu5E9oePiuOpU0q3AXeSTRh1b6XPJSJ2j4hzI2JUhbT5+XZiUy6wpaWU3mJjEPDDiPha3ju3QUT0iIhPRUT5jPL19/+7ETGgJP++wP+ycdxvoSJiQkSkiJjfEudvrq28h1PI/r0dEBGXRMT2ef6IiH8lC9orzWrdkPrP5dMR8dGSNvQkWxXg4Gaer0kiYnD+uaQW+oNhLZ63q4DlZDOZ/zgiNgyliYhjyNb0rvbz8ptkQwJOjYhr8p8tG0RE54g4IiKmRcS7W6Dtkto4g2xJallfJVuntR/wP8BrEfFwRDxLNmb7pxS7lNE5ZJMHHQ68HBGPRcQzZEtpPcvG3tq27lrgKbIxkw9FxLyImEu29uzRQKVx2vW+mpcdDPwxIv6Ul/0r2XrI38vzVfrF/STgbrJfvP8QEYvyz2tOPmv6c8DFQKXlunbNt4ZmfW5VKaUbga8B2wFTgaX5MzE7Iv5CFmT8iuyelhpPNvxgKPBCRPwxIp4mm4m5K3BBa11DrW3pPUwp/YmsFzsBY4FFEfEw2TN8HdnSXpVm/27Ib4AZZK+Z3xYRz0fEo2Svpp8GfGlLrrENaPXnLaX0KtncD2uBzwMLI+KRiHiO7I2YO9m4rNm6srLzgE8Ai8mWYXslIp6OiFkR8STZuO0ZwBdpeLI2SR2UQbYktaD81cLjgc+QvT66jmxMcE+ytXj/HxtfWyyivoeBw4DbyYLt9wFvAecCH6P5PWc1kc8AfTjZOrSvko3ZHADcQtZbd08DZReTjeGcDMwn+yPGTsCNZOttv5ZnXVGh7DKytbY/DdzKxs9r7zz/TcApwCVbd4WtJ6V0Gdlycv8BvEB2P4aS/TFgJnAe8KGyMk8Aw8juwWqy56gL2WzW+5N9Ji2hvhdzq9ZXL9qW3MO83NVkwffdZEH6P5C9fj4qpTR2C9qxnmz97Ul5OwaRLTt2L/DBlNJPm3vOJqr/XN4E/lz0yWv1vKWUbiX7WfG/ZL3WQ/L6zyH7g1t973alnxUzgL2AbwGPkP2MOQh4N/Ak2c+fQ6kw6Z2kji9SSo3nkiSpg4iI/wBGA1NTSmfXuj3aKCKeIgtcDk4pPVLr9igTEeeRBfY/Sil9tdbtaQ357O9LyZZi3Del9HgjRSRpA3uyJUnbjIjoTdZLDdVnMlYNRMQOZAH2PQbYbc7hZG/BTK51Q1rRZ8gC7CW0QO+9pI7NIFuS1KFERF1EXBARg8qO7072OuoOZK/a3l6L9qmq+onmJtW0FdpEvnTZocAvUkrza9ycQkXEsRFxUkR0LTkWEfFJ4Ef5oStTSu1imI2ktsPXxSVJLSoi7m9mkRNTSgsbz1a1vh5kM/8CPE82Brsf2TjaAF4HPpxSemhL65DU/kXEv5FNQPcm2frXbwK7A+/Ks9xL9rNiTW1aKKm9MsiWJLWoiGjufzS7bU2PWT6W8hyyCczeS7bG8XqyCYjuAKaklF7a0vNL6hjyNbi/ChxJNrlbb7I/0D1BNsnhj1NK7WXZQ0ltiEG2JEmSJEkFcUy2JEmSJEkFMciWJEmSJKkgBtmSJEmSJBXEIFuSJEmSpIJ0rnUDVF1EPA30JVuCRpIkSZLUenYHXk8p7dWcQgbZbVvfHj169B8yZEj/WjdEkiRJkrYlTz75JCtXrmx2OYPstu35IUOG9J81a1at2yFJkiRJ25Rhw4Yxe/bsZr9V7JhsSZIkSZIKYpAtSZIkSVJBDLIlSZIkSSqIQbYkSZIkSQUxyJYkSZIkqSAG2ZIkSZIkFcQgW5IkSZKkgrhOtiRJkqR2af369SxZsoQ33niDtWvXsn79+lo3SW1Up06d6Ny5Mz179uSd73wnnTq1XH+zQbYkSZKkdmfNmjX89a9/Ze3atRuORUQNW6S2bN26daxbt441a9awbNkydtllF7p27doidRlkS5IkSWp3Fi9ezNq1a6mrq2PHHXeka9euLdo7qfZt/fr1rFmzhkWLFrF69WoWL17Mu9/97hapy6dQkiRJUruzcuVKAAYNGkRdXZ0BthrUqVMn6urqGDRoELDx+WmRulrszJIkSZLUQlJKRASdO/tyrpquc+fORAQppRarwyBbkiRJkqSCGGRLkiRJklQQg2xJkiRJkgpikC1JkiRJUkEMsiVJkiRJKohT8UmS1IjB426vdRParfmTPlrrJkiS1KoMsiVJkiSpmtWr4dFHYcUK6NULDjwQ6upq3Sq1YQbZkiRJklTulVdg6lSYNg1ef33j8b59YeRIOPtsGDiwdu1Tm+WYbEmSJEkq9dhjMHQoTJmyaYAN2fdTpsABB2T5amjGjBlEBBMmTODBBx9k+PDh9OzZkx122IHRo0ezevVqAG6//XaGDRtG9+7d2XHHHTn33HNZu3btZue79dZbOeqoo+jbty/dunVjyJAh/OAHP2DdunWb5Fu+fDkXX3wxRxxxBAMHDmT77bdn4MCBfP7zn+e5557b7LwTJkwgIpgxYwY33ngj++23H3V1dQwYMIAxY8ZsaGdHYZAtSZIkSfVeeQWOPRYWLWo436JFWb4FC1qnXQ146KGHOOqoo+jduzdnnHEGu+yyC1deeSWnn346v/jFLzjxxBPZddddOeOMM+jTpw+TJ0/mu9/97ibnOP/88zn++OOZN28eJ5xwAqNHj6auro5zzjmHk08+eZO8Tz31FN/61reoq6vjk5/8JF/72tc48MADufHGGzn44IN58cUXK7bziiuu4Etf+hLvf//7+cpXvkLfvn25/PLLGTVqVIvdm1rwdXFJkiRJqjd1auMBdr1Fi7L8kye3bJsaMX36dH79619z3HHHAfD2229vCHrvuOMOZs6cyUEHHQTAxIkT2WOPPbjssss4//zz6dKlC3fddReTJk1ixIgR3HzzzXTv3h2AlBKjR4/mqquu4uabb+ZTn/oUAHvvvTevvvoq/fr126Qd9957L0cffTTf+c53uPbaazdr5913382cOXN43/veB8BFF13Efvvtx89//nMmT57MwA7y+r092ZIkSZIE2SRn06Y1r8y0aVm5Gho+fPiGABugS5cunHjiiaSU+PjHP74hwAbo2bMnH/vYx1i6dCkvv/wykPUwA1xzzTUbAmyAiGDSpElEBDfddNOG4717994sw
K5vx/vf/37uvvvuiu0cM2bMhgAboK6ujlNOOYX169czZ86cLbz6tseebEmSJEmCbBbx8jHYjVm6FObMgcMOa5k2NcF+++232bEBAwY0mrZgwQJ22203Zs+eTffu3ZlW5Q8MdXV1PP3005scmzFjBpdeeikPPfQQixcv3mSM9/bbb1/xPEOHDt3s2KBBgwBYtmxZxTLtkUG2JEmSJEG2TNeWWL682HY0U69evTY71rlz50bT3n77bQCWLl3K2rVrmThxYtU6Vq1ateHrX/7yl5x00kn06NGDESNGMHjwYN7xjncQEVx//fVVx2Q31JbyydXaM4NsSZIkSYJsHewt0bt3se1oZb169SIiWLx4cZPyT5gwgW7dujFnzhz23HPPTdJ+/vOft0QT2xXHZEuSJEkSwIEHZutgN0e/ftlyX+3YBz7wAZYsWcKzzz7bpPzPPfcce++992YB9quvvsrzzz/fEk1sVwyyJUmSJAmgrg5GjmxemZEjs3Lt2FlnnQXAyJEjWbJkyWbpCxcu5Kmnntrw/a677spf/vIXFpXMwv7mm2/yla98ZcMr6Nsyg2xJkiRJqnf22bDjjk3Lu9NOMHZsy7anFRx77LGMHz+e+++/nz322INTTjmFcePGcfrppzN8+HAGDRrErbfeuiH/mWeeyYoVK9h///0566yzGD16NPvssw9PPvkk++67bw2vpG0wyJYkSZKkegMHwvTpjQfaO+2U5esgaztfcMEF3HXXXRx++OHcc889XHLJJdx2222sWbOGCRMmcOqpp27I+9WvfpWrrrqKfv36ce2113LLLbdwxBFHMGvWLPr06VPDq2gbIqVU6zaoioiYdcghhxwya9asWjdFkrZpg8fdXusmtFvzJ3201k2Q1EHVLym11157tUwFCxbA1KnZOthLl2483q9f9or42LEdJsDe1jT12Rk2bBizZ8+enVIa1pzzO7u4JEmSJJUbOBAmT4YLLsjWwV6+PJtFfOjQdj8GWy3LIFuSJEmSqqmrg8MOq3Ur1I44JluSJEmSpIIYZEuSJEmSVBCDbEmSJEmSCmKQLUmSJElSQQyyJUmSJEkqiEG2JEmSJEkFMciWJEmSJKkgBtmSJEmSJBXEIFuSJEmSpIIYZEuSJEmSVBCDbEmSJEmSCmKQLUmSJEkqxIwZM4gIJkyYsMnxwYMHM3jw4Jq0qbUZZEuSJEmSVJDOtW6AJEmSJLVVq99ezaMLHmXFmhX06tqLAwceSF2Xulo3q9255557at2EVmOQLUmSJEllXlnxClNnT2Xa/03j9Tdf33C8b7e+jNx/JGcPO5uBPQfWsIXty3ve855aN6HV+Lq4JEmSJJV4bOFjDL1mKFNmTdkkwAZ4/c3XmTJrCgdcfQCPLXysRi3MlI5/fvDBBxk+fDg9e/Zkhx12YPTo0axevRqA22+/nWHDhtG9e3d23HFHzj33XNauXbvZ+W699VaOOuoo+vbtS7du3RgyZAg/+MEPWLdu3WZ5V69ezbhx49h555035L322murtrXSmOwFCxbw7W9/m0MOOYT+/fvTtWtXBg8ezOjRo/nb3/622TlOO+00IoIXXniByy+/nL322ouuXbuy6667MnHiRNavX9/MO9gyDLIlSZIkKffKilc49mfHsmjVogbzLVq1iGN/diwL3ljQSi2r7qGHHuKoo46id+/enHHGGeyyyy5ceeWVnH766fziF7/gxBNPZNddd+WMM86gT58+TJ48me9+97ubnOP888/n+OOPZ968eZxwwgmMHj2auro6zjnnHE4++eRN8q5fv55PfOITXHzxxfTt25cxY8ZwyCGHMHbsWKZMmdLkds+cOZMpU6aw4447csopp3DmmWfynve8hyuvvJJhw4axfPnyiuXOOeccLrzwQoYNG8aXv/xlACZMmMD48eObeedahq+LS5IkSVJu6uypjQbY9RatWsTUWVOZfMzkFm5Vw6ZPn86vf/1rjjvuOADefvttDjzwQG688UbuuOMOZs6cyUEHHQTAxIkT2WOPPbjssss4//zz6dKlC3fddReTJk1ixIgR3HzzzXTv3h2AlBKjR4/mqquu4uabb+ZTn/oUADfccAN33303xx57LLfddhvbbbcdAGPGjOHAAw9scrs/+MEPsnDhQnr06LHJ8RtuuIEvfOELXHHFFXzzm9/crNzcuXN5/PHHGTBgAADjx49nzz335Ic//CHf/va32X777Zt5B4tlT7YkSZIkkU1yNu3/pjWrzLTHprH67dUt1KKmGT58+IYAG6BLly6ceOKJpJT4+Mc/viHABujZsycf+9jHWLp0KS+//DIAV1xxBQDXXHPNhgAbICKYNGkSEcFNN9204fgNN9wAwEUXXbQhwAbYZ599+NznPtfkdvfv33+zABvgc5/7HL169eLuu++uWG78+PEbAmyAd73rXRx33HG88cYbzJs3r8n1txR7siVpGzF43O21boIkSW3aowse3WwMdmOWrl7KnFfncNguh7VQqxq33377bXasPghtKG3BggXstttuzJ49m+7duzNtWuU/MNTV1fH0009v+P6Pf/wj3bt354ADDtgs7+GHH86Pf/zjJrf9f/7nf7j66quZO3cur7/++ibjvxcsqPwq/tChQzc7NmjQIACWLVvW5LpbSrsKsiPiX4DDgaHAPsD2wBdTStdXyd8LmAB8CtgJeBX4JTAxpbSyQv5OwFeBLwF7ACuBu4FvppSer1LHCODfgQOABMwBvpNS2nbmqJckSZI6gBVrVmxRueVvVh473Fp69eq12bHOnTs3mvb2228DsHTpUtauXcvEiROr1rFq1aoNXy9fvpydd965Yr4dd9yxye2eMmUK3/jGN9hhhx045phjGDRoEHV12fJol156KWvWrKlYrqFrqjRJW2trV0E28B1gV2AxWcC8a7WMEdEduA/YD7gTuAnYH/gGcERE/HNK6c2yYlcDo4A/AZcDA4HPAMdExCEppWfL6vgX4KfAa8D1+eGTgLsi4jMppV9t+aVKkiRJak29um4evDVF7269C25J6+rVqxcRweLFi5uUv3fv3rz22msV0xYtatp49rVr13LhhRcyYMAAHnvsMfr3778hLaXE97///Sadpy1qb2OyRwGDU0o7AFc1kvdcsgD74pTSiJTSuJTSCOBi4CBgbGnmiBien38mcEBK6byU0ueA44F+wBVl+fsCPyQL+A9IKZ2ZUjqTrEd7CXBlRPTcusuVJEmS1FoOHHggfbv1bVaZfnX9GDpg89eX25MPfOADLFmyhGeffbbxzMC+++7LqlWrmDt37mZpf/jDH5p0jsWLF7N8+XKGDRu2SYAN8Oijj25Yfqw9aldBdkrp7pTSi43li4ggC5hXAheWJV+YHx9Vdvz0fD8+pfRWSZ2/A2aQ9WbvUpL/00Af4IcppZdL8r9MFpC/C/hkEy5LkiRJUhtQ16WOkfuPbFaZkfuNpK5LXQu1qHWcddZZAIwcOZIlS5Zslr5w4UKeeuqpDd/XT272zW9+c5PXs5944gl++tOfNqnO/v37U1dXx9y5c/n73/++4fjrr7/O
mWeeuUXX0Va0qyC7GfYke9X7gZTSqtKE/PsHgN0jonQgwZFAfVq5O/L9EWX5IXsVvSn5JUmSJLVxZw87mx27N21c8U49dmLssLGNZ2zjjj32WMaPH8/999/PHnvswSmnnMK4ceM4/fTTGT58OIMGDeLWW2/dkP8LX/gCRx99NNOnT2f//ffnvPPO4/TTT2fYsGEcc8wxTaqzU6dOjB49mvnz57Pvvvty9tlnM2rUKIYMGUKnTp0YOHBgS11ui+vIQTZAtfcdni3Nl4/fHgC8kFKqNFJ+k/xNqKNS/qoiYlalDRjSlPKSJEmSijGw50Cm/8v0RgPtnXrsxPRTpzOwZ/sNBktdcMEF3HXXXRx++OHcc889XHLJJdx2222sWbOGCRMmcOqpp27I26lTJ2699VbOPfdcli5dymWXXcaDDz7I1KlT+frXv97kOr/3ve9x0UUXERH86Ec/4q677uKUU07hzons33wAACAASURBVDvvpEuXLi1xma0iUkq1bsMWiYhxwPeoMLt4RHwW+C/gopTS/6tQ9iKyGcFPSCndEhEDgVfIer43m3s/Ij5E1mN9eUppTH7sGbIguktKaW1Z/i7AW8DjKaV9m3Ats6okDTnkkEN6zJpVLVmSms4lvFQL8yd9tNZNkNRB1S8ptddee7XI+Re8sYCps6Yy7bFpLF29dMPxfnX9GLnfSMYOG9thAuxtTVOfnWHDhjF79uzZKaVhzTl/e5tdvEOq9qHlwfchrdwcSZIkaZs3sOdAJh8zmQuGX8CcV+ew/M3l9O7Wm6EDhrb7MdhqWR01yK5fqK7aXPq9yvI1N395mfLZASrllyRJktTO1HWp47BdNnvZVaqqo47JbmxM9CbjqfPJ0F4FdouI7RrL34Q6GhsTLkmSJEnqgDpykL0AODSf1GyD/PtDySY5e6kk6T6gPq3ciHw/syw/QKXp80aU5ZEkSZIkbQM6ZJCdstncrgN6AOPLksfnx68tO35Nvr8wIravPxgRHyZbruvOsjW6/5vsdfAzI2JQSf5BwL8Bi4FbtvpiJEmSJEntRrsakx0Ro4D6ARH75PtREXFk/vX9KaXr8q+/DxwHnBcR+wNzgQPIep4fAS4tPXdK6d6IuA4YBcyNiNvJlvU6CVgKnFmW//WI+Dfgp3n+X+RJJwHvBE5KKb2x9VctSZIkSWov2lWQTRZgf6Hs2KFs+or3dZCNs46II4AJwKeA4WTjrqcAE1NKqyuc/wzgCeBLwBhgJVlv9DdTSs+VZ04p/SwiFpMtB/ZFIAFzgO+klO7ewmuUJEmS1IiIYP369aSUiIhaN0ftREqJlBKdOrXcS93tKshOKZ0GnNaM/MuBsfnWlPzrgcvzral1TAemNzW/JEmSpK3XtWtXVq9ezapVq+jRo0etm6N2YtWqVUD2/LSUDjkmW5IkSVLH1rNnTwAWLlzIypUryaZlkipLKbFy5UoWLlwIQK9evRopseXaVU+2JEmSJAH07duXVatWsWrVKl56KVs0yNfGVU3pH2G6d+9Onz59Wqwug2xJkiRJ7U6nTp0YNGgQy5YtY8WKFaxZs8bebFXVqVMnunbtSq9evejTp49jsiVJkiSpXKdOnejXrx/9+vWrdVOkDRyTLUmSJElSQezJliRJLWbwuNtr3YR2af6kj9a6CZKkLWRPtiRJkiRJBTHIliRJkiSpIAbZkiRJkiQVxCBbkiRJkqSCGGRLkiRJklQQg2xJkiRJkgpikC1JkiRJUkEMsiVJkiRJKohBtiRJkiRJBTHIliRJkiSpIAbZkiRJkiQVxCBbkiRJkqSCGGRLkiRJklQQg2xJkiRJkgpikC1JkiRJUkEMsiVJkiRJKohBtiRJkiRJBTHIliRJkiSpIAbZkiRJkiQVxCBbkiRJkqSCGGRLkiRJklQQg2xJkiRJkgpikC1JkiRJUkEMsiVJkiRJKohBtiRJkiRJBTHIliRJkiSpIAbZkiRJkiQVxCBbkiRJkqSCGGRLkiRJklQQg2xJkiRJkgpikC1JkiRJUkEMsiVJkiRJKohBtiRJkiRJBTHIliRJkiSpIAbZkiRJkiQVxCBbkiRJkqSCGGRLkiRJklQQg2xJkiRJkgpikC1JkiRJUkEMsiVJkiRJKohBtiRJkiRJBTHIliRJkiSpIAbZkiRJkiQVxCBbkiRJkqSCGGRLkiRJklQQg2xJkiRJkgpikC1JkiRJUkE6dJAdmRMi4t6IeDUi/h4R8yLi6ojYvUL+XhFxSUS8GBFrImJ+REyOiB5Vzt8pIs6MiCciYnVEvBYRN1U6tyRJkiSp4+vQQTbwA+Bm4H3Ar4EfAi8ApwOPRcSQ+owR0R24DxgLPA1MBeYB3wB+HxHdKpz/auByIPL9dOAE4JGI2LOFrkmSJEmS1EZ1rnUDWkpE7AR8DXgR2DeltLwkbSxwCXA2MDI/fC6wH3BxSmlcSd5JwHlkwff3So4PB0YBM4EPpZTeyo/fCPwWuAIY0VLXJ0mSJElqezpyT/Zgsut7oDTAzt2W73eA7LVysoB5JXBhWd4L8+Ojyo6fnu/H1wfYACml3wEzgGMiYpetuwRJkiRJUnvSkYPsZ4G3gEMjoldZ2sfy/T35fk9gIFlAvqo0Y/79A8DuEbFzSdKRQH1auTvy/RFb3HpJkiRJUrvTYV8XTyktiYhxwBTg6Yi4FVgB7At8EPgR2SvdkAXZkAXmlTxL9ur3nsBL+fjtAcCTKaV1VfKXnrdBETGrStKQKsclSZIkSW1Qhw2yAVJKUyPiFeA64MslSfcDN6aU1ubf98735a+V11tRlq+5+SVJkiRJ24CO/Lo4EfEt4GfAd4GdgZ7A4UA3YEZEfKKGzdsgpTSs0gY8Weu2SZIkSZKarsMG2RFxNDARuCKlNCml9HJKaWVK6X7g48DbZK+Sw8Ye6Wo9z73K8jU3vyRJkiRpG9Bhg2zgw/n+3vKElNJCsrWw94iIHjQ+hnqTMdv5ZGivArtFxHaN5ZckSZIkbRs6cpC9fb7foUr6DsB6sh7tZ4EFZDORdy/NlH9/KPBCSumlkqT7gPq0cvXrY8/csqZLkiRJktqjjhxk1y+tdXZEbPJad0R8GRgEzEoprUkpJbLJ0XoA48vOMz4/fm3Z8Wvy/YURUR/QExEfJlve686U0otFXIgkSZIkqX3oyLOL/xL4CvDPwDMR8RtgGXAA2RJeq4GzS/J/HzgOOC8i9gfm5nmPAR4BLi09eUrp3oi4DhgFzI2I28mW9ToJWAqc2XKXJkmSJElqizpsT3a+fvUxwPnAK8Bnga8B7yObcXxoSunhkvyrgCPIgum9ga8De5FNjnZUSml1hWrOAMbkX48BPgLcAhycUnqmBS5LkiRJktSGdeSebFJKa4BJ+daU/MuBsfnWlPzrgcvzTZIkSZK0jeuwPdmSJEmSJLU2g2xJkiRJkgpikC1JkiRJUkEMsiVJkiRJKohBtiRJkiRJBTHIliRJkiSpIAbZkiRJkiQVxCBbkiRJkqSCGGRLkiRJklQQg2xJkiRJkgpikC1JkiRJUkEMsiVJkiRJKohBtiRJkiRJBTHIliR
JkiSpIAbZkiRJkiQVxCBbkiRJkqSCGGRLkiRJklQQg2xJkiRJkgrSudYNkCRJ0qYGj7u91k1ot+ZP+mitmyBpG2dPtiRJkiRJBTHIliRJkiSpIAbZkiRJkiQVxCBbkiRJkqSCGGRLkiRJklQQg2xJkiRJkgpikC1JkiRJUkEKCbIj4ryI6F/EuSRJkiRJaq+K6sn+HvBSRPwqIkYUdE5JkiRJktqVooLsUcBc4ATgtxExPyLGR8Sggs4vSZIkSVKbV0iQnVKallIaBgwBLge6AxOBFyLifyPiExHh+G9JkiRJUodWaOCbUvpzSmksMBA4BZgBfAS4hex18u9ExO5F1ilJkiRJUlvRIr3LKaW3U0q/SCl9CDgMeBUYAPw78ExE/DYiPtASdUuSJEmSVCstEmRH5iMRcQtZb/ZA4EXgO8B0YATwQER8viXqlyRJkiSpFjoXebKI2AX4V+CLwLuB9cDtwNXA9JRSyvP9A3Ab8C3ghiLbIEmSJElSrRQSZEfEiWQzjB9N1jv+CnABcF1K6ZXy/CmlP0fET8leH5ckSZIkqUMoqif7v8l6re8ArgJuTymtb6TM08D9BdUvSZIkSVLNFRVkXwRcm1L6a1MLpJRuAm4qqH5JkiRJkmqukCA7pTS+iPNIkiRJktSeFTK7eET8U0RcEhE7VUkfkKcfUkR9kiRJkiS1RUUt4fV14OMppYWVElNKrwIfA8YWVJ8kSZIkSW1OUUH2QTQ+idlMwJ5sSZIkSVKHVVSQ3Z9s2a6GLMzzSZIkSZLUIRUVZC8Ddmkkz67AyoLqkyRJkiSpzSkqyJ4NfDIidq6UGBG7AMcDDxZUnyRJkiRJbU5RQfYlwDuAByLi8xExADbMKv4F4AGgDphSUH2SJEmSJLU5Ra2TPTMiziYLon8CEBEJiDzLemBMSmlmEfVJkiRJktQWFRJkA6SULouIe4Evk8023ptsrPbDwFUppSeLqkuSJEmSpLaosCAbIKX0ODC6yHNKkiRJktReFDUmW5IkSZKkbV6hPdkRsRMwFOgDbFcpT0rphiLrlCRJkiSprSgkyI6IbsC1wMlU7x0PIAEG2ZIkSZKkDqmonuxJwKnAM8BNwMvA2oLOLUmSJElSu1BUkP0Z4M/A0JTSmoLOKUmSJElSu1LUxGd9gOkG2JIkSZKkbVlRQfY8YMeCzlW4iPhkRNwVEUsi4s2IeCEiboqIncvy9YqISyLixYhYExHzI2JyRPSoct5OEXFmRDwREasj4rX8vLu3zpVJkiRJktqSooLsycBxEbFHQecrRGSuBv4H2A34OXAp8Afgn4BdS/J2B+4DxgJPA1PJ/njwDeD3+eRu5a4GLieb1O1yYDpwAvBIROzZQpclSZIkSWqjihqT/TJwB/BwRFwKzAVWVMqYUppZUJ1NcRbwJeBHwFkppXWliRFRev3nAvsBF6eUxpXkmQScRxZ8f6/k+HBgFDAT+FBK6a38+I3Ab4ErgBEtcE2SJEmSpDaqqCB7BtnyXAFMyL+upuL62UWLiDrg28DzwJjyABsgpbQ2zxtkAfNK4MKybBcCX83Tv1dy/PR8P74+wM7P+buImAEcExG7pJT+WswVSZIkSZLauqKC7AtoOLCuhWOAvsBPgO0i4hPAe4FlwN0ppb+U5N0TGAjckVJaVXqSlNKqiHgAGBERO6eUXsqTjgRWAQ9UqPuOPP0I4KeNNTQiZlVJGtJYWUmSJElS21FIkJ1SmlDEeQo2NN+vAx4nC7DrrY+IqSmlb+Tf14+ffrbKuZ4le/V7T+ClfPz2AODJSj3kJedxXLYkSZIkbUOK6slui/rn+7PJxogfDDwF7A9cA3w9Ip5LKV0J9M7zLq9yrvrx5b3L9k3N36CU0rBKx/Me7kOacg5JkiRJUu0VNbs4ABGxf0R8PyJ+ExF3lxzfNSI+ExH9iqyvEfXX9hZwfErpkZTSypTSH4BPA+uBr7dieyRJkiRJHVxhPdkR8X2yoDXyQ6VjtAO4MU+/rKg6G1Hfy/xoSmlBaUJK6cmIeB7YIyL6lOSt1vPcq+yczc0vSZIkSdoGFNKTHRFfJFtP+jbgH9l0Fm5SSvOBh4FPFFFfE83L98uqpNcfr6PxMdSbjNnOJ0d7FdgtIirNlt7YGG9JkiRJUgdU1Ovio8nGO38qpfQk2Sva5Z6mdScCuzff712eEBFdgD3IZgd/jSwYXgAcmk9qVpq3O3Ao8ELJzOIA9wH1aeXq18duzTXBJUmSJEk1VlSQ/Q/AXfXrTlexiI2TkbW4lNJzwJ1kr4SPKkseB/QBbkkprU0pJeA6oAcwvizv+Pz4tWXHr8n3F0bE9vUHI+LDZMt33ZlSerGIa5EkSZIktQ9FjcleC2zfSJ6BwMqC6muq0cCDwLURcTxZb/r+wAeBF4FzSvJ+HzgOOC8i9iebkfwAsvW2HwEuLT1xSuneiLgOGAXMjYjbyZb1OglYCpzZgtclSZIkSWqDiurJfgL4YJXxyUTEO4CjgTkF1dckeW/2gcD1ZOtmn0X2yvp/AAenlBaW5F0FHEEWTO9NNknbXsAU4KiU0uoKVZwBjMm/HgN8BLglP/czLXBJkiRJkqQ2rKie7Glkr1tfFRH/VpoQEb3ytJ3YGJC2mnwc9RebmHc5MDbfmpJ/PXB5vkmSJEmStnGFBNkppWkRcTTwr2SvSy8DiIiHyXqFuwPXp5R+VUR9kiRJkiS1RUW9Lk5K6bNkr0+/ALybbG3sA4G/Al9JKY0sqi5JkiRJktqiol4XByCldC3ZJGN1QF9gRUqptSc7kyRJkiSpJgoNsuvlk4RVmihMkiRJkqQOq7DXxSVJkiRJ2tYV0pMdEeuB1ISsKaXUIr3nkiRJkiTVWlEB70wqB9m9ydal7g78kXzWcUmSJEmSOqKilvA6slpaRLwDmAQcC3yoiPokSZIkSWqLWnxMdkrp7ymls4DlwOSWrk+SJEmSpFppzYnP/gB8tBXrkyRJkiSpVbVmkL0D0KMV65MkSZIkqVW1+EzfEdEJOBU4CXi0peuTJEmSJKlWilrC6/kGzt8f6AK8DZxfRH2SJEmSJLVFRfVkd6LyEl5vA08CjwBXpJT+VFB9kiRJkiS1OUUt4TW4iPNIkiRJktSetebEZ5IkSZIkdWgG2ZIkSZIkFaSoic9+v4VFU0rpqCLaIEmSJElSrRU18dmR+T4BUSG9oeOSJEmSJHUIRb0uXgfcBjwDfA4YnB8bDHw+P/6/QF1KqVPJtl1B9UuSJEmSVHNFBdkTgX2Ag1JK/5VS+mtKaU2+/xnwAWDfPJ8kSZIkSR1SUUH2Z4GbU0orKyWmlFYANwOnFFSfJEmSJEltTlFB9g5Al0bydAb6F1SfJEmSJEltTlFB9nPApyPinZUSI2IH4DPAXwqqT5IkSZKkNqeoIPtSYCdgbkSMiYihEbFzvv8aMIesF3tqQfVJkiRJktTmFLKEV0rpuogYAIwHLilLDmAdMCGlNK2I+iRJkiRJaouKWieblNKFEXEjcCrwj0BvYDnwR+DGlNJzRdUlSZIkSVJbVFiQDZAH0hcUeU5JkiRJktqLosZkby
Ii+kXEzi1xbkmSJEmS2qrCguyI6B0Rl0XEIuA14IWStA9ExG8jYmhR9UmSJEmS1NYUEmRHRD/gIeBM4CXgKbIJz+o9DhxKNl5bkiRJkqQOqaie7AnAe4GTU0oHAr8sTUwprQbuAz5YUH2SJEmSJLU5RQXZnwBuSyn9dwN55gODCqpPkiRJkqQ2p6ggewDw50byrAG6F1SfJEmSJEltTlFB9hKgsdnE9wJeLag+SZIkSZLanKLWyZ4JHBcRg1JKL5cnRsQ/AMcCPymoPkmSJGkzg8fdXusmtFvzJ3201k2QOoSierIvArYDHoiIU4F3AUTE3hHxr8DvyV4Xn1xQfZIkSZIktTmF9GSnlJ6IiJOAnwI35IcDeDLfvwF8JqX0bBH1SZIkSZLUFhX1ujgppd9ExG7AF4APAP2AFWTrZ/8kpbS4qLokSZIkSWqLCguyAVJKS4GpRZ5TkiRJkqT2opAx2RGxLiL+q4hzSZIkSZLUXhU18dkK4KWCziVJkiRJUrtUVJD9MLBvQeeSJEmSJKldKirIngB8MCI+X9D5JEmSJElqd4qa+OxDwAzgJxFxJvAIsAhIZflSSunCguqUJEmSJKlNKSrInlDy9dB8qyQBBtmSJEmSpA5pi4LsiPgE8HRK6Zn80PDimiRJkiRJUvu0pT3ZtwATgQvy738CXJpSuryQVkmSJEmS1A5t6cRnbwNdSr4fDPTZ6tZIkiRJktSObWmQ/VfgsIjYruRY+SRnkiRJkiRtU7b0dfEbgW8BSyNiSX5sbER8sZFyKaX0ni2sU5IkSZKkNm1Lg+zvAG8CHwUGkvViR741pLF0SZIkSZLarS0KslNKa4FJ+UZErAemppQuaLCgJEmSJEkd2JaOyS43EZhR0LlaVEScFxEp3w6pkN4rIi6JiBcjYk1EzI+IyRHRo8r5OkXEmRHxRESsjojXIuKmiNi95a9GkiRJktSWFBJkp5QmppRmFnGulhQRQ8j+ILCqSnp34D5gLPA0MBWYB3wD+H1EdKtQ7GrgcrJX4S8HpgMnAI9ExJ5FX4MkSZIkqe0qqie7zYuILsB/Ao+RrfNdybnAfsDFKaURKaVxKaURwMXAQWTBd+k5hwOjgJnAASml81JKnwOOB/oBV7TIxUiSJEmS2qRtJsgGvgm8HxgJrCtPjIggC5hXAheWJV+YHx9Vdvz0fD8+pfRW/cGU0u/IXp8/JiJ2KaLxkiRJkqS2b5sIsiPiALIge2JK6c9Vsu1JNlP6AymlTV4nz79/ANg9InYuSTqS7NXzByqc7458f8RWNF2SJEmS1I5s6RJe7UZEdAVuIHtN/PsNZK0fP/1slfRngRF5vpfy8dsDgCdTSpv1jJecp9Fx2RExq0rSkMbKSpIkSZLajg4fZAMXkAW6Q6sEw/V65/vlVdJXlOVrbn5JkiRJUgfXoYPsiBhGNjP4hJTSk7VuTzUppWGVjuc93JstMyZJkiRJaps6bJAdEZ3JZhN/HJjUhCL1PdLVep57leVrbn5JBRg87vZaN0GSJEmqqsMG2UAPNo6HfiubPHwzs/LjnwTqJ0SrNoZ6kzHbKaVVEfEqsFtEbFfhVfTGxnhLkiRJkjqYjhxkrwF+XCXtn8mC4N8ArwHzyYLhBcChEdG9dIbxfJKzQ4EXUkovlZznPuDkPG1mWR0j8n35cUmSJElSB9Vhg+yU0mo2X9cagIi4nizI/l5KaXbJ8euAbwHjgXElRcaT9Yx/t+xU15AF2RdGxIfq18qOiA+TLe91Z0rpxSKuR5IkSZLU9nXYIHsLfR84DjgvIvYH5gIHAMcAjwCXlmZOKd2bB+ajgLkRcTvZsl4nAUuBM1ux7ZIkSZKkGutU6wa0Jfkr4keQBdN7A18H9gKmAEflvePlzgDG5F+PAT4C3AIcnFJ6psUbLUmSJElqM7bJnuyU0mnAaVXSlgNj860p51oPXJ5vkiRJkqRtmD3ZkiRJkiQVxCBbkiRJkqSCGGRLkiRJklQQg2xJkiRJkgpikC1JkiT9//buPdiyqr4T+PcnIHRoaSGjNG0URZmYDBnQ9kEH5GEZ1BKTKI4PNMJQqFRFR8EewagRNaOog46WGaMYnxmRGCNxBIHxHQgM2AwJZBQZFVABMSrPNO81f+x99XC4l+57e/c9p29/PlW79u291l5nna5V99zvWXvtDTAQIRsAAAAGImQDAADAQIRsAAAAGIiQDQAAAAMRsgEAAGAgQjYAAAAMRMgGAACAgQjZAAAAMBAhGwAAAAYiZAMAAMBAhGwAAAAYiJANAAAAAxGyAQAAYCBCNgAAAAxEyAYAAICBCNkAAAAwECEbAAAABiJkAwAAwECEbAAAABiIkA0AAAADEbIBAABgIEI2AAAADETIBgAAgIEI2QAAADAQIRsAAAAGImQDAADAQIRsAAAAGIiQDQAAAAMRsgEAAGAgQjYAAAAMRMgGAACAgQjZAAAAMBAhGwAAAAYiZAMAAMBAhGwAAAAYiJANAAAAAxGyAQAAYCBCNgAAAAxEyAYAAICBCNkAAAAwECEbAAAABiJkAwAAwEC2nXQHAACAyXvkCWdMugtbrCtPetaku8AUMZMNAAAAAxGyAQAAYCAuFwcAANgELrVfuKV4qb2ZbAAAABiIkA0AAAADWbIhu6oeVlWvqapzqurqqrqjqq6rqs9V1ZPnOGenqnpPVV1VVbdX1ZVV9e6qWj5H/QdU1auq6tKqWl9VP62qU6tqj8377gAAAJhGSzZkJ3lVkvcm2SPJOUlOTnJukj9I8g9V9YLRylW1Y5JvJDk2yXf6cy9PsjbJV6tqh1le40NJ3p+k+v1ZSZ6b5KKq2nMzvCcAAACm2FK+8dmFSQ5qrX1j9GBVPSXJV5J8sKpOb63d3he9Lsk+Sd7ZWjthpP5JSY5PF77fMXL84CRHJ/lmkt9rrd3RH/90kjOTfCDJ0zfTewMAAGAKLdmZ7Nba344H7P743yf5WpKdk/xOklRVpQvMtyR529gpb+uPHz12/GX9/k0zAbtv/0tJvp7kkKp6xKa/EwAAALYUSzZkb8Cd/f6ufr9nklVJzmut3Tpasf/3eUn2qKqHjxQdlGSmbNzZ/f7AoToMAADA9FvKl4vPqp9dflqSa5Nc2h+eWT99xRynXZHu0u89k/ywX7+9W5LLWmt3z1F/tN0N9en8OYr22pjzAQAAmA5b1Ux2VW2X5FNJtk9y/EhAXtHvb5zj1JvG6s23PgAAAFuBrWYmu6oekOTjSQ5Ickpr7VOT7dGvtNbWzHa8n+Hed5G7AwAAwAJtFTPZfcD+aJLDk/xVkmPGqszMSM8187zTWL351gcAAGArsORDdh+wP5bkiCSnJjmytXbPWLUNraG+15rt/mZo1yZ5VFVts6H6AAAAbB2WdMgeCdgvTXJakj+6nxuVXZNkv/6mZqNt7JhkvyQ/aK39cKToG0lmysbNPB/7m5v2DgAAANiSLNmQPXKJ+EuTfDbJS+YI2GmttSQfSbI8yZvGit/UHz9l7PiH+/3bquqBI6/7zHSP9zqntXbVJr4NAAAAtiBL+cZnf5ruEvFbknw3yRura
rzO6a21S/qf35XkD5IcX1WPS3JxkscnOSTJRUn+2+iJrbWvVdVHkhyd5OKqOiPdY71ekOTnSV61Od4UAAAA02sph+xH9vvlSd4wR50rk1ySdOusq+rAJCcmOSzJwenWXZ+c5C2ttfWznP+KdM/afnmSV6cL9J9P8obW2veGeBMAAABsOZZsyG6tHZnkyHmec2OSY/ttY+rfk+T9/QYAAMBWbsmuyQYAAIDFJmQDAADAQIRsAAAAGIiQDQAAAAMRsgEAAGAgQjYAAAAMRMgGAACAgQjZAAAAMBAhGwAAAAYiZAMAAMBAhGwAAAAYiJANAAAAAxGyAQAAYCBCNgAAAAxEyAYAAICBCNkAAAAwECEbAAAABiJkAwAAwECEbAAAABiIkA0AAAADEbIBAABgIEI2AAAADETIBgAAgIEI2QAAADAQIRsAAAAGImQDAADAQIRsAAAAGIiQDQAAAAMRsgEAAGAgQjYAAAAMRMgGAACAgQjZAAAAMBAhGwAAAAYiZAMAAMBAhGwAAAAYiJANAAAAAxGyAQAAYCDbTroDsDV65AlnTLoLAADAZmAmGwAAAAYiZAMAAMBAhGwAAAAYiJANAAAAAxGyAQAAYCBCNgAAAAzEI7xYMI+hAgAAuDcz2QAAADAQIRsAAAAGImQDAADAQIRsAAAAGIiQDQAAAAMRsgEAAGAgQjYAAAAMRMgGAACAgQjZAAAAMBAhewBV9cSqOrOqbqiqW6vqgqp6/qT7Gqs6HAAAERtJREFUBQAAwOLadtId2NJV1cFJzk5yW5LPJLk5yWFJTquqh7fWTp5k/wAAAFg8ZrI3QVVtm+SUJPckOaC19vLW2muT7J3ku0neXlW7T7KPAAAALB4he9M8Ncmjk3y6tXbJzMHW2o1J3p7kgUmOmFDfAAAAWGQuF980B/X7c2YpO7vfH7ihRqrq/DmK9lpAnwAAAJgQIXvT7NnvrxgvaK1dV1W3jNRZcq486VmT7gIAi+yM756RQ089dN7nffFFX8yz/u39fG6ccUZy6PzbzRe/mDzL5xEA00PI3jQr+v2Nc5TfNFJnTq21NbMd72e4911Y1wBgeDttv9OCzluxwwY+DndaWLtZscGPWQBYVNZkAwAb7QmrnpCdd9h5XufssmyXrN5t9QYafkKy8/zazS67JKs30C4ALDIhe9PMzGDP9TX6Tpl7lhsAtjjLtluWox531LzOOWqfo7Jsu2UbaHhZctT82s1RR3XnAcAUEbI3zcxa7Pusu66qlUmWZ5b12gCwJTtuzXHZdcddN6ruyuUrc+yaYzey4eOSXTeu3axcmRy7ke0CwCISsjfNN/r9IbOUPX2sDgAsCasetCpnveSsDQbtlctX5qwXn5VVD1q1kQ2vSs46a8NBe+XKrt6qjWwXABaRkL1pvpLk+0kOr6p9Zg5W1Yokf5LkjiSfnFDfAGCz2WflPrn4FRdn7Zq12WXZLvcq22XZLlm7Zm3WvXxd9l659zwb3ie5+OJk7dpuzfW9Gt6lO75uXbL3PNsFgEVSrbVJ92GLVlUHp3sm9m1JPpPk5iSHJdk9ydrW2smb0Pb5++67777nnz/XY7QBYPLW37k+665dlxtvuzErdliR1but3vAa7I1qeH0XqG+8sbuL+OrV1mADsGjWrFmTCy644IK5ngY1F4/w2kStta9V1f5J3pLkBUm2S3JpkuNba6dNtHMAsAiWbbcs+z9i/83Q8LJk/83QLgBsRkL2AFprFyZ55qT7AQAAwGRZkw0AAAADEbIBAABgIEI2AAAADETIBgAAgIEI2QAAADAQIRsAAAAGImQDAADAQIRsAAAAGIiQDQAAAAOp1tqk+8Acquony5cvf+hee+016a4AAABsVS677LLccsst17fWdp3PeUL2FKuq7yTZOcn3F/mlZ1L9ZYv8ujAUY5ilwDhmS2cMs6UzhtkjyS9aa4+dz0lCNvdRVecnSWttzaT7AgthDLMUGMds6YxhtnTGMAtlTTYAAAAMRMgGAACAgQjZAAAAMBAhGwAAAAYiZAMAAMBA3F0cAAAABmImGwAAAAYiZAMAAMBAhGwAAAAYiJANAAAAAxGyAQAAYCBCNgAAAAxEyAYAAICBCNn8UlU9sarOrKobqurWqrqgqp4/6X7BjKp6WFW9pqrOqaqrq+qOqrquqj5XVU+e45ydquo9VXVVVd1eVVdW1buravli9x/mUlXHV1Xrt31nKTeOmUpV9Zyq+l9V9bOquq2qflBVp1bVw8fqGcNMleo8t6q+VlXXVtW/VtXlVfWhqtpjlvrGMButWmuT7gNToKoOTnJ2ktuSfCbJzUkOS7J7krWttZMn2D1IklTVSUmOT/K9JF9P8tMkeyb5wySV5PDW2mkj9XdMcm6SfZKck+T/JHlckkOSXJTkgNbabYv4FuA+qmqvJN9KcleSHZOsaa1dMFJuHDN1qqqS/EWSl6f7nXx2ur8dViU5MMmLW2vn9nWNYaZOVZ2c5Lgk1yb5uyQ3Jdk73bi8JcnvttYu6+saw8xPa822lW9Jtk3y/9IF7H1Gjq9IcnmS25PsPul+2mxJnpvkwFmOPyXJHUl+nmT7keNvSdKSnDRW/6T++Osn/Z5sW/eWZLsk65JckORT/bjcd6yOcWybui3Jq/vx9+dJtpmlfNuRn41h21RtSVYmuTvJlUlWjJUd24/Lj44cM4Zt89rMZJOqOiTdN9Afa60dNVZ2RJKPJ3lza+2tE+gebJSqOjvdN8pPbK19q59l+VGSnZKsbK3dOlJ3xyTXJbm+tfboiXQYklTViUlOSPL4JK9LckRGZrKNY6ZRVS1L8uMkv0jym621u+6nrjHM1OmX5Zyf5NOttRePle2Z5LtJvthae7YxzEJYk02SHNTvz5ml7Ox+f+DidAUW7M5+P/PH3p7pLls8b/QDMUn6f5+XZI/xdYOwWKrq8UnekOQtrbX/O0c145hpdEiSnZOcnmSbfl3rCVV1TFU9ZqyuMcw0uiLdFXD7VdVOY2WH9vuv9HtjmHkTskm6Xx5J9wvnXlpr16Vbl7LneBlMi6p6RJKnpVtXdWl/eM5xPXbc2GbRVdX2ST6Z5JIk77qfqsYx02h1v787yT8l+VySdyT5YJLLq+q/jtQ1hpk6rbWfpbuK6BFJvlNVH6yqd1bVWUnemeS/J/lAX90YZt62nXQHmAor+v2Nc5TfNFIHpkpVbZduLev2SY5vrd3dF23MuB6tB4vpren+IFs9MmZnYxwzjR7a749LcnGSJyX5drobQX04yWur6nuttQ/GGGZKtdbeW1U/TvKRJMeMFJ2b7jLymSvjjGHmzUw2sMWqqgeku2fAAUlOaa19arI9gg2rqjVJ1ib5s9bfuRa2MDN/P96R5A9baxe11m5prf19kv+Q5J4kr51Y72AjVNWfJvmrJG9P8vAkD0p3I9Udkny9qn5/gt1jCydkk/zqm7m5voHbKXN/ewcT0QfsjyY5PN2H5DFjVTZmXI/Wg82uqrZN8ol0l9ietBGnGMdMo5nx9q3W2jWjBf0XR99P8uiqenCMYaZQVT0t3R3DP9BaO6m19qP+
i6Jzkzw73X1eZh5fawwzby4XJ7n3WpJ1owVVtTLJ8iQXLnanYC59wP5YkpcmOTXJka21e8aqbWiN1IbWWMHmsDy/Gnt3dDetvY/z++PPSTJzQzTjmGlyeb+/YY7ymePL4ncx0+mZ/f5r4wWtteuq6jtJHldVy2MMswBCNknyjSSvT3e30M+MlT19pA5M3FjAPi3JH82xpvWKJNeku3PojrM8cmO/JD9orf1wEboNM25P8pdzlB2Q7o+1LyT5abrntxrHTKOZYPJb4wX9fTIek+TWdOP4uhjDTJ8H9vuHzFH+kHTLHu6M38MsgMvFSbpHFHw/yeFVtc/MwapakeRP0q25+uSE+ga/NHKJ+EuTfDbJS+a6aVRrraW7mcnyJG8aK35Tf/yUzddbuK/W2vrW2tGzbUn+oa/2jv7YJcYx06i19r10j/18TFUdPVZ8QpIHJ/l8a+0uY5gpdV6/P67/e/eXquqYJL+R5PzW2u3GMAtR3bhha1dVB6d7JvZt6Wazb05yWJLdk6xtrZ18P6fDoqiqE5O8Od1j5d6XXz0Te9TprbVL+vo7pvsg3TvdH4QXJ3l8uqs2LkpyYGtt/ebvOWxYVX08yRFJ1rTWLhg5bhwzdarq0em+GHpokjOSfCfd3cWfmuSqJPv2jwE1hpk6VbVNkq+mu4Lo+nRXEN2Qblw+Ncn6JAe11i7s6xvDzIuQzS9V1ZPS3QTid5Nsl+55w+9prZ020Y5BbySE3J//2Fr7+Mg5K5KcmO5Lo5XpnqX92SRvaa3dvFk6CgswV8juy4xjpk5VPTzd4+iekeTX010a/oUkb22tXT9W1xhmqlTV9kmOTfL8JL+Z7hLyn6RbDvH21tq3x+obw2w0IRsAAAAGYk02AAAADETIBgAAgIEI2QAAADAQIRsAAAAGImQDAADAQIRsAAAAGIiQDQAAAAMRsgEAAGAgQjYAAAAMRMgGAACAgQjZAAAAMBAhGwC2YFV1YlW1qjpo0n0BAIRsAAAAGIyQDQAAAAMRsgEAAGAgQjYATLGqOqCqTq+qn1TV7VX1w6r626raf5a6h1fVJVW1vqqurar3VdWysTpH9mu4j5zl/IP6shPHjreq+npVPayqPllV11XVPTPrwKvqyn5b3r/mNX1f/6mqnrcJ7311VX2gqi6rqhv793VpVZ1QVdvNcc6BVfXNqrq1qn5WVadV1cP7/rdZ6ldVHVVV51XVTVX1r1X1rao6aqH9BmDrtu2kOwAAzK6qXp3kvUnWJ/l8kquTPCzJ/kmel+TckeqvTPKMJH+X5Kv9z/8pyb9J8uIBuvPrSc5P8vMkn0myQ5KbRsq3S3JOkp2TfC7JryV5YZK/rqpntNbOWcBrvizJs5N8M8mZfZsHJXlHkicmOWy0clUdkuSMJHcnOS3JNUkOTvf/9IvxxquqkvyPJC9KckWSTye5I8nvJfnLqvrt1traBfQbgK2YkA0AU6iq9k7yniTXJtmvtXblSFkl2W3slKclWd1au7yv84YklyR5YVX959baNZvYpb2SfCzJy1prd89SvirJRUkOaq3d0ffh00m+nOS4dAF8vt6e5I9HX69/7x9JclRV7ddaO68/vk2SDyfZJsnBrbVzR875RJKXztL+0ekC9seSvKK1dmdf/4FJ/ibJa6vq1NbaugX0HYCtlMvFAWA6vSLd5/QbRwN2krTOeGh+30zA7uusT3Jq38bqAfpzR5LXzRGwZxw7E7D7PnwlyVXpZp3nrbV29fjrtdZakj/v//m0kaL9k+ye5H+OBuzeG9PNbo97ZZJb0wX5O0de444kb+j/+aKF9B2ArZeZbACYTk/q9xs7AzzbbOuP+v2DN707+UFr7V/up/yG1toP5ujDmoW8YD+j/Mp0l50/NsnyJDVSZdXIz3v3+/GAndbaD6vq6iSPGmn715L8TrpLyo/vJsjvZWbN92MX0ncAtl5CNgBMpxVJWrrLxTfGTbMcu6vfbzNAf36ygfIb5zh+VxZ+5dzfpFuT/d10a6yvT3Jnui8NXp1k+5G6O/X76+do6ycZCdnp1o5XujXub76fPuw4714DsFUTsgFgOt2QLgTuluTHA7Z7T7+f7W+AFfdz3n3uzL05VdUT0wXss5M8a2xd9r7pQvaomS8ZHjpHk7vOUX9da+0Jm9hdAPgla7IBYDpd2O8PGbjdmbtsP2yWsscN/Fqb4tH9/oxZ1oE/ZZb6/9jv9xsvqKrfSPKI0WOttZuTfDvJb1XVEJfTA0ASIRsAptVfpLtZ159V1e6jBf2znVfNftoGrUs3K/3CqtphpM09c9/Z4Um6qt/f63ngVfXvkrx+lvrnpnvE2bOranwN+Nsy+yXz70/3WLBTquo+l4VX1aOq6pHz6zYAWzuXiwPAFGqtXVpVr0kXBP+5qk5PFzxXJjkg3fOgX7OAdq+pqlOTHJ5kXVWdle4S6+ckOStjz56eoAv77flVtVuSC9LNRv9+uvf+vNHKrbW7q+qYJF9I8tWqOi3devYD083a/2OSfz/2Gh9Ksm+SI5LsV1VfTncjtF3T3fDsyen+n67cDO8PgCVKyAaAKdVa+0BVXZbktUmeme7u2tcn+d9J/noTmj46yb8keUGSP05yeZKXpwuYUxGy+9B8aJKTkjwj3WPArkiyNsmXMhay+3O+VFWHJHlrkucnWZ/kK+ne55kZuzlc/ziwI6vqzCQvS3JofvV/PPNaX94c7w+Apau6zxcAgKWpqh6U7u7il7bWnjzp/gCwtFmTDQAsCVW1Yx+oR49tk+TdSZYlOX0iHQNgq2ImGwBYEqpqn3Q3QDs7yfeTPCjdnch/O8k/J3lya+3WyfUQgK2BkA0ALIqqOijJQRtR9ZLW2rxnnavqIUnele5mZ7umu/fM1elmsP9La+2G+bYJAPMlZAMAi6KqTkzy5o2o+onW2pGbtzcAsHkI2QAAADAQNz4DAACAgQjZAAAAMBAhGwAAAAYiZAMAAMBAhGwAAAAYiJANAAAAAxGyAQAAYCBCNgAAAAxEyAYAAICBCNkAAAAwECEbAAAABiJkAwAAwED+P8eeSpzelo4tAAAAAElFTkSuQmCC\n",
1098 | "text/plain": [
1099 | ""
1100 | ]
1101 | },
1102 | "metadata": {
1103 | "needs_background": "light"
1104 | },
1105 | "output_type": "display_data"
1106 | }
1107 | ],
1108 | "source": [
1109 | "# setting image resolution\n",
1110 | "plt.figure(figsize = (8,4), dpi = 140)\n",
1111 | "\n",
1112 | "# Plotting histogram and descriptive summary\n",
1113 | "plt.scatter(churn_age.mean(), 0, label = 'mean', color = 'red')\n",
1114 | "plt.scatter(churn_age.median(), 0, label = 'median', color = 'green')\n",
1115 | "plt.hist(churn_age,bins=10)\n",
1116 | "\n",
1117 | "# axes labels\n",
1118 | "plt.xlabel('churn_age')\n",
1119 | "plt.ylabel('frequency')\n",
1120 | "plt.title('churn_age: mean, median, range')\n",
1121 | "plt.legend()"
1122 | ]
1123 | },
1124 | {
1125 | "cell_type": "code",
1126 | "execution_count": null,
1127 | "metadata": {},
1128 | "outputs": [],
1129 | "source": [
1130 | "* Mean Value is higher than the Median Value\n",
1131 | "* The peak occurs at the interval (36,45) that means highest number of customers age lie in this interval\n",
1132 | "* Most customer age between 30 and 65\n"
1133 | ]
1134 | }
1135 | ],
1136 | "metadata": {
1137 | "colab": {
1138 | "collapsed_sections": [],
1139 | "name": "Mean_Variance.ipynb",
1140 | "provenance": []
1141 | },
1142 | "kernelspec": {
1143 | "display_name": "Python 3",
1144 | "language": "python",
1145 | "name": "python3"
1146 | },
1147 | "language_info": {
1148 | "codemirror_mode": {
1149 | "name": "ipython",
1150 | "version": 3
1151 | },
1152 | "file_extension": ".py",
1153 | "mimetype": "text/x-python",
1154 | "name": "python",
1155 | "nbconvert_exporter": "python",
1156 | "pygments_lexer": "ipython3",
1157 | "version": "3.6.9"
1158 | }
1159 | },
1160 | "nbformat": 4,
1161 | "nbformat_minor": 1
1162 | }
1163 |
--------------------------------------------------------------------------------