├── README.md ├── Intro-Prgrm.py ├── 06.Read_Write.py ├── 18.hc.py ├── 05.DataSummarization.py ├── 11.Simple Linear Regression.py ├── 03.Apply_Functions.py ├── 12.Multiple Linear Regression.py ├── 17.kmeans.py ├── 07.Joins.py ├── 08.Index_Select_Filter.py ├── 16.RF.py ├── 15.DecisionTree.py ├── 09.MissingValues.py ├── 04.Loops.py ├── 02.Functions_Basics.py ├── 01.DataStructures.py ├── 19.MarketBasketAnalysis_AprioriAlgo.py ├── 13.multiple_linear_regression_BackwardElimination.py ├── 10.Graphs.py ├── 14.logistic_regression.py ├── 31.Reading Files into Python.ipynb ├── 32.Min_Max_Range_Updated.ipynb └── 33.Mean_Variance.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # Data Science with Python 2 | -------------------------------------------------------------------------------- /Intro-Prgrm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun Apr 25 16:45:56 2021 4 | 5 | @author: pc 6 | """ 7 | 8 | V = [1,2,3,4,5] 9 | print(V) 10 | 11 | import matplotlib.pyplot as plt 12 | plt.plot(V) 13 | -------------------------------------------------------------------------------- /06.Read_Write.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Aug 28 15:58:59 2021 4 | 5 | @author: Admin 6 | """ 7 | #-------------------------Reading & Writing data in Files---------------------- 8 | 9 | import pandas 10 | 11 | # Reading CSV Files with Pandas: 12 | df = pandas.read_csv('F:/WORK/pyWork/AnalyticsEdge_Python/pyData/User_Data.csv') 13 | print(df) 14 | 15 | # Writing CSV Files with Pandas: 16 | df.to_csv('F:/WORK/pyWork/AnalyticsEdge_Python/pyData/IIT-B.csv') 17 | 18 | # Reading Excel Files with Pandas 19 | df1 = pandas.read_excel('F:/WORK/pyWork/AnalyticsEdge_Python/pyData/User_Data.xlsx') 20 | 21 | df1 = pandas.read_excel('User_Data.xlsx') 22 | print(df1) 23 | 24 | # Writing Excel Files with Pandas 25 | df1.to_excel('IIT-B.xlsx') 26 | df2 = pandas.DataFrame(df1) 27 | print (df2) 28 | -------------------------------------------------------------------------------- /18.hc.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue Oct 19 19:56:10 2021 4 | 5 | @author: Admin 6 | """ 7 | # Hierarchical Clustering 8 | 9 | # Importing the libraries 10 | import matplotlib.pyplot as plt 11 | import pandas as pd 12 | 13 | # Importing the dataset 14 | dataset = pd.read_csv('F:/WORK/pyWork/pyData/Mall_Customers.csv') 15 | X = dataset.iloc[:, [3, 4]].values 16 | 17 | # Using the dendrogram to find the optimal number of clusters 18 | import scipy.cluster.hierarchy as sch 19 | dendrogram = sch.dendrogram(sch.linkage(X, method = 'ward')) 20 | plt.title('Dendrogram') 21 | plt.xlabel('Customers') 22 | plt.ylabel('Euclidean distances') 23 | 24 | #cut the dendrogram tree with a horizontal line at a height where the line can traverse 25 | #without intersecting the merging point. Hence, we can see the ideal no. 
of clusters is 5 26 | 27 | # Fitting Hierarchical Clustering to the dataset 28 | from sklearn.cluster import AgglomerativeClustering 29 | hc = AgglomerativeClustering(n_clusters = 5, affinity = 'euclidean', linkage = 'ward') 30 | y_hc = hc.fit_predict(X) 31 | 32 | # Visualising the clusters 33 | plt.scatter(X[y_hc == 0, 0], X[y_hc == 0, 1], s = 100, c = 'red', label = 'Cluster 1') 34 | plt.scatter(X[y_hc == 1, 0], X[y_hc == 1, 1], s = 100, c = 'blue', label = 'Cluster 2') 35 | plt.scatter(X[y_hc == 2, 0], X[y_hc == 2, 1], s = 100, c = 'green', label = 'Cluster 3') 36 | plt.scatter(X[y_hc == 3, 0], X[y_hc == 3, 1], s = 100, c = 'cyan', label = 'Cluster 4') 37 | plt.scatter(X[y_hc == 4, 0], X[y_hc == 4, 1], s = 100, c = 'magenta', label = 'Cluster 5') 38 | 39 | plt.title('Clusters of customers') 40 | plt.xlabel('Annual Income (k$)') 41 | plt.ylabel('Spending Score (1-100)') 42 | plt.legend() 43 | -------------------------------------------------------------------------------- /05.DataSummarization.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Aug 28 15:58:01 2021 4 | 5 | @author: Admin 6 | """ 7 | #-------------------------------Data Summary------------------------------- 8 | #Describe()- Used to get summary statistics in python. 9 | #Describe Function gives the mean, std and IQR values. 10 | #It analyzes both numeric and object series and also the DataFrame column sets of mixed data types. 11 | # creation of DataFrame 12 | import pandas as pd 13 | import numpy as np 14 | 15 | #Example 1: 16 | a1 = pd.Series([1, 2, 3,4]) 17 | a1 18 | a1.describe() 19 | 20 | a2 = pd.Series(['q', 'r', 'r', 'r','q','s','p']) 21 | a2 22 | a2.describe() 23 | 24 | info = pd.DataFrame({'numeric': [1, 2, 3, 4], 25 | 'object': ['p', 'q', 'r','e'] 26 | }) 27 | info 28 | 29 | info.describe(include=[np.number]) 30 | info.describe(include=[np.object]) 31 | info.describe() 32 | 33 | #Example 2: 34 | #Create a Dictionary of series 35 | d = {'Name':['Cathrine','Alisa','Bobby','Madonna','Rocky','Sebastian','Jaqluine', 36 | 'Rahul','David','Andrew','Ajay','Teresa'], 37 | 'Age':[26,27,25,24,31,27,25,33,42,32,51,47], 38 | 'Score':[89,87,67,55,47,72,76,79,44,92,99,69]} 39 | 40 | #Create a DataFrame 41 | df = pd.DataFrame(d) 42 | df 43 | 44 | #Descriptive or Summary Statistic of the numeric columns: 45 | #Summary statistics 46 | print(df.describe()) 47 | 48 | #Descriptive or Summary Statistic of the character columns: 49 | #Summary statistics of character column 50 | print(df.describe(include='object')) 51 | 52 | #Descriptive or Summary Statistic of all the columns 53 | #Summary statistics of both - character & numerical columns 54 | print(df.describe(include='all')) 55 | #--------------------------------------------------------------------------------------------------------------- 56 | -------------------------------------------------------------------------------- /11.Simple Linear Regression.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Sep 18 19:04:28 2021 4 | 5 | @author: Admin 6 | """ 7 | # Simple Linear Regression 8 | 9 | # Importing the libraries 10 | import matplotlib.pyplot as plt 11 | import pandas as pd 12 | 13 | # Importing the dataset 14 | dataset = pd.read_csv('F:/pyWork/pyData/stud_reg.csv') 15 | print(type(dataset)) 16 | 17 | X = dataset.iloc[:,:-1].values 18 | y = dataset.iloc[:, 1].values 19 | 20 | # Splitting the dataset 
into the Training set and Test set 21 | from sklearn.model_selection import train_test_split 22 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 1/3, random_state = 0) 23 | 24 | #Note: The parameter 'random_state' is used to randomly bifurcate the dataset into training & 25 | #testing datasets. That number should be supplied as arguments to parameter 'random_state' 26 | #which helps us get the max accuracy. And that number is decided by hit & trial method. 27 | 28 | # Fitting Simple Linear Regression to the Training set 29 | from sklearn.linear_model import LinearRegression 30 | regressor = LinearRegression() 31 | regressor.fit(X_train, y_train) 32 | 33 | #Calculating the coefficients: 34 | print(regressor.coef_) 35 | 36 | #Calculating the intercept: 37 | print(regressor.intercept_) 38 | 39 | # Predicting the Test set results 40 | y_pred = regressor.predict(X_test) 41 | 42 | # Accuracy of the model 43 | 44 | #Calculating the r squared value: 45 | from sklearn.metrics import r2_score 46 | r2_score(y_test,y_pred) 47 | 48 | #Create a DataFrame 49 | df1 = {'Actual Applicants':y_test, 50 | 'Predicted Applicants':y_pred} 51 | df1 = pd.DataFrame(df1,columns=['Actual Applicants','Predicted Applicants']) 52 | print(df1) 53 | 54 | # Visualising the predicted results 55 | line_chart1 = plt.plot(X_test,y_pred, '--', c ='red') 56 | line_chart2 = plt.plot(X_test,y_test, ':', c='blue') 57 | 58 | #-------------------------------------------------------- 59 | -------------------------------------------------------------------------------- /03.Apply_Functions.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Aug 21 15:33:29 2021 4 | 5 | @author: Admin 6 | """ 7 | #-----------------Apply Family of Functions-------------------------------- 8 | #To apply our own functions to dataset, pandas provides three functions from 9 | #apply family of functons: pipe(), apply(), applymap() 10 | 11 | # pipe():Table wise Function Application. 12 | # It performs the custom operation for the entire dataframe. 13 | import pandas as pd 14 | # own function 15 | def adder(adder1,adder2):return adder1+adder2 16 | 17 | #Create a Dictionary of series 18 | d = {'Score_Math':pd.Series([66,57,75,44,31,67,85,33,42,62,51,47]), 19 | 'Score_Science':pd.Series([89,87,67,55,47,72,76,79,44,92,93,69])} 20 | 21 | print(type(d)) 22 | print(d) 23 | df = pd.DataFrame(d) 24 | print (df) 25 | print (df.pipe(adder,2)) 26 | 27 | # apply():Row or Column Wise Function Application. 28 | # It performs the custom operation for either row wise or column wise. 29 | import numpy as np 30 | #Create a DataFrame 31 | d = {'Score_Math':pd.Series([66,57,75,44,31,67,85,33,42,62,51,47]), 32 | 'Score_Science':pd.Series([89,87,67,55,47,72,76,79,44,92,93,69])} 33 | 34 | df = pd.DataFrame(d) 35 | print (df) 36 | #Row Wise Fxn Application: 37 | #row wise mean 38 | print (df.apply(np.mean,axis=1)) 39 | 40 | #Column Wise Fxn Application: 41 | #column wise mean 42 | print (df.apply(np.mean,axis=0)) 43 | 44 | # applymap():Element wise Function Application. 45 | 46 | # applymap():Element wise Function Application. 47 | # It performs specified operation on all the elements of the dataframe. 
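# NOTE (added): a minimal compatibility sketch, not part of the original script.
# Recent pandas releases (2.1+) deprecate DataFrame.applymap() in favour of the
# element-wise DataFrame.map(); the small demo frame below is only illustrative.
import pandas as pd
_demo = pd.DataFrame({'Score_Math': [66, 57, 75],
                      'Score_Science': [89, 87, 67]})
if hasattr(_demo, 'map'):             # pandas >= 2.1
    print(_demo.map(lambda x: x * 2))
else:                                 # older pandas: keep using applymap()
    print(_demo.applymap(lambda x: x * 2))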
48 | 49 | #Create a DataFrame 50 | d = {'Score_Math':pd.Series([66,57,75,44,31,67,85,33,42,62,51,47]), 51 | 'Score_Science':pd.Series([89,87,67,55,47,72,76,79,44,92,93,69])} 52 | 53 | df = pd.DataFrame(d) 54 | print (df) 55 | 56 | #Example 1: 57 | print (df.applymap(lambda x:x*2)) 58 | #Example2: 59 | import math as m 60 | print (df.applymap(lambda x:m.sqrt(x))) 61 | -------------------------------------------------------------------------------- /12.Multiple Linear Regression.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun Sep 19 15:26:33 2021 4 | 5 | @author: Admin 6 | """ 7 | #Multiple Regression 8 | 9 | # Importing the libraries 10 | import pandas as pd 11 | import seaborn as sns 12 | 13 | # Importing the dataset 14 | dataset = pd.read_csv('F:/WORK/pyWork/AnalyticsEdge_Python/pyData/stud_reg_2.csv') 15 | print(type(dataset)) 16 | 17 | #Data Visualization: 18 | sns.heatmap(dataset) 19 | 20 | X = dataset.iloc[:, :-1].values 21 | y = dataset.iloc[:,2].values 22 | 23 | # Splitting the dataset into the Training set and Test set 24 | from sklearn.model_selection import train_test_split 25 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 1) 26 | 27 | #Note: The parameter 'random_state' is used to randomly bifurcate the dataset into training & 28 | #testing datasets. That number should be supplied as arguments to parameter 'random_state' 29 | #which helps us get the max accuracy. And that number is decided by hit & trial method. 30 | 31 | # Fitting Linear Regression to the Training set 32 | from sklearn.linear_model import LinearRegression 33 | regressor = LinearRegression() 34 | regressor.fit(X_train, y_train) 35 | 36 | #Calculating the coefficients: 37 | print(regressor.coef_) 38 | 39 | #Calculating the intercept: 40 | print(regressor.intercept_) 41 | 42 | # Predicting the Test set results 43 | y_pred = regressor.predict(X_test) 44 | 45 | # Accuracy of the model 46 | 47 | #Calculating the r squared value: 48 | from sklearn.metrics import r2_score 49 | r2_score(y_test,y_pred) 50 | 51 | #Create a DataFrame 52 | df1 = {'Actual Applicants':y_test, 53 | 'Predicted Applicants':y_pred} 54 | df1 = pd.DataFrame(df1,columns=['Actual Applicants','Predicted Applicants']) 55 | print(df1) 56 | 57 | # Visualising the predicted results 58 | import matplotlib.pyplot as plt 59 | line_chart1 = plt.plot(y_pred,X_test, '--',c='green') 60 | line_chart2 = plt.plot(y_test,X_test, ':', c='red') 61 | plt.show() 62 | #------------------------------ 63 | -------------------------------------------------------------------------------- /17.kmeans.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun Oct 17 09:41:39 2021 4 | 5 | @author: Admin 6 | """ 7 | # K-Means Clustering 8 | #Projects: Customer Segmentation 9 | #A Company wants to identify segments of customers for targetted marketing. 
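# NOTE (added): an optional sketch, not part of the original walkthrough.
# K-Means relies on Euclidean distance, so when features sit on very different
# scales it is common to standardize them first. The helper below shows that
# pattern; the function name and CSV path are assumptions, and the column
# indices mirror the Mall_Customers data used later in this script.
def kmeans_on_scaled_features(csv_path, n_clusters=5):
    import pandas as pd
    from sklearn.preprocessing import StandardScaler
    from sklearn.cluster import KMeans
    data = pd.read_csv(csv_path)
    feats = data.iloc[:, [3, 4]].values            # Annual Income, Spending Score
    feats_std = StandardScaler().fit_transform(feats)
    km = KMeans(n_clusters=n_clusters, init='k-means++', n_init=10, random_state=42)
    return km.fit_predict(feats_std)
# Example (uncomment to run): kmeans_on_scaled_features('Mall_Customers.csv')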
10 | 11 | # Importing the libraries 12 | import matplotlib.pyplot as plt 13 | import pandas as pd 14 | 15 | # Importing the dataset 16 | dataset = pd.read_csv('D:\SkillEdge\Python\Final\Codes\pyData\Mall_Customers.csv') 17 | X = dataset.iloc[:, [3,4]].values 18 | 19 | # Using the elbow method to find the optimal number of clusters 20 | from sklearn.cluster import KMeans 21 | help(KMeans()) 22 | wcss = [] 23 | for i in range(1, 11): 24 | kmeans = KMeans(n_clusters = i, init = 'k-means++', random_state = 0) 25 | kmeans.fit(X) 26 | wcss.append(kmeans.inertia_) 27 | plt.plot(range(1, 11), wcss) 28 | plt.title('The Elbow Method') 29 | plt.xlabel('Number of clusters') 30 | plt.ylabel('WCSS') 31 | #if you want save figure, use savefig method in returned figure object. 32 | plt.savefig('output.png') 33 | 34 | # Fitting K-Means to the dataset 35 | kmeans = KMeans(n_clusters = 5, init = 'k-means++', random_state = 42) 36 | y_kmeans = kmeans.fit_predict(X) 37 | 38 | kmeans = pd.DataFrame(y_kmeans) 39 | dataset_1 = pd.concat([dataset,kmeans],axis=1) 40 | 41 | # Visualising the clusters 42 | plt.scatter(X[y_kmeans == 0, 0], X[y_kmeans == 0, 1], s = 100, c = 'red', label = 'Cluster 1') 43 | plt.scatter(X[y_kmeans == 1, 0], X[y_kmeans == 1, 1], s = 100, c = 'blue', label = 'Cluster 2') 44 | plt.scatter(X[y_kmeans == 2, 0], X[y_kmeans == 2, 1], s = 100, c = 'green', label = 'Cluster 3') 45 | plt.scatter(X[y_kmeans == 3, 0], X[y_kmeans == 3, 1], s = 100, c = 'cyan', label = 'Cluster 4') 46 | plt.scatter(X[y_kmeans == 4, 0], X[y_kmeans == 4, 1], s = 100, c = 'magenta', label = 'Cluster 5') 47 | #plt.scatter(X[y_kmeans == 5, 0], X[y_kmeans == 5, 1], s = 100, c = 'yellow', label = 'Cluster 3') 48 | #plt.scatter(X[y_kmeans == 6, 0], X[y_kmeans == 6, 1], s = 100, c = 'black', label = 'Cluster 4') 49 | #plt.scatter(X[y_kmeans == 7, 0], X[y_kmeans == 7, 1], s = 100, c = 'orange', label = 'Cluster 5') 50 | 51 | 52 | 53 | 54 | #plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s = 300, c = 'yellow', label = 'Centroids') 55 | plt.title('Clusters of customers') 56 | plt.xlabel('Annual Income (k$)') 57 | plt.ylabel('Spending Score (1-100)') 58 | plt.legend() 59 | plt.show() 60 | -------------------------------------------------------------------------------- /07.Joins.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue Aug 31 18:44:17 2021 4 | 5 | @author: Admin 6 | """ 7 | #---------------------------------Joins---------------------------------------- 8 | #We can merge two data frames in python by using the merge() function of pandas 9 | #Create dataframe: 10 | import pandas as pd 11 | 12 | # Example 1: 13 | 14 | # data frame 1 15 | d1 = {'Customer_id':pd.Series([1,2,3,4,5,6]), 16 | 'Product':pd.Series(['Oven','Oven','Oven','Television','Television','Television'])} 17 | df1 = pd.DataFrame(d1) 18 | print(df1) 19 | 20 | # data frame 2 21 | d2 = {'Customer_id':pd.Series([2,4,6]), 22 | 'State':pd.Series(['California','California','Texas'])} 23 | df2 = pd.DataFrame(d2) 24 | print(df2) 25 | 26 | #Inner join using pandas: 27 | #Return only those rows where left table have matching keys in the right table 28 | print (pd.merge(df1, df2, on='Customer_id', how='inner')) 29 | 30 | #Full join using pandas 31 | #Returns all rows from both tables. 32 | 33 | print (pd.merge(df1, df2, on='Customer_id', how='outer')) 34 | #join records from left table which have matching keys in right table. 
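# NOTE (added): a small optional illustration using the same df1/df2 as above.
# Passing indicator=True to merge() adds a "_merge" column showing whether each
# row came from the left table, the right table, or both - handy for inspecting
# the result of an outer join.
print(pd.merge(df1, df2, on='Customer_id', how='outer', indicator=True))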
35 | 36 | #Left Join using pandas 37 | #Returns all rows from left table and any rows with matching keys from right table. 38 | print (pd.merge(df1, df2, on='Customer_id', how='left')) 39 | 40 | #Right Join using pandas 41 | #Returns all rows from right table and any rows with matching keys from left table. 42 | print (pd.merge(df1, df2, on='Customer_id', how='right')) 43 | 44 | #Example 2: 45 | 46 | # Dataset 1 47 | emp_1 = {"Name": ["Penn", "Smith", "William", "Parker"], 48 | "Age": [21, 32, 29, 28]} 49 | EmpList_1 = pd.DataFrame(emp_1) 50 | print(EmpList_1) 51 | 52 | # Dataset 2 53 | emp_2 = {"Name": ["Penn", "Suzzane", "William"], 54 | "Education-Level": ["Under-Grad", "PG", "Grad"]} 55 | EmpList_2 = pd.DataFrame(emp_2) 56 | print(EmpList_2) 57 | 58 | #Inner join using pandas: 59 | print (pd.merge(EmpList_1, EmpList_2, on='Name', how='inner')) 60 | 61 | #Full join using pandas 62 | print (pd.merge(EmpList_1, EmpList_2, on='Name', how='outer')) 63 | #join records from left table which have matching keys in right table. 64 | 65 | #Left Join using pandas 66 | print (pd.merge(EmpList_1, EmpList_2, on='Name', how='left')) 67 | 68 | #Right Join using pandas 69 | #Returns all rows from right table and any rows with matching keys from left table. 70 | print (pd.merge(EmpList_1, EmpList_2, on='Name', how='right')) 71 | -------------------------------------------------------------------------------- /08.Index_Select_Filter.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue Aug 31 18:48:19 2021 4 | 5 | @author: Admin 6 | """ 7 | #------------------------Index, Select & Filter-------------------------------- 8 | #Create dataframe : 9 | import pandas as pd 10 | 11 | #Create a DataFrame 12 | d = {'Name':['Alisa','Bobby','Cathrine','Alisa','Bobby','Cathrine', 13 | 'Alisa','Bobby','Cathrine','Alisa','Bobby','Cathrine'], 14 | 'Exam':['Semester 1','Semester 1','Semester 1','Semester 1','Semester 1','Semester 1', 15 | 'Semester 2','Semester 2','Semester 2','Semester 2','Semester 2','Semester 2'], 16 | 'Subject':['Mathematics','Mathematics','Mathematics','Science','Science','Science', 17 | 'Mathematics','Mathematics','Mathematics','Science','Science','Science'], 18 | 'Score':[62,47,55,74,31,77,85,63,42,67,89,81]} 19 | 20 | df = pd.DataFrame(d,columns=['Name','Exam','Subject','Score']) 21 | df 22 | 23 | #View a column of the dataframe in pandas: 24 | df['Name'] 25 | 26 | #View two columns of the dataframe in pandas: 27 | df[['Name','Score','Exam']] 28 | 29 | #View first two rows of the dataframe in pandas: 30 | df[0:2] 31 | 32 | #-------Filter in Pandas dataframe:-------------- 33 | #View all rows where score greater than 70 34 | df['Score'] > 70 35 | df[df['Score'] > 70] 36 | 37 | #View all the rows where score greater than 70 and less than 85 38 | df[(df['Score'] > 70) & (df['Score'] < 85)] 39 | 40 | 41 | #-----------------Select in Pandas dataframe----------------------------------- 42 | #select row by using row number in pandas with .iloc 43 | #.iloc [1:m, 1:n] – is used to select or index rows based on their position 44 | #from 1 to m rows and 1 to n columns 45 | 46 | # select first 2 rows 47 | df.iloc[:2] 48 | # or 49 | df.iloc[:2,] 50 | 51 | #select 3rd to 5th rows 52 | df.iloc[2:5] 53 | # or 54 | df.iloc[2:5,] 55 | 56 | #select all rows starting from third row 57 | df.iloc[2:] 58 | # or 59 | df.iloc[2:,] 60 | 61 | #Select column by using column number in pandas with .iloc 62 | # select first 2 columns 63 | 
df.iloc[:,:2] 64 | #select first 1st and 4th columns 65 | df.iloc[[2,4],[0,3]] 66 | 67 | #Select value by using row name and column name in pandas with .loc: 68 | #.loc [[Row_names],[ column_names]] –used to select or index rows or columns based on their name 69 | 70 | #select value by row label and column label using loc 71 | df.loc[[1,2,4,8,11],['Name','Score']] 72 | -------------------------------------------------------------------------------- /16.RF.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue Oct 12 19:15:52 2021 4 | 5 | @author: Admin 6 | """ 7 | #------------------------------Random Forest-------------------------------- 8 | # Random Forest Classification 9 | 10 | # Importing the libraries 11 | import pandas as pd 12 | 13 | # Importing the dataset 14 | dataset = pd.read_csv('Purchase_History.csv') 15 | X = dataset.iloc[:, [2, 3]].values 16 | y = dataset.iloc[:, 4].values 17 | 18 | # Splitting the dataset into the Training set and Test set 19 | from sklearn.model_selection import train_test_split 20 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 0) 21 | 22 | 23 | # Fitting Random Forest Classification to the Training set 24 | from sklearn.ensemble import RandomForestClassifier 25 | classifier = RandomForestClassifier(n_estimators = 10, criterion = 'entropy',max_depth = 3, min_samples_leaf=5) 26 | classifier.fit(X_train, y_train) 27 | 28 | #To see no. of decision trees created 29 | len(classifier.estimators_) 30 | 31 | #To see the decision trees created 32 | classifier.estimators_ 33 | 34 | #To access a particular decision tree, we can use indexing 35 | classifier.estimators_[0] 36 | 37 | # Predicting the Test set results 38 | y_pred = classifier.predict(X_test) 39 | 40 | # Making the Confusion Matrix 41 | from sklearn.metrics import confusion_matrix 42 | cm = confusion_matrix(y_test, y_pred) 43 | cm 44 | #Accuracy = 96% 45 | 46 | # Random Forest visualization 47 | 48 | #Since RF is quite big & clumpsy to draw due to large no. of DT, its not possible to 49 | #visualiza an entire RF on a small system like our laptop. 50 | #Hence, we visualize individual DTs from this RF. 51 | 52 | # Decision Tree -1 visualization----------------- 53 | from sklearn import tree 54 | #Lets create a blank chart of desired size using matplotlib library and place our Decision tree there. 55 | import matplotlib.pyplot as plt 56 | fig, axes= plt.subplots(nrows = 1,ncols = 1,figsize = (4,4), dpi=300) 57 | cn=['0','1'] 58 | tree.plot_tree(classifier.estimators_[0],class_names=cn,filled = True) 59 | 60 | #if you want save figure, use savefig method in returned figure object. 61 | fig.savefig('RF-DT-1.png') 62 | 63 | # Decision Tree-2 visualization----------------- 64 | from sklearn import tree 65 | #Lets create a blank chart of desired size using matplotlib library and place our Decision tree there. 66 | import matplotlib.pyplot as plt 67 | fig, axes= plt.subplots(nrows = 1,ncols = 1,figsize = (4,4), dpi=300) 68 | cn=['0','1'] 69 | tree.plot_tree(classifier.estimators_[1],class_names=cn,filled = True) 70 | 71 | #if you want save figure, use savefig method in returned figure object. 
72 | fig.savefig('RF-DT-2.png') 73 | 74 | #----------- 75 | -------------------------------------------------------------------------------- /15.DecisionTree.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Oct 9 16:48:32 2021 4 | 5 | @author: Admin 6 | """ 7 | 8 | # Importing the libraries 9 | import pandas as pd 10 | 11 | # Importing the dataset 12 | dataset = pd.read_csv('F:\WORK\pyWork\AnalyticsEdge_Python\pyData\Purchase_History.csv') 13 | 14 | #Method-1 (Handling Categorical Variables) 15 | pd.get_dummies(dataset["Gender"]) 16 | pd.get_dummies(dataset["Gender"],drop_first=True) 17 | S_Dummy = pd.get_dummies(dataset["Gender"],drop_first=True) 18 | S_Dummy.head(5) 19 | #Now, lets concatenate these dummy var columns in our dataset. 20 | dataset = pd.concat([dataset,S_Dummy],axis=1) 21 | dataset.head(5) 22 | dataset.tail(2) 23 | #dropping the columns whose dummy var have been created 24 | dataset.drop(["Gender",],axis=1,inplace=True) 25 | dataset.head(5) 26 | #------------------------------------------------------------------------------ 27 | 28 | #Obtaining DV & IV from the dataset 29 | X = dataset.iloc[:, [1,2,4]].values 30 | y = dataset.iloc[:, 3].values 31 | 32 | # Splitting the dataset into the Training set and Test set 33 | from sklearn.model_selection import train_test_split 34 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 2) 35 | 36 | 37 | # Fitting Decision Tree Classification to the Training set 38 | from sklearn.tree import DecisionTreeClassifier 39 | #classifier = DecisionTreeClassifier(criterion = 'entropy') 40 | #If desired we can supply extra parameters to decision trees fxn, but 41 | #it may or may not give better accuracy. 42 | classifier = DecisionTreeClassifier(criterion = 'entropy',max_depth = 3, min_samples_leaf=5) 43 | 44 | classifier.fit(X_train, y_train) 45 | 46 | # Predicting the Test set results 47 | y_pred = classifier.predict(X_test) 48 | 49 | # Making the Confusion Matrix 50 | from sklearn.metrics import confusion_matrix 51 | cm = confusion_matrix(y_test, y_pred) 52 | print(cm) 53 | #Accuracy = 91% 54 | 55 | # Decision Tree visualization----------------- 56 | from sklearn import tree 57 | 58 | #Simple Decision Tree 59 | tree.plot_tree(classifier) 60 | #image is quite blurred 61 | 62 | #Lets try to make decision tree more interpretable by adding filling colors. 63 | tree.plot_tree(classifier,filled = True) 64 | #Although the Decision tree shows class name & leafs are colred but still its view is blurred. 65 | 66 | #Lets create a blank chart of desired size using matplotlib library and place our Decision tree there. 67 | import matplotlib.pyplot as plt 68 | fig, axes = plt.subplots(nrows = 1,ncols = 1,figsize = (4,4), dpi=300) 69 | #The above line is used to set the pixels of the Decision Trees nodes so that 70 | #the content mentioned in each node of Decision tree is visible. 71 | cn=['0','1'] 72 | tree.plot_tree(classifier,class_names=cn,filled = True) 73 | 74 | #if you want save figure, use savefig method in returned figure object. 
75 | fig.savefig('Skilledge-Python-April-batch.png') 76 | -------------------------------------------------------------------------------- /09.MissingValues.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Thu Sep 2 20:05:47 2021 4 | 5 | @author: Admin 6 | """ 7 | #--------------------------Handling Missing Values---------------------------- 8 | 9 | #Counting the Missing Values--------------------------- 10 | import pandas as pd 11 | import numpy as np 12 | 13 | #Create a DataFrame 14 | df1 = {'Subject':['semester1','semester2','semester3','semester4','semester1', 15 | 'semester2','semester3'], 16 | 'Score':[62,47,np.nan,74,np.nan,77,85]} 17 | 18 | df1 = pd.DataFrame(df1,columns=['Subject','Score']) 19 | print(df1) 20 | 21 | '''Is there any missing values in dataframe ''' 22 | df1.isnull() 23 | df1.notnull() 24 | 25 | '''Is there any missing values across columns''' 26 | df1.isnull().any() 27 | 28 | '''How many missing values are there across each column''' 29 | df1.isnull().sum() 30 | 31 | #Dropping rows with Missing Values----------------------- 32 | 33 | #Create a DataFrame 34 | df1 = {'Name':['George','Andrea','micheal','maggie','Ravi','Xien','Jalpa',np.nan], 35 | 'State':['Arizona','Georgia','Newyork','Indiana','Florida','California',np.nan,np.nan], 36 | 'Gender':["M","F","M","F","M","M",np.nan,np.nan], 37 | 'Score':[63,48,56,75,np.nan,77,np.nan,np.nan]} 38 | 39 | df1 = pd.DataFrame(df1,columns=['Name','State','Gender','Score']) 40 | print(df1) 41 | 42 | #Drop all rows that have any NaN (missing) values 43 | df1.dropna() 44 | 45 | #Drop only if entire row has NaN values 46 | df1.dropna(how='all') 47 | 48 | #Drop only if a row has more than 2 NaN values 49 | df1.dropna(thresh=2) 50 | 51 | #Drop NaN in a specific column 52 | df1.dropna(subset=['Gender']) 53 | df2 = df1.dropna(subset=['Gender','Score']) 54 | df2 55 | #Dropping rows using axis values: 56 | df1 57 | df1.dropna(axis=0) 58 | 59 | #Dropping columns using axis values: 60 | df1.dropna(axis=1) 61 | 62 | #------------------Creating Data Frame Again----------------------------------- 63 | df1 = {'Name':['George','Andrea','micheal','maggie','Ravi','Xien','Jalpa',np.nan], 64 | 'State':['Arizona','Georgia','Newyork','Indiana','Florida','California',np.nan,np.nan], 65 | 'Gender':["M","F","M","F","M","M",np.nan,np.nan], 66 | 'Score':[63,48,56,75,np.nan,77,np.nan,np.nan]} 67 | 68 | df1 = pd.DataFrame(df1,columns=['Name','State','Gender','Score']) 69 | print(df1) 70 | #------------------Replacing Missing Values with Zero-------------------------- 71 | 72 | df1 73 | df1.fillna(0) 74 | 75 | #-----------------Replacing Missing Values with Mean of the column------------- 76 | 77 | df1 78 | df1["Score"].fillna(df1["Score"].mean(),inplace=True) 79 | print(df1) 80 | 81 | #----------------Replacing Missing Value with Median of the column------------- 82 | df1["Score"].fillna(df1["Score"].median(), inplace=True) 83 | print(df1) 84 | 85 | #Replace Missing (or) Generic Values using replace() method 86 | #Many times, we have to replace a generic value with some specific value. 87 | #We can achieve this by applying the replace method. 
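#NOTE (added): replace() also accepts a nested, per-column mapping, e.g.
#df.replace({'one': {2000: 60}}) changes 2000 only in column 'one'.
#The example below uses the simpler flat-dictionary form.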
88 | df = pd.DataFrame({'one':[10,20,30,40,50,2000], 'two':[1000,0,30,40,50,60]}) 89 | print(df) 90 | 91 | print (df.replace({1000:10,2000:60})) 92 | 93 | #------------------Handling Duplicate Values-------------------------------- 94 | 95 | #The drop_duplicates() function performs common data cleaning task that deals with duplicate values 96 | #in the DataFrame. This method helps in removing duplicate values from the DataFrame. 97 | 98 | emp = {"Name": ["Parker", "Smith", "William", "Parker"], 99 | "Age": [21, 32, 29, 21]} 100 | info = pd.DataFrame(emp) 101 | print(info) 102 | info = info.drop_duplicates() 103 | print(info) 104 | 105 | 106 | emp = {"Name": ["Parker", "Smith", "William", "Parker"], 107 | "Age": [21, 32, 29, 22]} 108 | info = pd.DataFrame(emp) 109 | print(info) 110 | info = info.drop_duplicates() 111 | print(info) 112 | -------------------------------------------------------------------------------- /04.Loops.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue Aug 24 18:51:52 2021 4 | 5 | @author: 6 | """ 7 | #--------------------------------Loops---------------------------------------- 8 | #-------------For Loop--------------- 9 | # for loop is used in case where we need to execute some part of the code until the given condition 10 | # is satisfied. It is better to use for loop if the number of iteration is known in advance. 11 | #It is frequently used to traverse the data structures like list, tuple, or dictionary. 12 | #Example1: 13 | i=0 14 | for i in range(0,10): 15 | print(i,end =',') 16 | 17 | #Example2:printing the table of the given number 18 | i=1 19 | num = int(input("Enter a number:")) 20 | for i in range(1,11): 21 | print("%a X %a = %a" %(num,i,num*i)) 22 | 23 | #Example3:Nested For loop 24 | n = int(input("Enter the number of rows you want to print?")) 25 | i,j=0,0 26 | for i in range(0,n): 27 | print() 28 | for j in range(0,i+1): 29 | print("*",end="") 30 | 31 | #Exampl4: Else statement with For loop 32 | for i in range(0,5): 33 | print(i) 34 | else:print("for loop completely exhausted, since there is no break.") 35 | 36 | #------------While Loop------------- 37 | # while loop is to be used in the scenario where we don't know the number of iterations in advance. 38 | #The block of statements is executed in the while loop until the condition specified in while loop 39 | #is satisfied. 40 | #Example1: 41 | i=1; 42 | while i<=10: 43 | print(i); 44 | i=i+1; 45 | 46 | #Example2: 47 | i=1 48 | number=0 49 | 50 | number = int(input("Enter the number?")) 51 | while i<=10: 52 | print("%a X %a = %a \n"%(number,i,number*i)); 53 | i = i+1; 54 | 55 | #Example3:Infinite while loop 56 | var = 1 57 | while var != 2: 58 | i = int(input("Enter the number?")) 59 | print ("Entered value is %d"%(i)) 60 | 61 | while (1): 62 | print("Hi! we are inside the infinite while loop"); 63 | 64 | # For loop is ran finite no. of times even if we give only one value 65 | for i in range(0,1): 66 | print("Hi! we are inside the finite for loop"); 67 | 68 | #Example4: Using else with while loop 69 | i=1; 70 | while i<=5: 71 | print(i) 72 | i=i+1; 73 | else:print("The while loop exhausted"); 74 | 75 | #-------------If Statement---------------- 76 | #The if statement is used to test a specific condition. 77 | #If the condition is true, a block of code (if-block) will be executed. 
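#NOTE (added): Python also has a one-line conditional expression ("ternary")
#that is handy for simple assignments:
parity = "even" if 10 % 2 == 0 else "odd"
print(parity)   # -> even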
78 | #Exampl1: 79 | num = int(input("enter the number?")) 80 | if num%2 == 0: 81 | print("Number is even") 82 | 83 | #Example2: 84 | a = int(input("Enter a? ")); 85 | b = int(input("Enter b? ")); 86 | c = int(input("Enter c? ")); 87 | if a>b and a>c: 88 | print("a is largest"); 89 | 90 | if b>a and b>c: 91 | print("b is largest"); 92 | 93 | if c>a and c>b: 94 | print("c is largest"); 95 | 96 | #-----------If Else Statement------------- 97 | #If the condition provided in the if statement is false, then the else statement will be executed. 98 | #Example1: 99 | age = int (input("Enter your age? ")) 100 | if age>=18: 101 | print("You are eligible to vote !!"); 102 | else: 103 | print("Sorry! you have to wait !!"); 104 | 105 | #Example2: 106 | num = int (input("enter the number?")) 107 | if num%2 == 0: 108 | print("Number is even...") 109 | else: 110 | print("Number is odd...") 111 | 112 | #-------Elif Statement------------------ 113 | #The elif statement enables us to check multiple conditions and execute the specific block of 114 | #statements depending upon the true condition among them.It works like if-else-if ladder statement. 115 | #Example: 116 | number = int(input("Enter the number?")) 117 | if number==10: 118 | print("number is equals to 10") 119 | elif number==50: 120 | print("number is equal to 50"); 121 | elif number==100: 122 | print("number is equal to 100"); 123 | else: 124 | print("number is not equal to 10, 50 or 100"); 125 | -------------------------------------------------------------------------------- /02.Functions_Basics.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Aug 21 15:32:07 2021 4 | 5 | @author: Admin 6 | """ 7 | # -*- coding: utf-8 -*- 8 | """ 9 | 10 | #abs(): Returns the absolute value of a number. 11 | # integer number 12 | 13 | integer = -20 14 | abs(integer) 15 | print('Absolute value of -20 is:', abs(integer)) 16 | 17 | # floating number 18 | 19 | floating = -20.83 20 | print('Absolute value of -20.83 is:', abs(floating)) 21 | 22 | #all(): It returns true if all items passed in iterable object are true. 23 | #Otherwise, it returns False. 24 | #This fxn accepts an iterable object (such as list, dictionary, etc.). 25 | # all values true 26 | 27 | k = [1, 3, 4, 6] 28 | print(all(k)) 29 | 30 | # all values false 31 | 32 | k = [0, False] 33 | print(all(k)) 34 | 35 | # one false value 36 | k = [1, 3, 7, 0] 37 | print(all(k)) 38 | 39 | # empty iterable 40 | k = [] 41 | print(all(k)) 42 | 43 | #------------------------------------------------------------------------------------ 44 | 45 | #bool(): Converts a value to boolean(True or False) 46 | test1 = [] 47 | print(test1,'is',bool(test1)) 48 | 49 | test1 = [0] 50 | print(test1,'is',bool(test1)) 51 | 52 | test1 = None 53 | print(test1,'is',bool(test1)) 54 | 55 | test1 = 'Easy string' 56 | print(test1,'is',bool(test1)) 57 | 58 | #sum(): Used to get the sum of numbers of an iterable, i.e., list. 59 | 60 | list_1 = [1,2,4] 61 | s = sum(list_1) 62 | print(s) 63 | 64 | s = sum(list_1, 10) 65 | print(s) 66 | 67 | #len(): Returns the length (the number of items) of an object. 68 | 69 | strA = 'Python' 70 | print(len(strA)) 71 | 72 | #list() creates a list in python. 73 | # empty list 74 | 75 | Gaurav = list() 76 | print(Gaurav) 77 | 78 | #Converting string to list 79 | String = 'abcde' 80 | print(list(String)) 81 | 82 | #divmod(): Used to get quotient and remainder of two numbers. 
83 | #This function takes two numeric arguments and returns a tuple. 84 | #Both arguments are required and numeric 85 | # Calling function 86 | result = divmod(10,2) 87 | # Displaying result 88 | print(result) 89 | 90 | #dict(): Its a constructor which creates a dictionary. 91 | # Calling function 92 | result = dict() # returns an empty dictionary 93 | print(result) 94 | 95 | result2 = dict(a=1,b=2) 96 | # Displaying result 97 | print(result2) 98 | 99 | #set(): It is used to create a new set using elements passed during the call. 100 | #It takes an iterable object as an argument and returns a new set object. 101 | # Calling function 102 | result = set() # empty set 103 | result2 = set('12') 104 | result3 = set('javatpoint') 105 | result4 = {1,2} 106 | print (result4) 107 | # Displaying result 108 | print(result) 109 | print(result2) 110 | print(result3) 111 | 112 | #pow(): Used to compute the power of a number. 113 | # positive x, positive y (x**y) 114 | print(pow(4, 2)) 115 | 116 | # negative x, positive y 117 | print(pow(-4, 2)) 118 | 119 | #tuple(): Used to create a tuple object. 120 | t1 = tuple() 121 | print('t1=', t1) 122 | 123 | # creating a tuple from a list 124 | l = [1, 6, 9] 125 | t2 = tuple(l) 126 | print('t2=', t2) 127 | 128 | # creating a tuple from a string 129 | t1 = tuple('Java') 130 | print('t1=',t1) 131 | 132 | #---------------------------------------------------------------------- 133 | #lambda()- Helps creating anonymous functions. 134 | #Lambda functions can accept any number of arguments, 135 | #but they can return only one value in the form of expression. 136 | 137 | #Multiple arguments to Lambda function 138 | x = lambda a,b:a+b 139 | # a and b are the arguments and a+b is the expression which gets evaluated and returned. 140 | print("Addition = ",x(20,10)) 141 | 142 | #Program to filter out the list which contains numbers divisible by 3. 
143 | List = [1,2,3,4,10,123,22] 144 | Oddlist = list(filter(lambda x:(x%3 == 0),List)) 145 | # the list contains all the items of the list for which the lambda function evaluates to true 146 | print(Oddlist) 147 | 148 | #program to triple each number of the list using map 149 | List = [1,2,3,4,10,123,22] 150 | new_list = list(map(lambda x:x*3,List)) 151 | # this will return the triple of each item of the list and add it to new_list 152 | print(new_list) 153 | -------------------------------------------------------------------------------- /01.DataStructures.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat April 4 01:46:06 2020 4 | 5 | @author: Admin 6 | """ 7 | 8 | print("hello world") 9 | 10 | #Numbers 11 | #a=3 , b=5 #a and b are number objects 12 | 13 | #String 14 | str1 = 'Hello Students' #string str1 15 | str2 = ' how are you' #string str2 16 | str1 17 | str2 18 | print (str1[0:5]) #printing first five character using slice operator 19 | (str1[0:5]) 20 | print (str1[4]) #printing 5th character of the string 21 | print (str1*2) #printing the string twice 22 | print (str1 + str2) #printing the concatenation of str1 and str2 23 | 24 | #Lists 25 | l = [1, "hi", "python", True] 26 | print (l[3:]) 27 | print (l[0:2]) 28 | print (l) 29 | print (l + l) 30 | print (l * 3) 31 | print (type(l)) 32 | #Lets try mutation 33 | l[1] = "Bye" 34 | print (l) 35 | 36 | #Tuple 37 | t = ('hi', 'python', 2, 4) 38 | t 39 | print (t[1:]); 40 | print (t[0:3]); 41 | print (t); 42 | print (t + t) 43 | print (t * 3) 44 | print (type(t)) 45 | #Lets try mutation 46 | t[1] = "Bye" 47 | print (t) 48 | 49 | #Dictionary 50 | d = {1:"Jimmy", 2:'Alex', 3:'john', 4:'mike'} 51 | d 52 | print("1st name is "+d[1]) 53 | print("2nd name is "+ d[4]) 54 | print (d); 55 | print (d.keys()); 56 | print (d.values()); 57 | 58 | #----ADVANCED---- 59 | #list 60 | #ordered collection of items; sequence of items in a list 61 | shoplist =['apple','carrot','mango', 'banana'] 62 | shoplist 63 | len(shoplist) 64 | print(shoplist) 65 | 66 | #add item to list 67 | shoplist.append('rice') 68 | shoplist 69 | 70 | #sort 71 | shoplist.sort() #inplace sort 72 | shoplist 73 | 74 | #index/select 75 | shoplist[0] 76 | shoplist[0:4] 77 | 78 | #delete item 79 | del shoplist[0] 80 | shoplist 81 | 82 | #Tuple 83 | #Used to hold multiple object; similar to lists; less functionality than list 84 | #immutable - cannot modify- fast ; ( ) 85 | zoo = ('python','lion','elephant','bird') 86 | zoo 87 | len(zoo) 88 | languages = 'c', 'java', 'php' , 1 #better to put (), this also works 89 | languages 90 | type(languages) 91 | 92 | #Dictionary - like an addressbook. 
use of associate keys with values 93 | #key-value pairs { 'key1':value1, 'key2':value2} ; { } bracket, :colon 94 | 95 | student = {'A101': 'Abhinav', 'A102': 'Ravi', 'A103':'Prafull', 'A104': 'Karan'} 96 | student 97 | student['A103'] 98 | print('Name of rollno A103 is ' +student['A103']) 99 | del student['A104'] 100 | student 101 | len(student) 102 | 103 | #for rollno, name in student.items(): 104 | #print('name of {} is {} '.format(rollno, name) ) 105 | 106 | #Lets test Mutation: 107 | #adding a value 108 | student['A104'] = 'Hitesh' 109 | student 110 | 111 | #Set 112 | Anubhav = {1,2,3,4,5} 113 | Anubhav 114 | Aman_1 = set() 115 | Aman_1 116 | 117 | #Sets are unordered collections of objects; ( [ , ]) 118 | teamA = set(['india','england','australia','sri lanka','ireland']) 119 | teamA 120 | teamB = set(['pakistan', 'south africa','bangladesh','ireland']) 121 | teamB 122 | 123 | #Checking whether a data value exists in a set or not. 124 | 'india' in teamA 125 | 'india' in teamB 126 | 127 | #Adding values in a set 128 | teamA.add('China') 129 | teamA #puts in order 130 | teamA.add('india') 131 | teamA #no duplicates 132 | teamA.remove('australia') 133 | teamA 134 | 135 | #Create dataframe : 136 | import pandas as pd 137 | 138 | #Create a DataFrame 139 | d = {'Name':['Alisa','Bobby','Cathrine','Alisa','Bobby','Cathrine', 140 | 'Alisa','Bobby','Cathrine','Alisa','Bobby','Cathrine'], 141 | 'Exam':['Semester 1','Semester 1','Semester 1','Semester 1','Semester 1','Semester 1', 142 | 'Semester 2','Semester 2','Semester 2','Semester 2','Semester 2','Semester 2'], 143 | 'Subject':['Mathematics','Mathematics','Mathematics','Science','Science','Science', 144 | 'Mathematics','Mathematics','Mathematics','Science','Science','Science'], 145 | 'Score':[62,47,55,74,31,77,85,63,42,67,89,81]} 146 | 147 | d 148 | 149 | df = pd.DataFrame(d,columns=['Name','Exam','Subject','Score']) 150 | df 151 | 152 | #View a column of the dataframe in pandas: 153 | df['Name'] 154 | 155 | #View two columns of the dataframe in pandas: 156 | df[['Name','Score','Exam']] 157 | 158 | #View first two rows of the dataframe in pandas: 159 | df[0:2] 160 | 161 | 162 | 163 | 164 | 165 | -------------------------------------------------------------------------------- /19.MarketBasketAnalysis_AprioriAlgo.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue Oct 26 14:46:14 2021 4 | 5 | @author: Admin 6 | """ 7 | #Import Libraries---------- 8 | import numpy as np 9 | import pandas as pd 10 | from mlxtend.frequent_patterns import apriori, association_rules 11 | 12 | #Loading and exploring the data----------------- 13 | #Loading the Data 14 | data = pd.read_excel('Online_Retail_Store.xlsx') 15 | data.head() 16 | data.info() 17 | # Exploring the columns of the data 18 | data.columns 19 | # Exploring the different regions of transactions 20 | data.Country.unique() 21 | 22 | #Cleaning the Data----------------- 23 | #Identifying missing values: 24 | '''Is there any missing values across columns''' 25 | data.isnull().any() 26 | 27 | '''How many missing values are there across each column''' 28 | data.isnull().sum() 29 | 30 | # Dropping the rows without any invoice number 31 | data.dropna(axis = 0, subset =['InvoiceNo'], inplace = True) 32 | data.isnull().sum() 33 | 34 | # Dropping all transactions which were done on credit ('C') 35 | data.info() 36 | data = data[~data['InvoiceNo'].str.contains('C')] 37 | #For the above cmd to work, we need to ensure that we 
convert Column "Invoinve No." to string form. 38 | data['InvoiceNo'] = data['InvoiceNo'].astype('str') 39 | data = data[~data['InvoiceNo'].str.contains('C')] 40 | #Hence, now we have been able to remove the rows with credit (C) type billing. 41 | 42 | # Stripping extra spaces in the description 43 | data['Description'] = data['Description'].str.strip() 44 | 45 | #Splitting the data according to the region of transaction------- 46 | # Transactions done in France 47 | basket_France = (data[data['Country'] =="France"] 48 | .groupby(['InvoiceNo', 'Description'])['Quantity'] 49 | .sum().unstack().reset_index() 50 | .fillna(0) 51 | .set_index('InvoiceNo')) 52 | 53 | # Transactions done in the United Kingdom 54 | basket_UK = (data[data['Country'] =="United Kingdom"] 55 | .groupby(['InvoiceNo', 'Description'])['Quantity'] 56 | .sum().unstack().reset_index().fillna(0) 57 | .set_index('InvoiceNo')) 58 | 59 | # Transactions done in Portugal 60 | basket_Por = (data[data['Country'] =="Portugal"] 61 | .groupby(['InvoiceNo', 'Description'])['Quantity'] 62 | .sum().unstack().reset_index().fillna(0) 63 | .set_index('InvoiceNo')) 64 | 65 | basket_Sweden = (data[data['Country'] =="Sweden"] 66 | .groupby(['InvoiceNo', 'Description'])['Quantity'] 67 | .sum().unstack().reset_index().fillna(0) 68 | .set_index('InvoiceNo')) 69 | 70 | #Hot encoding the Data------------ 71 | # Defining the hot encoding function to make the data suitable 72 | # for the concerned libraries 73 | def hot_encode(x): 74 | if(x<= 0): 75 | return 0 76 | if(x>= 1): 77 | return 1 78 | 79 | # Encoding the datasets 80 | basket_encoded = basket_France.applymap(hot_encode) 81 | basket_France = basket_encoded 82 | 83 | basket_encoded = basket_UK.applymap(hot_encode) 84 | basket_UK = basket_encoded 85 | 86 | basket_encoded = basket_Por.applymap(hot_encode) 87 | basket_Por = basket_encoded 88 | 89 | basket_encoded = basket_Sweden.applymap(hot_encode) 90 | basket_Sweden = basket_encoded 91 | 92 | #Building the models and analyzing the results----------------- 93 | 94 | #France: 95 | # Building the model 96 | frq_items = apriori(basket_France, min_support = 0.15, use_colnames = True) 97 | frq_items 98 | 99 | # Collecting the inferred rules in a dataframe 100 | rules = association_rules(frq_items, metric ="lift", min_threshold = 1) 101 | print(rules.head()) 102 | France_rules=pd.DataFrame(rules) 103 | 104 | #Portugal 105 | frq_items = apriori(basket_Por, min_support = 0.15, use_colnames = True) 106 | rules = association_rules(frq_items, metric ="lift", min_threshold = 1) 107 | print(rules.head()) 108 | Portugal_rules=pd.DataFrame(rules) 109 | 110 | #Sweden 111 | frq_items = apriori(basket_Sweden, min_support = 0.10, use_colnames = True) 112 | rules = association_rules(frq_items, metric ="lift", min_threshold = 1) 113 | print(rules.head()) 114 | Sweden_rules=pd.DataFrame(rules) 115 | 116 | #UK 117 | frq_items = apriori(basket_UK, min_support = 0.09, use_colnames = True) 118 | rules = association_rules(frq_items, metric ="lift", min_threshold = 1) 119 | print(rules.head()) 120 | UK_rules=pd.DataFrame(rules) 121 | 122 | #Here Empty DataFrame signifies that none of the Rules in UK satisfy the levels mentioned for 123 | #Support & Lift in above freq items sets -------------------------------------------------------------------------------- /13.multiple_linear_regression_BackwardElimination.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue Sep 21 18:49:36 2021 4 | 5 | 
@author: Admin 6 | """ 7 | # Multiple Linear Regression 8 | 9 | # Importing the libraries 10 | 11 | 'import matplotlib.pyplot as plt' 12 | import pandas as pd 13 | 14 | # Importing the dataset 15 | dataset = pd.read_csv('D:\SkillEdge\Python\Final\Codes\pyData/50_Startups.csv') 16 | 17 | #Method-1 (Handling Categorical Variables) 18 | pd.get_dummies(dataset["State"]) 19 | pd.get_dummies(dataset["State"],drop_first=True) 20 | S_Dummy = pd.get_dummies(dataset["State"],drop_first=True) 21 | S_Dummy.head(5) 22 | #Now, lets concatenate these dummy var columns in our dataset. 23 | dataset = pd.concat([dataset,S_Dummy],axis=1) 24 | dataset.head(5) 25 | #dropping the columns whose dummy var have been created 26 | dataset.drop(["State",],axis=1,inplace=True) 27 | dataset.head(5) 28 | #------------------------------------------------------------------------------ 29 | 30 | #Obtaining DV & IV from the dataset 31 | X = dataset.iloc[:,[0,1,2,4,5]].values 32 | y = dataset.iloc[:,3].values 33 | 34 | # Splitting the dataset into the Training set and Test set 35 | from sklearn.model_selection import train_test_split 36 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0) 37 | 38 | 39 | # Fitting Multiple Linear Regression to the Training set 40 | from sklearn.linear_model import LinearRegression 41 | regressor = LinearRegression() 42 | regressor.fit(X_train, y_train) 43 | 44 | # Predicting the Test set results 45 | y_pred = regressor.predict(X_test) 46 | 47 | # Accuracy of the model 48 | 49 | #Calculating the r squared value: 50 | from sklearn.metrics import r2_score 51 | r2_score(y_test,y_pred) 52 | 53 | #Coefficient 54 | regressor.coef_ 55 | 56 | # Intercept 57 | regressor.intercept_ 58 | 59 | #The above score tells that our model is 93% accurate with the test dataset. 60 | 61 | #--------------------------Backward Elimination-------------------------------- 62 | #Backward elimination is a feature selection technique while building a machine learning model. It is used 63 | #to remove those features that do not have significant effect on dependent variable or prediction of output. 64 | 65 | #Step: 1- Preparation of Backward Elimination: 66 | 67 | #Importing the library: 68 | import statsmodels.api as sm 69 | 70 | #Adding a column in matrix of features: 71 | import numpy as nm 72 | X = nm.append(arr = nm.ones((50,1)).astype(int), values=X, axis=1) 73 | 74 | #Applying backward elimination process now 75 | #Firstly we will create a new feature vector x_opt, which will only contain a set of 76 | #independent features that are significantly affecting the dependent variable. 77 | x_opt=X[:, [ 0,1,2,3,4,5]] 78 | 79 | #for fitting the model, we will create a regressor_OLS object of new class OLS of 80 | #statsmodels library. Then we will fit it by using the fit() method. 81 | regressor_OLS=sm.OLS(endog = y, exog=x_opt).fit() 82 | 83 | #We will use summary() method to get the summary table of all the variables. 84 | regressor_OLS.summary() 85 | 86 | #In the above summary table, we can clearly see the p-values of all the variables. 87 | #Here x1, x2 are dummy variables, x3 is R&D spend, x4 is Administration spend, and x5 is Marketing spend. 88 | 89 | #Now since x5 has highest p-value greater than 0.05, hence, will remove the x1 variable 90 | #(dummy variable) from the table and will refit the model. 
91 | x_opt= X[:, [0,1,2,3,4]] 92 | regressor_OLS=sm.OLS(endog = y, exog=x_opt).fit() 93 | regressor_OLS.summary() 94 | 95 | #Now x4 has the highest p-value greater than 0.05, so we remove the x4 variable 96 | #(a dummy variable) and refit the model. 97 | x_opt= X[:, [0,1,2,3]] 98 | regressor_OLS=sm.OLS(endog = y, exog=x_opt).fit() 99 | regressor_OLS.summary() 100 | 101 | #Next we remove the Admin spend (x2), which has a p-value of 0.602, and 102 | # refit the model again. 103 | x_opt= X[:, [0,1,3]] 104 | regressor_OLS=sm.OLS(endog = y, exog=x_opt).fit() 105 | regressor_OLS.summary() 106 | 107 | #Finally, we remove the Marketing spend variable, whose p-value of 0.60 is 108 | #still above the 0.05 significance level. 109 | x_opt= X[:, [0,1]] 110 | regressor_OLS=sm.OLS(endog = y, exog=x_opt).fit() 111 | regressor_OLS.summary() 112 | 113 | #Hence, only the R&D spend variable is significant for the prediction. 114 | #So we can now predict efficiently using this variable alone. 115 | 116 | #----------Building Multiple Regression model by only using R&D spend:----------------- 117 | #importing datasets 118 | data_set= pd.read_csv('F:/WORK/pyWork/pyData/50_Startups.csv') 119 | #Extracting Independent and dependent Variable 120 | x_BE= data_set.iloc[:,:-4].values 121 | y_BE= data_set.iloc[:,4].values 122 | # Splitting the dataset into training and test set. 123 | from sklearn.model_selection import train_test_split 124 | x_BE_train, x_BE_test, y_BE_train, y_BE_test= train_test_split(x_BE, y_BE, test_size= 0.2, random_state=0) 125 | 126 | #Fitting the MLR model to the training set: 127 | from sklearn.linear_model import LinearRegression 128 | regressor= LinearRegression() 129 | regressor.fit(x_BE_train, y_BE_train) 130 | 131 | #Predicting the Test set results: 132 | y_pred= regressor.predict(x_BE_test) 133 | 134 | #Checking the score 135 | #Calculating the r squared value: 136 | from sklearn.metrics import r2_score 137 | r2_score(y_BE_test,y_pred) 138 | #The above score tells that our model is now more accurate on the test dataset, with 139 | #accuracy equal to 95% 140 | 141 | #Calculating the coefficients: 142 | print(regressor.coef_) 143 | 144 | #Calculating the intercept: 145 | print(regressor.intercept_) 146 | 147 | #Regression Eq'n: Profit = 48416 + 0.85*R&D_Spend 148 |
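# NOTE (added): an optional sketch that automates the manual backward-elimination
# steps above. It assumes the X array (with the constant column already appended)
# and y built earlier in this script, plus a 0.05 significance level; the helper
# name is hypothetical.
def backward_elimination(X_full, y_target, sig_level=0.05):
    import numpy as np
    import statsmodels.api as sm
    cols = list(range(X_full.shape[1]))
    while True:
        model = sm.OLS(endog=y_target, exog=X_full[:, cols]).fit()
        worst_p = model.pvalues.max()
        if worst_p <= sig_level:
            return cols, model            # every remaining feature is significant
        # drop the column with the highest p-value and refit
        drop_idx = int(np.argmax(model.pvalues))
        del cols[drop_idx]

# Example (uncomment to run after building X and y above):
# kept_columns, final_model = backward_elimination(X, y)
# print(kept_columns); print(final_model.summary())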
149 | -------------------------------------------------------------------------------- /10.Graphs.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun Sep 5 17:32:15 2021 4 | 5 | @author: Admin 6 | """ 7 | import matplotlib.pyplot as plt 8 | #-----------------------------------GRAPHS--------------------------------- 9 | 10 | #--------------------------Bar Chart------------------------------------------ 11 | #Vertical Bar Chart 12 | import numpy as np 13 | 14 | city=['Delhi','Beijing','Washington','Tokyo','Moscow'] 15 | Happiness_Index=[60,40,70,65,85] 16 | 17 | plt.bar(city,Happiness_Index,color='pink',edgecolor='red') 18 | plt.xlabel('City', fontsize=16) 19 | plt.ylabel('Happiness_Index', fontsize=16) 20 | plt.title('Barchart - Happiness index across cities',fontsize=20) 21 | 22 | #Horizontal Bar Chart 23 | 24 | city=['Delhi','Beijing','Washington','Tokyo','Moscow'] 25 | Happiness_Index=[60,40,70,65,85] 26 | 27 | plt.barh(city,Happiness_Index,color='blue',edgecolor='black') 28 | plt.xlabel('Happiness_Index', fontsize=16) 29 | plt.ylabel('City', fontsize=16) 30 | plt.title('Horizontal Barchart - Happiness index across cities',fontsize=20) 31 | 32 | #Stacked Bar Chart in Python with legends: 33 | 34 | city=['Delhi','Beijing','Washington','Tokyo','Moscow'] 35 | Gender=['Male','Female'] 36 | Happiness_Index_Male=[60,40,70,65,85] 37 | Happiness_Index_Female=[30,60,70,55,75] 38 | 39 | plt.bar(city,Happiness_Index_Male,color='blue',edgecolor='black') 40 | plt.bar(city,Happiness_Index_Female,color='pink',edgecolor='black',bottom=Happiness_Index_Male) 41 | #bar() function plots the Happiness_Index_Female on top of Happiness_Index_Male with the help of 42 | #argument bottom=Happiness_Index_Male. 43 | plt.xlabel('City', fontsize=16) 44 | plt.ylabel('Happiness_Index', fontsize=16) 45 | plt.title('Stacked Barchart - Happiness index across cities',fontsize=18) 46 | plt.legend(Gender,loc=2) 47 | 48 | #--------------------------Histogram------------------------------------------- 49 | #Histogram with no Fills: 50 | 51 | values = [82,76,24,40,67,62,75,78,71,32,98,89,78,67,72,82,87,66,56,52] 52 | plt.hist(values,5, histtype='step', align='mid', color='green', label='Test Score Data') 53 | #Here, second argument is the number of bins, 54 | #histype=’step’: it plots the histogram in step, 55 | #format, aligned to mid, color chosen is green. 56 | plt.legend(loc=2) 57 | #argument loc=2 plots the legend on the top left corner. 58 | plt.title('Histogram of score') 59 | 60 | #Histogram with bar Filled: 61 | 62 | values = [82,76,24,40,67,62,75,78,71,32,98,89,78,67,72,82,87,66,56,52] 63 | plt.hist(values,10, histtype='bar', color='cyan', label='Test score Data',edgecolor='black') 64 | #Argument histype=’bar’ plots the histogram in bar filled format. 
65 | plt.legend() 66 | plt.title('Histogram of score') 67 | 68 | #----------------------------Box Plot------------------------------------------ 69 | #Box Plot 70 | 71 | value1=[82,76,24,40,67,62,75,78,71,32,98,89,78,67,72,82,87,66,56,52] 72 | value2=[62,5,91,25,36,32,96,95,3,90,95,32,27,55,100,15,71,11,37,21] 73 | value3=[23,89,12,78,72,89,25,69,68,86,19,49,15,16,16,75,65,31,25,52] 74 | value4=[59,73,70,16,81,61,88,98,10,87,29,72,16,23,72,88,78,99,75,30] 75 | 76 | box_plot_data=[value1,value2,value3,value4] 77 | plt.boxplot(box_plot_data) 78 | 79 | #Box plot with fills and labels: 80 | 81 | value1 = [82,76,24,40,67,62,75,78,71,32,98,89,78,67,72,82,87,66,56,52] 82 | value2=[62,5,91,25,36,32,96,95,3,90,95,32,27,55,100,15,71,11,37,21] 83 | value3=[23,89,12,78,72,89,25,69,68,86,19,49,15,16,16,75,65,31,25,52] 84 | value4=[59,73,70,16,81,61,88,98,10,87,29,72,16,23,72,88,78,99,75,30] 85 | 86 | box_plot_data=[value1,value2,value3,value4] 87 | plt.boxplot(box_plot_data,patch_artist=True,labels=['course1','course2','course3','course4']) 88 | #argument "patch_artist=True", fills the boxplot and argument "label" takes label to be plotted. 89 | 90 | #Horizontal box plot in python with different colors: 91 | 92 | value1 = [82,76,24,40,67,62,75,78,71,32,98,89,78,67,72,82,87,66,56,52] 93 | value2=[62,5,91,25,36,32,96,95,3,90,95,32,27,55,100,15,71,11,37,21] 94 | value3=[23,89,12,78,72,89,25,69,68,86,19,49,15,16,16,75,65,31,25,52] 95 | value4=[59,73,70,16,81,61,88,98,10,87,29,72,16,23,72,88,78,99,75,30] 96 | 97 | box_plot_data=[value1,value2,value3,value4] 98 | box=plt.boxplot(box_plot_data,vert=0,patch_artist=True, 99 | labels=['course1','course2','course3','course4'],) 100 | #Adding argument vert =0 plots the horizontal box plot. 101 | colors = ['cyan', 'lightblue', 'lightgreen', 'tan'] 102 | for patch, color in zip(box['boxes'], colors): 103 | patch.set_facecolor(color) 104 | #Colors array takes four different colors and passes them to four different boxes of the boxplot 105 | #with patch.set_facecolor() function. 106 | #-------------------Line plot or Line chart -------------------- 107 | 108 | values = [1, 5, 8, 9, 7, 11, 8, 12, 14, 9] 109 | plt.plot(values) 110 | 111 | 112 | #Multiple Line charts with legends and Labels: 113 | #lets take an example of sale of units in 2016 and 2017 to demonstrate line charts. 
114 | 115 | sales1 = [1, 5, 8, 9, 7, 11, 8, 12, 14, 9, 5] 116 | sales2 = [3, 7, 9, 6, 4, 5, 14, 7, 6, 16, 12] 117 | line_chart1 = plt.plot( sales1,range(1,12)) 118 | line_chart2 = plt.plot( sales2,range(1,12)) 119 | plt.title('Monthly sales of 2016 and 2017') 120 | plt.xlabel('Sales') 121 | plt.ylabel('Month') 122 | plt.legend(['year 2016', 'year 2017'], loc=4) 123 | 124 | 125 | #Charts with different line styles: 126 | 127 | sales1 = [1, 5, 8, 9, 7, 11, 8, 12, 14, 9, 5] 128 | sales2 = [3, 7, 9, 6, 4, 5, 14, 7, 6, 16, 12] 129 | line_chart1 = plt.plot(range(1,12), sales1,'--') 130 | line_chart2 = plt.plot(range(1,12), sales2,':') 131 | plt.title('Monthly sales of 2016 and 2017') 132 | 133 | 134 | #---------------------Pie Chart-------------------------------------------- 135 | #Pie chart in Python with legends: 136 | 137 | values = [60, 80, 90, 55, 10, 30] 138 | Col = ['b', 'g', 'r', 'c', 'm', 'y'] 139 | labels = ['US', 'UK', 'India', 'Germany', 'Australia', 'South Korea'] 140 | Exp = (0.5, 0, 0, 0, 0, 0) 141 | plt.pie(values, colors=Col, labels= values,explode=Exp,counterclock=False, shadow=True) 142 | plt.title('Population Density Index') 143 | plt.legend(labels,loc=3) 144 | 145 | #Pie chart in Python with percentage values: 146 | 147 | values = [60, 80, 90, 55, 10, 30] 148 | colors = ['b', 'g', 'r', 'c', 'm', 'y'] 149 | labels = ['US', 'UK', 'India', 'Germany', 'Australia', 'South Korea'] 150 | explode = (0.2, 0, 0, 0, 0, 0) 151 | plt.pie(values, colors=colors, labels=labels, 152 | explode=explode, autopct='%1.1f%%', shadow=True) 153 | plt.title('Population Density Index') 154 | 155 | #-------------------------------Scatter Plot---------------------------------- 156 | # Scatter plot in Python: 157 | 158 | weight1=[63.3,57,64.3,63,71,61.8,62.9,65.6,64.8,63.1,68.3,69.7,65.4,66.3,60.7] 159 | height1=[156.3,100.7,114.8,156.3,237.1,123.9,151.8,164.7,105.4,136.1,175.2,137.4,164.2,151,124.3] 160 | plt.scatter(weight1,height1,c='r',marker='*') 161 | plt.xlabel('weight', fontsize=16) 162 | plt.ylabel('height', fontsize=16) 163 | plt.title('scatter plot - height vs weight',fontsize=20) 164 | 165 | #Scatter plot for three different groups 166 | 167 | weight1=[57,58.2,58.6,59.6,59.8,60.2,60.5,60.6,60.7,61.3,61.3,61.4,61.8,61.9,62.3] 168 | height1=[100.7,195.6,94.3,127.1,111.7,159.7,135,149.9,124.3,112.9,176.7,110.2,123.9,161.9,107.8] 169 | 170 | weight2=[62.9,63,63.1,63.2,63.3,63.4,63.4,63.4,63.5,63.6,63.7,64.1,64.3,64.3,64.7,64.8,65] 171 | height2=[151.8,156.3,136.1,124.2,156.3,130,181.2,255.9,163.1,123.1,119.5,179.9,114.8,174.1,108.8,105.4,141.4] 172 | 173 | 174 | weight3=[69.2,69.2,69.4,69.7,70,70.3,70.8,71,71.1,71.7,71.9,72.4,73,73.1,76.2] 175 | height3=[166.8,172.9,193.8,137.4,162.4,137.1,169.1,237.1,189.1,179.3,174.8,213.3,198,191.1,220.6] 176 | 177 | import numpy as np 178 | weight=np.concatenate((weight1,weight2,weight3)) 179 | height=np.concatenate((height1,height2,height3)) 180 | 181 | color_array = ['b'] * 15 + ['g'] * 17 + ['r'] * 15 182 | 183 | plt.scatter(weight, height, marker='*', c=color_array) 184 | 185 | plt.xlabel('weight', fontsize=16) 186 | plt.ylabel('height', fontsize=16) 187 | plt.title('grouped scatter plot - height vs weight',fontsize=20) 188 | -------------------------------------------------------------------------------- /14.logistic_regression.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Thu Sep 30 19:49:04 2021 4 | 5 | @author: Admin 6 | """ 7 | # Logistic Regression 8 | 9 | 
#-------------Logistic Regression------------------------------ 10 | #Import Libraries 11 | import pandas as pd 12 | import seaborn as sns 13 | import matplotlib.pyplot as plt 14 | 15 | 16 | #Import data 17 | titanic_data = pd.read_csv("D:\SkillEdge\Python\Final\Codes\pyData/titanic.csv") 18 | titanic_data.head(5) 19 | titanic_data.tail(5) 20 | 21 | print("No. of passengers in original dataset:" +str(len(titanic_data.index))) 22 | 23 | #Analyzing Data 24 | sns.countplot(x="survived",data=titanic_data) 25 | 26 | sns.countplot(x="survived",hue="sex",data=titanic_data) 27 | 28 | sns.countplot(x="survived",hue="pclass",data=titanic_data) 29 | 30 | #CHECKING DATA TYPE OF A VARIABLE AND CONVERTING IT INTO ANOTHER TYPE----- 31 | titanic_data.info() 32 | titanic_data["age"].plot.hist() 33 | plt.hist(titanic_data["age"]) 34 | 35 | 36 | #Converting var "age" from object type to float type 37 | titanic_data["age"] = pd.to_numeric(titanic_data.age, errors='coerce') 38 | titanic_data.info() 39 | #Parameter: errors = 'coerce' in above fxn, replaces missing values (like "?") if any 40 | #in "age" column by "nan" values. 41 | 42 | titanic_data["age"].plot.hist() 43 | 44 | #Converting var "fare" from object type to float type 45 | titanic_data["fare"] = pd.to_numeric(titanic_data.fare, errors='coerce') 46 | titanic_data.info() 47 | #Parameter: errors = 'coerce' in above fxn, replaces missing values (like "?") if any 48 | #in "fare" column by "nan" values. 49 | 50 | titanic_data["fare"].plot.hist() 51 | 52 | #Identifying/Finding missing values if any---- 53 | titanic_data.isnull() 54 | titanic_data.isnull().sum() 55 | 56 | sns.heatmap(titanic_data.isnull(),yticklabels=False, cmap="viridis") 57 | 58 | #Note: 59 | #Since missing values in "fare" are quite less, we can delete such rows. 60 | #Since missing values in "age" are high, its better we do imputation in it. 61 | 62 | sns.boxplot(x="age",data=titanic_data) 63 | sns.boxplot(x="fare",data=titanic_data) 64 | 65 | #By boxplot we observe that the no. of outliers in "age" are quite less, hence, 66 | #if we plan to do imputation in "age" we can do it by "mean" imputation. 67 | 68 | #Handling Missing Values------------ 69 | titanic_data.head(5) 70 | 71 | #Droping all the rows which have a missing value in column (Fare) 72 | #Drop NaN in a specific column 73 | titanic_data.dropna(subset=['fare'],inplace=True) 74 | sns.heatmap(titanic_data.isnull(),yticklabels=False) 75 | 76 | #Imputing missing values in column (Age) with mean imputation 77 | titanic_data["age"].fillna(titanic_data["age"].mean(), inplace=True) 78 | sns.heatmap(titanic_data.isnull(),yticklabels=False) 79 | 80 | #Hence, we do not have any missing values in the dataset now. 81 | titanic_data.isnull().sum() 82 | 83 | #Note: 84 | #A Heat map is usually drawn for either continuous of categorical var 85 | #Lets take few cont var columns and draw the heat map 86 | #Cont = titanic_data[:,[5,6,7]] 87 | #sns.heatmap(Cont) 88 | 89 | #There are lot of string value var in dataset which have to be converted to numerical 90 | #values for applying machine learing algoritm. Hence, we will now convert string var 91 | #to numerical var. 
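#(Editor's aside - not part of the original script: pd.get_dummies() one-hot encodes a
# categorical column, and drop_first=True keeps k-1 dummy columns so the dummy-variable
# trap is avoided. A minimal, self-contained illustration on a toy Series:
#     import pandas as pd
#     pd.get_dummies(pd.Series(['male', 'female', 'male']), drop_first=True)
#     -> a single 'male' indicator column with values 1, 0, 1
#     (shown as True/False in newer pandas versions).)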
92 | titanic_data.info()
93 | pd.get_dummies(titanic_data["sex"])
94 | 
95 | pd.get_dummies(titanic_data["sex"],drop_first=True)
96 | 
97 | Sex_Dummy = pd.get_dummies(titanic_data["sex"],drop_first=True)
98 | Sex_Dummy.head(5)
99 | 
100 | pd.get_dummies(titanic_data["embarked"])
101 | Embarked_Dummy = pd.get_dummies(titanic_data["embarked"],drop_first=True)
102 | Embarked_Dummy.head(5)
103 | 
104 | pd.get_dummies(titanic_data["pclass"])
105 | PClass_Dummy = pd.get_dummies(titanic_data["pclass"],drop_first=True)
106 | PClass_Dummy.head(5)
107 | 
108 | #Now, let's concatenate these dummy variable columns to our dataset.
109 | titanic_data = pd.concat([titanic_data,Sex_Dummy,PClass_Dummy,Embarked_Dummy],axis=1)
110 | titanic_data.head(5)
111 | 
112 | #dropping the columns whose dummy variables have been created
113 | titanic_data.drop(["sex","embarked","pclass","Passenger_id","name","ticket"],axis=1,inplace=True)
114 | titanic_data.head(5)
115 | 
116 | #Splitting the dataset into Train & Test dataset
117 | x=titanic_data.drop("survived",axis=1)
118 | y=titanic_data["survived"]
119 | 
120 | from sklearn.model_selection import train_test_split
121 | X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.25, random_state = 0)
122 | 
123 | # Fitting Logistic Regression to the Training set
124 | from sklearn.linear_model import LogisticRegression
125 | help(LogisticRegression())
126 | logmodel = LogisticRegression(solver='liblinear') #'liblinear' was the default solver for scikit-learn versions earlier than 0.22.0.
127 | logmodel.fit(X_train, y_train)
128 | 
129 | predictions = logmodel.predict(X_test)
130 | 
131 | from sklearn.metrics import confusion_matrix
132 | confusion_matrix(y_test,predictions)
133 | 
134 | confusion_matrix(predictions,y_test)
135 | 
136 | #Hence, accuracy = (165+84)/(165+84+30+44) ≈ 77.1%
137 | 
138 | #Calculating the coefficients:
139 | print(logmodel.coef_)
140 | 
141 | #Calculating the intercept:
142 | print(logmodel.intercept_)
143 | 
144 | #----To improve the accuracy of the model, let's go with the Backward Elimination method &
145 | # rebuild the logistic model again with a smaller set of independent variables--------
146 | titanic_data_1 = titanic_data
147 | titanic_data_1.head(5)
148 | 
149 | #--------------------------Backward Elimination--------------------------------
150 | #Backward elimination is a feature selection technique used while building a machine learning model.
151 | #It removes those features that do not have a significant effect on the dependent variable or the predicted output.
152 | 
153 | #Step: 1- Preparation for Backward Elimination:
154 | #Importing the library:
155 | import statsmodels.api as sm
156 | 
157 | #Adding a constant column to the matrix of features:
158 | x1=titanic_data_1.drop("survived",axis=1)
159 | y1=titanic_data_1["survived"]
160 | import numpy as nm
161 | x1 = nm.append(arr = nm.ones((1291,1)).astype(int), values=x1, axis=1)
162 | 
163 | #Applying the backward elimination process now
164 | #First, we will create a new feature matrix x_opt, which will only contain the set of
165 | #independent features that significantly affect the dependent variable.
166 | x_opt= x1[:, [0,1,2,3,4,5,6,7,8,9,10]]
167 | 
168 | #For fitting the model, we will create a regressor_OLS object of the OLS class from the statsmodels library.
169 | #Then we will fit it by using the fit() method.
170 | regressor_OLS=sm.OLS(endog = y1, exog=x_opt).fit()
171 | 
172 | #We will use the summary() method to get the summary table of all the variables.
173 | regressor_OLS.summary()
174 | 
175 | #In the above summary table, we can clearly see the p-values of all the variables.
176 | #We now remove, one at a time, the independent variables whose p-values are greater than 0.05 and refit.
177 | x_opt= x1[:, [0,1,2,4,5,6,7,8,9,10]]
178 | regressor_OLS=sm.OLS(endog = y1, exog=x_opt).fit()
179 | regressor_OLS.summary()
180 | 
181 | x_opt= x1[:, [0,1,2,4,5,6,7,9,10]]
182 | regressor_OLS=sm.OLS(endog = y1, exog=x_opt).fit()
183 | regressor_OLS.summary()
184 | 
185 | x_opt= x1[:, [0,1,2,5,6,7,9,10]]
186 | regressor_OLS=sm.OLS(endog = y1, exog=x_opt).fit()
187 | regressor_OLS.summary()
188 | 
189 | x_opt= x1[:, [0,1,2,5,6,7,10]]
190 | regressor_OLS=sm.OLS(endog = y1, exog=x_opt).fit()
191 | regressor_OLS.summary()
192 | #Hence, the independent variables age, sibsp, sex, pclass & embarked are significant
193 | #for predicting the value of the dependent variable "survived".
194 | #So we can now predict efficiently using these variables.
195 | 
196 | #-------Building Logistic Regression model using ind var: age, sibsp, sex, pclass & embarked--------
197 | # Splitting the dataset into training and test set.
198 | from sklearn.model_selection import train_test_split
199 | x_BE_train, x_BE_test, y_BE_train, y_BE_test= train_test_split(x_opt, y1, test_size= 0.25, random_state=0)
200 | 
201 | # Fitting Logistic Regression to the Training set
202 | from sklearn.linear_model import LogisticRegression
203 | logmodel = LogisticRegression(solver='liblinear')
204 | logmodel.fit(x_BE_train, y_BE_train)
205 | 
206 | predictions = logmodel.predict(x_BE_test)
207 | 
208 | from sklearn.metrics import confusion_matrix
209 | confusion_matrix(y_BE_test,predictions)
210 | 
211 | #Accuracy = (170+87)/(170+87+25+41) ≈ 80%
212 | 
213 | #Calculating the coefficients:
214 | print(logmodel.coef_)
215 | 
216 | #Calculating the intercept:
217 | print(logmodel.intercept_)
218 | 
219 | #So, your final predictive modelling equation becomes:
220 | #Survived =
221 | #exp(3.74 -0.03*age -0.27*sibsp -2.52*sex(male) -1.03*pclass(2) -2.1*pclass(3) -0.33*embd(S))
222 | #    /
223 | #(exp(3.74 -0.03*age -0.27*sibsp -2.52*sex(male) -1.03*pclass(2) -2.1*pclass(3) -0.33*embd(S)) + 1)
224 | -------------------------------------------------------------------------------- /31.Reading Files into Python.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"2. 
Reading Files into Python.ipynb","provenance":[],"authorship_tag":"ABX9TyM/mIl9ka9uA5pMkM+Ll0AK"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"1FzYccB24irQ","colab_type":"text"},"source":["## Reading Files into Python"]},{"cell_type":"code","metadata":{"id":"zDWO5w4jIiWL","colab_type":"code","colab":{}},"source":["# importing libraries\n","import pandas as pd"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"LFxGyLx3L5IS","colab_type":"code","colab":{}},"source":["#importing data\n","data = pd.read_csv('churn_prediction.csv')"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"qPcIwT47NQX6","colab_type":"code","outputId":"dd64ac4c-6cd5-4f12-c536-2336c75475b9","executionInfo":{"status":"ok","timestamp":1580377658648,"user_tz":-330,"elapsed":1247,"user":{"displayName":"Sharoon Saxena","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64","userId":"01167841530696023488"}},"colab":{"base_uri":"https://localhost:8080/","height":224}},"source":["#first 5 instances using \"head()\" function\n","data.head()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["
[HTML rendering of the data.head() table was garbled beyond recovery in this dump; see the text/plain output that follows]
"],"text/plain":[" customer_id vintage age ... previous_month_balance churn last_transaction\n","0 1 3135 66 ... 1458.71 0 2019-05-21\n","1 2 310 35 ... 8787.61 0 2019-11-01\n","2 4 2356 31 ... 5070.14 0 NaT\n","3 5 478 90 ... 1669.79 1 2019-08-06\n","4 6 2531 42 ... 1677.16 1 2019-11-03\n","\n","[5 rows x 21 columns]"]},"metadata":{"tags":[]},"execution_count":12}]},{"cell_type":"code","metadata":{"id":"_G5CwMDl2B_8","colab_type":"code","outputId":"14298801-9bf5-4aeb-a582-cfb9b03192d5","executionInfo":{"status":"ok","timestamp":1580377658649,"user_tz":-330,"elapsed":1035,"user":{"displayName":"Sharoon Saxena","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64","userId":"01167841530696023488"}},"colab":{"base_uri":"https://localhost:8080/","height":224}},"source":["#last 5 instances using \"tail()\" function\n","data.tail()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["
[HTML rendering of the data.tail() table was garbled beyond recovery in this dump; see the text/plain output that follows]
"],"text/plain":[" customer_id vintage ... churn last_transaction\n","28377 30297 1845 ... 0 2019-10-22\n","28378 30298 4919 ... 0 2019-12-17\n","28379 30299 297 ... 1 2019-12-31\n","28380 30300 2585 ... 0 NaT\n","28381 30301 2349 ... 1 2019-11-02\n","\n","[5 rows x 21 columns]"]},"metadata":{"tags":[]},"execution_count":13}]},{"cell_type":"code","metadata":{"id":"5Iy0lfDNNZ8U","colab_type":"code","outputId":"5e2a976c-90ad-4dc4-af21-e2d551a8516a","executionInfo":{"status":"ok","timestamp":1580377659085,"user_tz":-330,"elapsed":1033,"user":{"displayName":"Sharoon Saxena","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64","userId":"01167841530696023488"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"source":["#finding out the shape of the data using \"shape\" variable: Output (rows, columns)\n","data.shape"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(28382, 21)"]},"metadata":{"tags":[]},"execution_count":14}]},{"cell_type":"code","metadata":{"id":"b75gSeumN50y","colab_type":"code","outputId":"0276bf06-6ea1-4415-edd5-264c0c8d96a4","executionInfo":{"status":"ok","timestamp":1580377659609,"user_tz":-330,"elapsed":1118,"user":{"displayName":"Sharoon Saxena","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64","userId":"01167841530696023488"}},"colab":{"base_uri":"https://localhost:8080/","height":153}},"source":["#Printing all the columns present in data\n","data.columns"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Index(['customer_id', 'vintage', 'age', 'gender', 'dependents', 'occupation',\n"," 'city', 'customer_nw_category', 'branch_code', 'current_balance',\n"," 'previous_month_end_balance', 'average_monthly_balance_prevQ',\n"," 'average_monthly_balance_prevQ2', 'current_month_credit',\n"," 'previous_month_credit', 'current_month_debit', 'previous_month_debit',\n"," 'current_month_balance', 'previous_month_balance', 'churn',\n"," 'last_transaction'],\n"," dtype='object')"]},"metadata":{"tags":[]},"execution_count":15}]},{"cell_type":"code","metadata":{"id":"kKIUtLmZX5tg","colab_type":"code","outputId":"71c127fd-08af-4241-a936-e43dbf7ee0eb","executionInfo":{"status":"ok","timestamp":1580377660322,"user_tz":-330,"elapsed":820,"user":{"displayName":"Sharoon Saxena","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64","userId":"01167841530696023488"}},"colab":{"base_uri":"https://localhost:8080/","height":391}},"source":["data.dtypes"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["customer_id int64\n","vintage int64\n","age int64\n","gender object\n","dependents float64\n","occupation object\n","city float64\n","customer_nw_category int64\n","branch_code int64\n","current_balance float64\n","previous_month_end_balance float64\n","average_monthly_balance_prevQ float64\n","average_monthly_balance_prevQ2 float64\n","current_month_credit float64\n","previous_month_credit float64\n","current_month_debit float64\n","previous_month_debit float64\n","current_month_balance float64\n","previous_month_balance float64\n","churn int64\n","last_transaction object\n","dtype: object"]},"metadata":{"tags":[]},"execution_count":16}]}]} 2 | -------------------------------------------------------------------------------- /32.Min_Max_Range_Updated.ipynb: -------------------------------------------------------------------------------- 1 | 
{ 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "colab_type": "text", 7 | "id": "LIOaq8QL8CDm" 8 | }, 9 | "source": [ 10 | "## Min, Max and Range of Data" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": { 17 | "colab": {}, 18 | "colab_type": "code", 19 | "id": "Jn7K0qBlR_Kn" 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "#import libraries\n", 24 | "import pandas as pd\n", 25 | "import numpy as np" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": { 31 | "colab_type": "text", 32 | "id": "NxC7Fy8fOuRz" 33 | }, 34 | "source": [ 35 | "This is the dataset for __Customer Churn Problem__. \n" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 2, 41 | "metadata": { 42 | "colab": {}, 43 | "colab_type": "code", 44 | "id": "jtGI5XRpR_Ku" 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "# importing dataset\n", 49 | "data = pd.read_csv('churn_prediction.csv')" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 3, 55 | "metadata": {}, 56 | "outputs": [ 57 | { 58 | "data": { 59 | "text/html": [ 60 | "
[HTML rendering of the data.head() table (5 rows × 21 columns) was garbled beyond recovery in this dump; see the text/plain output that follows]
" 226 | ], 227 | "text/plain": [ 228 | " customer_id vintage age gender dependents occupation city \\\n", 229 | "0 1 2101 66 Male 0.0 self_employed 187.0 \n", 230 | "1 2 2348 35 Male 0.0 self_employed NaN \n", 231 | "2 4 2194 31 Male 0.0 salaried 146.0 \n", 232 | "3 5 2329 90 NaN NaN self_employed 1020.0 \n", 233 | "4 6 1579 42 Male 2.0 self_employed 1494.0 \n", 234 | "\n", 235 | " customer_nw_category branch_code current_balance ... \\\n", 236 | "0 2 755 1458.71 ... \n", 237 | "1 2 3214 5390.37 ... \n", 238 | "2 2 41 3913.16 ... \n", 239 | "3 2 582 2291.91 ... \n", 240 | "4 3 388 927.72 ... \n", 241 | "\n", 242 | " average_monthly_balance_prevQ average_monthly_balance_prevQ2 \\\n", 243 | "0 1458.71 1449.07 \n", 244 | "1 7799.26 12419.41 \n", 245 | "2 4910.17 2815.94 \n", 246 | "3 2084.54 1006.54 \n", 247 | "4 1643.31 1871.12 \n", 248 | "\n", 249 | " current_month_credit previous_month_credit current_month_debit \\\n", 250 | "0 0.20 0.20 0.20 \n", 251 | "1 0.56 0.56 5486.27 \n", 252 | "2 0.61 0.61 6046.73 \n", 253 | "3 0.47 0.47 0.47 \n", 254 | "4 0.33 714.61 588.62 \n", 255 | "\n", 256 | " previous_month_debit current_month_balance previous_month_balance churn \\\n", 257 | "0 0.20 1458.71 1458.71 0 \n", 258 | "1 100.56 6496.78 8787.61 0 \n", 259 | "2 259.23 5006.28 5070.14 0 \n", 260 | "3 2143.33 2291.91 1669.79 1 \n", 261 | "4 1538.06 1157.15 1677.16 1 \n", 262 | "\n", 263 | " last_transaction \n", 264 | "0 2019-05-21 \n", 265 | "1 2019-11-01 \n", 266 | "2 NaT \n", 267 | "3 2019-08-06 \n", 268 | "4 2019-11-03 \n", 269 | "\n", 270 | "[5 rows x 21 columns]" 271 | ] 272 | }, 273 | "execution_count": 3, 274 | "metadata": {}, 275 | "output_type": "execute_result" 276 | } 277 | ], 278 | "source": [ 279 | "data.head()" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": 4, 285 | "metadata": {}, 286 | "outputs": [ 287 | { 288 | "data": { 289 | "text/plain": [ 290 | "(28382, 21)" 291 | ] 292 | }, 293 | "execution_count": 4, 294 | "metadata": {}, 295 | "output_type": "execute_result" 296 | } 297 | ], 298 | "source": [ 299 | "data.shape" 300 | ] 301 | }, 302 | { 303 | "cell_type": "markdown", 304 | "metadata": { 305 | "colab_type": "text", 306 | "id": "TkE9qtAvOuSD" 307 | }, 308 | "source": [ 309 | "Identification of __Datatypes__" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": 0, 315 | "metadata": { 316 | "colab": {}, 317 | "colab_type": "code", 318 | "id": "D1eopfppOuSG", 319 | "outputId": "7b2b8784-bce5-4424-e2a1-7e649fc2ce62" 320 | }, 321 | "outputs": [ 322 | { 323 | "data": { 324 | "text/plain": [ 325 | "customer_id int64\n", 326 | "vintage int64\n", 327 | "age int64\n", 328 | "gender object\n", 329 | "dependents float64\n", 330 | "occupation object\n", 331 | "city float64\n", 332 | "customer_nw_category int64\n", 333 | "branch_code int64\n", 334 | "days_since_last_transaction float64\n", 335 | "current_balance float64\n", 336 | "previous_month_end_balance float64\n", 337 | "average_monthly_balance_prevQ float64\n", 338 | "average_monthly_balance_prevQ2 float64\n", 339 | "current_month_credit float64\n", 340 | "previous_month_credit float64\n", 341 | "current_month_debit float64\n", 342 | "previous_month_debit float64\n", 343 | "current_month_balance float64\n", 344 | "previous_month_balance float64\n", 345 | "churn int64\n", 346 | "dtype: object" 347 | ] 348 | }, 349 | "execution_count": 4, 350 | "metadata": { 351 | "tags": [] 352 | }, 353 | "output_type": "execute_result" 354 | } 355 | ], 356 | "source": [ 357 | "data.dtypes" 358 | ] 359 
| }, 360 | { 361 | "cell_type": "markdown", 362 | "metadata": { 363 | "colab_type": "text", 364 | "id": "s3iJ7r43Ac0W" 365 | }, 366 | "source": [ 367 | "## Isolating numerical columns" 368 | ] 369 | }, 370 | { 371 | "cell_type": "markdown", 372 | "metadata": { 373 | "colab_type": "text", 374 | "id": "-l0diipkOuSb" 375 | }, 376 | "source": [ 377 | "Storing indices of __Integer and Float__ in numercial_cols because we are dealing with __numerical variables__" 378 | ] 379 | }, 380 | { 381 | "cell_type": "code", 382 | "execution_count": 5, 383 | "metadata": { 384 | "colab": { 385 | "base_uri": "https://localhost:8080/", 386 | "height": 153 387 | }, 388 | "colab_type": "code", 389 | "executionInfo": { 390 | "elapsed": 1441, 391 | "status": "ok", 392 | "timestamp": 1581504105970, 393 | "user": { 394 | "displayName": "Sharoon Saxena", 395 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64", 396 | "userId": "01167841530696023488" 397 | }, 398 | "user_tz": -330 399 | }, 400 | "id": "0PrPd53w-rp6", 401 | "outputId": "6d46988f-e384-43d7-9622-353cd5cfc487" 402 | }, 403 | "outputs": [ 404 | { 405 | "data": { 406 | "text/plain": [ 407 | "Index(['customer_id', 'vintage', 'age', 'dependents', 'city',\n", 408 | " 'customer_nw_category', 'branch_code', 'current_balance',\n", 409 | " 'previous_month_end_balance', 'average_monthly_balance_prevQ',\n", 410 | " 'average_monthly_balance_prevQ2', 'current_month_credit',\n", 411 | " 'previous_month_credit', 'current_month_debit', 'previous_month_debit',\n", 412 | " 'current_month_balance', 'previous_month_balance', 'churn'],\n", 413 | " dtype='object')" 414 | ] 415 | }, 416 | "execution_count": 5, 417 | "metadata": {}, 418 | "output_type": "execute_result" 419 | } 420 | ], 421 | "source": [ 422 | "# storing indices of all numerical data types in numerical_cols\n", 423 | "numerical_cols = data.select_dtypes(include=['int64', 'float64']).columns\n", 424 | "\n", 425 | "# checking\n", 426 | "numerical_cols" 427 | ] 428 | }, 429 | { 430 | "cell_type": "markdown", 431 | "metadata": { 432 | "colab_type": "text", 433 | "id": "Vnm7rEvEAbel" 434 | }, 435 | "source": [ 436 | "### Min obseravtion" 437 | ] 438 | }, 439 | { 440 | "cell_type": "code", 441 | "execution_count": 0, 442 | "metadata": { 443 | "colab": {}, 444 | "colab_type": "code", 445 | "id": "6oGzqFxqR_K9", 446 | "outputId": "f19a1072-9be5-403c-9337-6dc22cce9394" 447 | }, 448 | "outputs": [ 449 | { 450 | "data": { 451 | "text/html": [ 452 | "
[HTML rendering of the minimum current_balance observation (1 row × 21 columns) was garbled beyond recovery in this dump; see the text/plain output that follows]
" 522 | ], 523 | "text/plain": [ 524 | " customer_id vintage age gender dependents occupation city \\\n", 525 | "12608 13467 3082 80 Male 0.0 retired 1096.0 \n", 526 | "\n", 527 | " customer_nw_category branch_code days_since_last_transaction ... \\\n", 528 | "12608 1 27 5.0 ... \n", 529 | "\n", 530 | " previous_month_end_balance average_monthly_balance_prevQ \\\n", 531 | "12608 423.06 1694.57 \n", 532 | "\n", 533 | " average_monthly_balance_prevQ2 current_month_credit \\\n", 534 | "12608 868.26 9471.01 \n", 535 | "\n", 536 | " previous_month_credit current_month_debit previous_month_debit \\\n", 537 | "12608 2680.04 15229.44 7859.37 \n", 538 | "\n", 539 | " current_month_balance previous_month_balance churn \n", 540 | "12608 1050.17 2002.97 1 \n", 541 | "\n", 542 | "[1 rows x 21 columns]" 543 | ] 544 | }, 545 | "execution_count": 6, 546 | "metadata": { 547 | "tags": [] 548 | }, 549 | "output_type": "execute_result" 550 | } 551 | ], 552 | "source": [ 553 | "# observation with minimum current balance\n", 554 | "data[data['current_balance'] == data['current_balance'].min()]" 555 | ] 556 | }, 557 | { 558 | "cell_type": "markdown", 559 | "metadata": { 560 | "colab_type": "text", 561 | "id": "yDJjIU9POuS9" 562 | }, 563 | "source": [ 564 | "* Customer's id is 13467\n", 565 | "* Customer has __minimum current balance__ is -5503.96 \n" 566 | ] 567 | }, 568 | { 569 | "cell_type": "markdown", 570 | "metadata": { 571 | "colab_type": "text", 572 | "id": "SCHZvc0XOuTC" 573 | }, 574 | "source": [ 575 | "### Max observation" 576 | ] 577 | }, 578 | { 579 | "cell_type": "code", 580 | "execution_count": 0, 581 | "metadata": { 582 | "colab": {}, 583 | "colab_type": "code", 584 | "id": "YqXn9lcyR_LA", 585 | "outputId": "c48e8ca4-f9d6-471c-dd0d-9f8c14c8af28" 586 | }, 587 | "outputs": [ 588 | { 589 | "data": { 590 | "text/html": [ 591 | "
[HTML rendering of the maximum current_month_debit observation (1 row × 21 columns) was garbled beyond recovery in this dump; see the text/plain output that follows]
" 661 | ], 662 | "text/plain": [ 663 | " customer_id vintage age gender dependents occupation city \\\n", 664 | "24095 25712 1192 90 Male 0.0 retired 1020.0 \n", 665 | "\n", 666 | " customer_nw_category branch_code days_since_last_transaction ... \\\n", 667 | "24095 2 5 18.0 ... \n", 668 | "\n", 669 | " previous_month_end_balance average_monthly_balance_prevQ \\\n", 670 | "24095 24270.54 11728.39 \n", 671 | "\n", 672 | " average_monthly_balance_prevQ2 current_month_credit \\\n", 673 | "24095 111617.41 12269845.39 \n", 674 | "\n", 675 | " previous_month_credit current_month_debit previous_month_debit \\\n", 676 | "24095 0.21 7637857.36 0.21 \n", 677 | "\n", 678 | " current_month_balance previous_month_balance churn \n", 679 | "24095 8399.62 24270.54 1 \n", 680 | "\n", 681 | "[1 rows x 21 columns]" 682 | ] 683 | }, 684 | "execution_count": 7, 685 | "metadata": { 686 | "tags": [] 687 | }, 688 | "output_type": "execute_result" 689 | } 690 | ], 691 | "source": [ 692 | "# obseravtion with maxximum current month debit\n", 693 | "data[data['current_month_debit'] == data['current_month_debit'].max()]" 694 | ] 695 | }, 696 | { 697 | "cell_type": "markdown", 698 | "metadata": { 699 | "colab_type": "text", 700 | "id": "qKH3ErymOuTU" 701 | }, 702 | "source": [ 703 | "* Customer's id is 25712\n", 704 | "* Customer has __maximum current month debit__ is 7637857.36 \n" 705 | ] 706 | }, 707 | { 708 | "cell_type": "markdown", 709 | "metadata": { 710 | "colab_type": "text", 711 | "id": "X0UJT_w3OuTW" 712 | }, 713 | "source": [ 714 | "### Range " 715 | ] 716 | }, 717 | { 718 | "cell_type": "markdown", 719 | "metadata": { 720 | "colab_type": "text", 721 | "id": "-RO9lwQeOuTa" 722 | }, 723 | "source": [ 724 | " __Range of Age__ in our datase indicating the difference of Age between the oldest and youngest customers" 725 | ] 726 | }, 727 | { 728 | "cell_type": "code", 729 | "execution_count": 0, 730 | "metadata": { 731 | "colab": { 732 | "base_uri": "https://localhost:8080/", 733 | "height": 34 734 | }, 735 | "colab_type": "code", 736 | "executionInfo": { 737 | "elapsed": 1044, 738 | "status": "ok", 739 | "timestamp": 1581505193463, 740 | "user": { 741 | "displayName": "Sharoon Saxena", 742 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64", 743 | "userId": "01167841530696023488" 744 | }, 745 | "user_tz": -330 746 | }, 747 | "id": "kkHzHmLrR_LF", 748 | "outputId": "223f34f6-1355-49b8-86a5-33a50d4b7d24" 749 | }, 750 | "outputs": [ 751 | { 752 | "name": "stdout", 753 | "output_type": "stream", 754 | "text": [ 755 | "1 90\n" 756 | ] 757 | } 758 | ], 759 | "source": [ 760 | "# Range of Age \n", 761 | "\n", 762 | "print(data['age'].min(), data['age'].max())" 763 | ] 764 | }, 765 | { 766 | "cell_type": "markdown", 767 | "metadata": { 768 | "colab_type": "text", 769 | "id": "QRiwOGUTOuTp" 770 | }, 771 | "source": [ 772 | "* Oldest Customer Age is 90\n", 773 | "* Youngest Customer Age is 1\n", 774 | "* Range is [1,90]" 775 | ] 776 | }, 777 | { 778 | "cell_type": "markdown", 779 | "metadata": { 780 | "colab_type": "text", 781 | "id": "ObUYzPs5OuTt" 782 | }, 783 | "source": [ 784 | "### Max, Min, Range for each column" 785 | ] 786 | }, 787 | { 788 | "cell_type": "code", 789 | "execution_count": 0, 790 | "metadata": { 791 | "colab": { 792 | "base_uri": "https://localhost:8080/", 793 | "height": 408 794 | }, 795 | "colab_type": "code", 796 | "executionInfo": { 797 | "elapsed": 1055, 798 | "status": "ok", 799 | "timestamp": 1581505222868, 800 | "user": { 801 | "displayName": 
"Sharoon Saxena", 802 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64", 803 | "userId": "01167841530696023488" 804 | }, 805 | "user_tz": -330 806 | }, 807 | "id": "VusaDbjqR_LH", 808 | "outputId": "b045a934-0606-4298-d0c6-edb1df53eae1" 809 | }, 810 | "outputs": [ 811 | { 812 | "data": { 813 | "text/plain": [ 814 | "customer_id 30301.00\n", 815 | "vintage 12899.00\n", 816 | "age 90.00\n", 817 | "dependents 52.00\n", 818 | "city 1649.00\n", 819 | "customer_nw_category 3.00\n", 820 | "branch_code 4782.00\n", 821 | "days_since_last_transaction 365.00\n", 822 | "current_balance 5905904.03\n", 823 | "previous_month_end_balance 5740438.63\n", 824 | "average_monthly_balance_prevQ 5700289.57\n", 825 | "average_monthly_balance_prevQ2 5010170.10\n", 826 | "current_month_credit 12269845.39\n", 827 | "previous_month_credit 2361808.29\n", 828 | "current_month_debit 7637857.36\n", 829 | "previous_month_debit 1414168.06\n", 830 | "current_month_balance 5778184.77\n", 831 | "previous_month_balance 5720144.50\n", 832 | "churn 1.00\n", 833 | "dtype: float64" 834 | ] 835 | }, 836 | "execution_count": 9, 837 | "metadata": { 838 | "tags": [] 839 | }, 840 | "output_type": "execute_result" 841 | } 842 | ], 843 | "source": [ 844 | "# Printing Max of evey numerical column\n", 845 | "data[numerical_cols].max()" 846 | ] 847 | }, 848 | { 849 | "cell_type": "markdown", 850 | "metadata": { 851 | "colab_type": "text", 852 | "id": "MM_lBwUHYXa2" 853 | }, 854 | "source": [ 855 | "* Maximum value of vintage for a customer is 12899.\n", 856 | "* Maximum age of a customer in our dataset is 90\n", 857 | "* Maximum number of dependents in our dataset is 52\n", 858 | "* Maximum day since last transaction is 365\n", 859 | "* Maximum values for __current_balance, previous_month_end_balance,average_monthly_balance_prevQ, current_month_balance, previous_month_balance__ are close to 57 lakhs.\n", 860 | "* Maximum value for current_month_credit is 12269845.39\n", 861 | "* Maximum value for previous_month_credit is 2361808.29\n", 862 | "* maximum value for current_month_debit and previous_month debit is respectively 7637857.36 and 1414168.06.\n", 863 | "* The features like __customer_id, city, customer_nw_category, branch_code, churn__ are required to be treated as categorcial variable so their maximum value don't represent numerical significance.\n" 864 | ] 865 | }, 866 | { 867 | "cell_type": "code", 868 | "execution_count": 0, 869 | "metadata": { 870 | "colab": { 871 | "base_uri": "https://localhost:8080/", 872 | "height": 408 873 | }, 874 | "colab_type": "code", 875 | "executionInfo": { 876 | "elapsed": 1084, 877 | "status": "ok", 878 | "timestamp": 1581505242279, 879 | "user": { 880 | "displayName": "Sharoon Saxena", 881 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64", 882 | "userId": "01167841530696023488" 883 | }, 884 | "user_tz": -330 885 | }, 886 | "id": "8OSHRZvfR_LK", 887 | "outputId": "a6e584fc-e627-4ca4-bdf6-e02d6348760b" 888 | }, 889 | "outputs": [ 890 | { 891 | "data": { 892 | "text/plain": [ 893 | "customer_id 1.00\n", 894 | "vintage 180.00\n", 895 | "age 1.00\n", 896 | "dependents 0.00\n", 897 | "city 0.00\n", 898 | "customer_nw_category 1.00\n", 899 | "branch_code 1.00\n", 900 | "days_since_last_transaction 0.00\n", 901 | "current_balance -5503.96\n", 902 | "previous_month_end_balance -3149.57\n", 903 | "average_monthly_balance_prevQ 1428.69\n", 904 | "average_monthly_balance_prevQ2 -16506.10\n", 905 | 
"current_month_credit 0.01\n", 906 | "previous_month_credit 0.01\n", 907 | "current_month_debit 0.01\n", 908 | "previous_month_debit 0.01\n", 909 | "current_month_balance -3374.18\n", 910 | "previous_month_balance -5171.92\n", 911 | "churn 0.00\n", 912 | "dtype: float64" 913 | ] 914 | }, 915 | "execution_count": 10, 916 | "metadata": { 917 | "tags": [] 918 | }, 919 | "output_type": "execute_result" 920 | } 921 | ], 922 | "source": [ 923 | "# printing min of every numercial column\n", 924 | "data[numerical_cols].min()" 925 | ] 926 | }, 927 | { 928 | "cell_type": "code", 929 | "execution_count": 0, 930 | "metadata": { 931 | "colab": {}, 932 | "colab_type": "code", 933 | "id": "ZMnoIdSmYXa5", 934 | "outputId": "08e05724-a51a-4a81-9815-c2f869813b45" 935 | }, 936 | "outputs": [ 937 | { 938 | "name": "stdout", 939 | "output_type": "stream", 940 | "text": [ 941 | "range of customer_id: [1, 30301]\n", 942 | "range of vintage: [180, 12899]\n", 943 | "range of age: [1, 90]\n", 944 | "range of dependents: [0.0, 52.0]\n", 945 | "range of city: [0.0, 1649.0]\n", 946 | "range of customer_nw_category: [1, 3]\n", 947 | "range of branch_code: [1, 4782]\n", 948 | "range of days_since_last_transaction: [0.0, 365.0]\n", 949 | "range of current_balance: [-5503.96, 5905904.03]\n", 950 | "range of previous_month_end_balance: [-3149.57, 5740438.63]\n", 951 | "range of average_monthly_balance_prevQ: [1428.69, 5700289.57]\n", 952 | "range of average_monthly_balance_prevQ2: [-16506.1, 5010170.1]\n", 953 | "range of current_month_credit: [0.01, 12269845.39]\n", 954 | "range of previous_month_credit: [0.01, 2361808.29]\n", 955 | "range of current_month_debit: [0.01, 7637857.36]\n", 956 | "range of previous_month_debit: [0.01, 1414168.06]\n", 957 | "range of current_month_balance: [-3374.18, 5778184.77]\n", 958 | "range of previous_month_balance: [-5171.92, 5720144.5]\n", 959 | "range of churn: [0, 1]\n" 960 | ] 961 | } 962 | ], 963 | "source": [ 964 | "for col in numerical_cols:\n", 965 | " print(\"range of {}{}{}{}{}{}{}{}\".format(col,\":\",\" \",\"[\",data[col].min(), \", \",data[col].max(),\"]\"))" 966 | ] 967 | }, 968 | { 969 | "cell_type": "markdown", 970 | "metadata": { 971 | "colab_type": "text", 972 | "id": "TzIxUenlYXa8" 973 | }, 974 | "source": [ 975 | "* Range of current_month_credit is highest among all features.\n", 976 | "* Range of days_since_last_transaction is 1 year." 
977 | ] 978 | }, 979 | { 980 | "cell_type": "code", 981 | "execution_count": 0, 982 | "metadata": { 983 | "colab": {}, 984 | "colab_type": "code", 985 | "id": "rZ-87c8xYXa8" 986 | }, 987 | "outputs": [], 988 | "source": [] 989 | } 990 | ], 991 | "metadata": { 992 | "colab": { 993 | "collapsed_sections": [], 994 | "name": "Min_Max_Range_Updated.ipynb", 995 | "provenance": [] 996 | }, 997 | "kernelspec": { 998 | "display_name": "Python 3", 999 | "language": "python", 1000 | "name": "python3" 1001 | }, 1002 | "language_info": { 1003 | "codemirror_mode": { 1004 | "name": "ipython", 1005 | "version": 3 1006 | }, 1007 | "file_extension": ".py", 1008 | "mimetype": "text/x-python", 1009 | "name": "python", 1010 | "nbconvert_exporter": "python", 1011 | "pygments_lexer": "ipython3", 1012 | "version": "3.8.5" 1013 | } 1014 | }, 1015 | "nbformat": 4, 1016 | "nbformat_minor": 1 1017 | } 1018 | -------------------------------------------------------------------------------- /33.Mean_Variance.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 14, 6 | "metadata": { 7 | "colab": {}, 8 | "colab_type": "code", 9 | "id": "UtrBkZutQ_nz" 10 | }, 11 | "outputs": [], 12 | "source": [ 13 | "# importing libraries\n", 14 | "import pandas as pd\n", 15 | "import numpy as np\n", 16 | "import matplotlib.pyplot as plt" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "### This is our dataset of Customer Churn Prediction." 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 15, 29 | "metadata": { 30 | "colab": {}, 31 | "colab_type": "code", 32 | "id": "BSs0Y24MQ_n6" 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "# importing data\n", 37 | "data = pd.read_csv('churn_prediction.csv')" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 16, 43 | "metadata": { 44 | "colab": { 45 | "base_uri": "https://localhost:8080/", 46 | "height": 211 47 | }, 48 | "colab_type": "code", 49 | "executionInfo": { 50 | "elapsed": 1240, 51 | "status": "ok", 52 | "timestamp": 1581056281581, 53 | "user": { 54 | "displayName": "Sharoon Saxena", 55 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64", 56 | "userId": "01167841530696023488" 57 | }, 58 | "user_tz": -330 59 | }, 60 | "id": "36nlgrybQ_n9", 61 | "outputId": "3903402a-332e-44c7-9d45-7511ca730b3c" 62 | }, 63 | "outputs": [ 64 | { 65 | "data": { 66 | "text/html": [ 67 | "
[HTML rendering of the data.head() table (5 rows × 24 columns) was garbled beyond recovery in this dump; see the text/plain output that follows]
" 233 | ], 234 | "text/plain": [ 235 | " customer_id vintage age gender dependents occupation city \\\n", 236 | "0 1 2101 66 Male 0.0 self_employed 187.0 \n", 237 | "1 2 2348 35 Male 0.0 self_employed NaN \n", 238 | "2 4 2194 31 Male 0.0 salaried 146.0 \n", 239 | "3 5 2329 90 NaN NaN self_employed 1020.0 \n", 240 | "4 6 1579 42 Male 2.0 self_employed 1494.0 \n", 241 | "\n", 242 | " customer_nw_category branch_code current_balance ... \\\n", 243 | "0 2 755 1458.71 ... \n", 244 | "1 2 3214 5390.37 ... \n", 245 | "2 2 41 3913.16 ... \n", 246 | "3 2 582 2291.91 ... \n", 247 | "4 3 388 927.72 ... \n", 248 | "\n", 249 | " previous_month_credit current_month_debit previous_month_debit \\\n", 250 | "0 0.20 0.20 0.20 \n", 251 | "1 0.56 5486.27 100.56 \n", 252 | "2 0.61 6046.73 259.23 \n", 253 | "3 0.47 0.47 2143.33 \n", 254 | "4 714.61 588.62 1538.06 \n", 255 | "\n", 256 | " current_month_balance previous_month_balance churn doy_ls_tran \\\n", 257 | "0 1458.71 1458.71 0 141.0 \n", 258 | "1 6496.78 8787.61 0 305.0 \n", 259 | "2 5006.28 5070.14 0 NaN \n", 260 | "3 2291.91 1669.79 1 218.0 \n", 261 | "4 1157.15 1677.16 1 307.0 \n", 262 | "\n", 263 | " woy_ls_tran moy_ls_tran dow_ls_tran \n", 264 | "0 21.0 5.0 1.0 \n", 265 | "1 44.0 11.0 4.0 \n", 266 | "2 NaN NaN NaN \n", 267 | "3 32.0 8.0 1.0 \n", 268 | "4 44.0 11.0 6.0 \n", 269 | "\n", 270 | "[5 rows x 24 columns]" 271 | ] 272 | }, 273 | "execution_count": 16, 274 | "metadata": {}, 275 | "output_type": "execute_result" 276 | } 277 | ], 278 | "source": [ 279 | "# First look\n", 280 | "data.head()" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": 17, 286 | "metadata": { 287 | "colab": { 288 | "base_uri": "https://localhost:8080/", 289 | "height": 33 290 | }, 291 | "colab_type": "code", 292 | "executionInfo": { 293 | "elapsed": 1196, 294 | "status": "ok", 295 | "timestamp": 1581056282914, 296 | "user": { 297 | "displayName": "Sharoon Saxena", 298 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64", 299 | "userId": "01167841530696023488" 300 | }, 301 | "user_tz": -330 302 | }, 303 | "id": "VRCymO1kQ_oA", 304 | "outputId": "1fbba1c9-53da-4e3d-c6fd-edb0a8eed4bf" 305 | }, 306 | "outputs": [ 307 | { 308 | "data": { 309 | "text/plain": [ 310 | "(28382, 24)" 311 | ] 312 | }, 313 | "execution_count": 17, 314 | "metadata": {}, 315 | "output_type": "execute_result" 316 | } 317 | ], 318 | "source": [ 319 | "#shape of the data\n", 320 | "data.shape" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": 18, 326 | "metadata": { 327 | "colab": { 328 | "base_uri": "https://localhost:8080/", 329 | "height": 163 330 | }, 331 | "colab_type": "code", 332 | "executionInfo": { 333 | "elapsed": 1135, 334 | "status": "ok", 335 | "timestamp": 1581056285543, 336 | "user": { 337 | "displayName": "Sharoon Saxena", 338 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64", 339 | "userId": "01167841530696023488" 340 | }, 341 | "user_tz": -330 342 | }, 343 | "id": "0zQJF63XQ_oD", 344 | "outputId": "de4bdf95-994e-4964-d223-b01626604ea8" 345 | }, 346 | "outputs": [ 347 | { 348 | "data": { 349 | "text/plain": [ 350 | "Index(['customer_id', 'vintage', 'age', 'gender', 'dependents', 'occupation',\n", 351 | " 'city', 'customer_nw_category', 'branch_code', 'current_balance',\n", 352 | " 'previous_month_end_balance', 'average_monthly_balance_prevQ',\n", 353 | " 'average_monthly_balance_prevQ2', 'current_month_credit',\n", 354 | " 
'previous_month_credit', 'current_month_debit', 'previous_month_debit',\n", 355 | " 'current_month_balance', 'previous_month_balance', 'churn',\n", 356 | " 'doy_ls_tran', 'woy_ls_tran', 'moy_ls_tran', 'dow_ls_tran'],\n", 357 | " dtype='object')" 358 | ] 359 | }, 360 | "execution_count": 18, 361 | "metadata": {}, 362 | "output_type": "execute_result" 363 | } 364 | ], 365 | "source": [ 366 | "#columns of data\n", 367 | "data.columns" 368 | ] 369 | }, 370 | { 371 | "cell_type": "markdown", 372 | "metadata": {}, 373 | "source": [ 374 | "### Identificaiton of Data types " 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": 19, 380 | "metadata": { 381 | "colab": { 382 | "base_uri": "https://localhost:8080/", 383 | "height": 440 384 | }, 385 | "colab_type": "code", 386 | "executionInfo": { 387 | "elapsed": 964, 388 | "status": "ok", 389 | "timestamp": 1581056286919, 390 | "user": { 391 | "displayName": "Sharoon Saxena", 392 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64", 393 | "userId": "01167841530696023488" 394 | }, 395 | "user_tz": -330 396 | }, 397 | "id": "IoHu82GiQ_oG", 398 | "outputId": "6fed1d1f-4fc8-42a2-ab9e-88dcc4f67557" 399 | }, 400 | "outputs": [ 401 | { 402 | "data": { 403 | "text/plain": [ 404 | "customer_id int64\n", 405 | "vintage int64\n", 406 | "age int64\n", 407 | "gender object\n", 408 | "dependents float64\n", 409 | "occupation object\n", 410 | "city float64\n", 411 | "customer_nw_category int64\n", 412 | "branch_code int64\n", 413 | "current_balance float64\n", 414 | "previous_month_end_balance float64\n", 415 | "average_monthly_balance_prevQ float64\n", 416 | "average_monthly_balance_prevQ2 float64\n", 417 | "current_month_credit float64\n", 418 | "previous_month_credit float64\n", 419 | "current_month_debit float64\n", 420 | "previous_month_debit float64\n", 421 | "current_month_balance float64\n", 422 | "previous_month_balance float64\n", 423 | "churn int64\n", 424 | "doy_ls_tran float64\n", 425 | "woy_ls_tran float64\n", 426 | "moy_ls_tran float64\n", 427 | "dow_ls_tran float64\n", 428 | "dtype: object" 429 | ] 430 | }, 431 | "execution_count": 19, 432 | "metadata": {}, 433 | "output_type": "execute_result" 434 | } 435 | ], 436 | "source": [ 437 | "#different data types\n", 438 | "data.dtypes" 439 | ] 440 | }, 441 | { 442 | "cell_type": "markdown", 443 | "metadata": {}, 444 | "source": [ 445 | "### Mean Value" 446 | ] 447 | }, 448 | { 449 | "cell_type": "code", 450 | "execution_count": 20, 451 | "metadata": { 452 | "colab": { 453 | "base_uri": "https://localhost:8080/", 454 | "height": 33 455 | }, 456 | "colab_type": "code", 457 | "executionInfo": { 458 | "elapsed": 1018, 459 | "status": "ok", 460 | "timestamp": 1581056294995, 461 | "user": { 462 | "displayName": "Sharoon Saxena", 463 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64", 464 | "userId": "01167841530696023488" 465 | }, 466 | "user_tz": -330 467 | }, 468 | "id": "5VbQGrVYQ_oJ", 469 | "outputId": "8ef087dd-dc40-4932-fb78-fa71905f2b85" 470 | }, 471 | "outputs": [ 472 | { 473 | "name": "stdout", 474 | "output_type": "stream", 475 | "text": [ 476 | "48.208336269466564\n", 477 | "47.461216730038025\n" 478 | ] 479 | } 480 | ], 481 | "source": [ 482 | "#mean of age\n", 483 | "print(data['age'].mean())\n", 484 | "\n", 485 | "#mean of age who are likely to churn\n", 486 | "print(data[data['churn'] == 1]['age'].mean())" 487 | ] 488 | }, 489 | { 490 | "cell_type": "markdown", 491 | 
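Aside on the mean cell just above: the overall mean age and the mean age of likely churners can also be obtained in one pass with a group-wise aggregation. A minimal, self-contained sketch; the tiny DataFrame below is a hypothetical stand-in for the notebook's data.

import pandas as pd

# hypothetical stand-in for the churn DataFrame used in the notebook
toy = pd.DataFrame({'age':   [66, 35, 31, 90, 42, 28],
                    'churn': [0,  0,  0,  1,  1,  1]})

print(toy['age'].mean())                     # mean age of all customers
print(toy[toy['churn'] == 1]['age'].mean())  # mean age of customers likely to churn
print(toy.groupby('churn')['age'].mean())    # both group means in a single call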
"metadata": {}, 492 | "source": [ 493 | "The __mean value of Age of Customers__ and the __mean value of Age of Customers who are likely to churn__ is around 48" 494 | ] 495 | }, 496 | { 497 | "cell_type": "code", 498 | "execution_count": 21, 499 | "metadata": { 500 | "colab": { 501 | "base_uri": "https://localhost:8080/", 502 | "height": 33 503 | }, 504 | "colab_type": "code", 505 | "executionInfo": { 506 | "elapsed": 781, 507 | "status": "ok", 508 | "timestamp": 1581056297085, 509 | "user": { 510 | "displayName": "Sharoon Saxena", 511 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64", 512 | "userId": "01167841530696023488" 513 | }, 514 | "user_tz": -330 515 | }, 516 | "id": "WvLj2Fo9Q_oR", 517 | "outputId": "05b175ec-f135-471c-f91f-d348d2ecd3a7" 518 | }, 519 | "outputs": [ 520 | { 521 | "name": "stdout", 522 | "output_type": "stream", 523 | "text": [ 524 | "7380.55180360792\n", 525 | "5220.884321292776\n" 526 | ] 527 | } 528 | ], 529 | "source": [ 530 | "#current balance mean\n", 531 | "print(data['current_balance'].mean())\n", 532 | "\n", 533 | "#current balance of customers who are likely to churn\n", 534 | "print(data[data['churn']==1]['current_balance'].mean())" 535 | ] 536 | }, 537 | { 538 | "cell_type": "markdown", 539 | "metadata": {}, 540 | "source": [ 541 | "* The __mean value of current balance of Customers___ is __7380.55180360792__ \n", 542 | "* The __mean value of Current Balance of Customers__ who are likely to churn is __5220.884321292776__." 543 | ] 544 | }, 545 | { 546 | "cell_type": "markdown", 547 | "metadata": {}, 548 | "source": [ 549 | "### Median" 550 | ] 551 | }, 552 | { 553 | "cell_type": "code", 554 | "execution_count": 22, 555 | "metadata": { 556 | "colab": { 557 | "base_uri": "https://localhost:8080/", 558 | "height": 33 559 | }, 560 | "colab_type": "code", 561 | "executionInfo": { 562 | "elapsed": 659, 563 | "status": "ok", 564 | "timestamp": 1581056300650, 565 | "user": { 566 | "displayName": "Sharoon Saxena", 567 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64", 568 | "userId": "01167841530696023488" 569 | }, 570 | "user_tz": -330 571 | }, 572 | "id": "aHIkif6QQ_oW", 573 | "outputId": "cbf009f7-8780-425e-b2ec-3c703be9759b" 574 | }, 575 | "outputs": [ 576 | { 577 | "data": { 578 | "text/plain": [ 579 | "46.0" 580 | ] 581 | }, 582 | "execution_count": 22, 583 | "metadata": {}, 584 | "output_type": "execute_result" 585 | } 586 | ], 587 | "source": [ 588 | "#median of age\n", 589 | "data['age'].median()" 590 | ] 591 | }, 592 | { 593 | "cell_type": "markdown", 594 | "metadata": {}, 595 | "source": [ 596 | "* __Median__ of age is 46. \n", 597 | "* That means 46 is the __50th percentile__ of the age of customers." 
598 | ] 599 | }, 600 | { 601 | "cell_type": "markdown", 602 | "metadata": {}, 603 | "source": [ 604 | "### Standard Deviation and Variance" 605 | ] 606 | }, 607 | { 608 | "cell_type": "code", 609 | "execution_count": 23, 610 | "metadata": { 611 | "colab": { 612 | "base_uri": "https://localhost:8080/", 613 | "height": 33 614 | }, 615 | "colab_type": "code", 616 | "executionInfo": { 617 | "elapsed": 1079, 618 | "status": "ok", 619 | "timestamp": 1581056392277, 620 | "user": { 621 | "displayName": "Sharoon Saxena", 622 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64", 623 | "userId": "01167841530696023488" 624 | }, 625 | "user_tz": -330 626 | }, 627 | "id": "_pRqupeJQ_ok", 628 | "outputId": "309e9107-c905-44ff-8e68-c0705e2907d4" 629 | }, 630 | "outputs": [ 631 | { 632 | "name": "stdout", 633 | "output_type": "stream", 634 | "text": [ 635 | "42598.711923233204\n", 636 | "1814650257.5186107\n" 637 | ] 638 | } 639 | ], 640 | "source": [ 641 | "print(data['current_balance'].std())\n", 642 | "print(data['current_balance'].var())" 643 | ] 644 | }, 645 | { 646 | "cell_type": "markdown", 647 | "metadata": {}, 648 | "source": [ 649 | "* __standard deviation__ for current balance of the customers is 42598.711923233204\n", 650 | "* __variance__ for current balance of the customers is 1814650257.5186107" 651 | ] 652 | }, 653 | { 654 | "cell_type": "markdown", 655 | "metadata": {}, 656 | "source": [ 657 | "### Describe Function" 658 | ] 659 | }, 660 | { 661 | "cell_type": "markdown", 662 | "metadata": { 663 | "colab": { 664 | "base_uri": "https://localhost:8080/", 665 | "height": 33 666 | }, 667 | "colab_type": "code", 668 | "executionInfo": { 669 | "elapsed": 659, 670 | "status": "ok", 671 | "timestamp": 1581056393920, 672 | "user": { 673 | "displayName": "Sharoon Saxena", 674 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64", 675 | "userId": "01167841530696023488" 676 | }, 677 | "user_tz": -330 678 | }, 679 | "id": "TZ6WONNhQ_oo", 680 | "outputId": "c73c8bda-6079-4f12-85c1-068f1632537a" 681 | }, 682 | "source": [ 683 | "describe() function is used to view some basic __descriptive statistical details__ like __percentile, mean, std etc.__ of a data frame." 684 | ] 685 | }, 686 | { 687 | "cell_type": "code", 688 | "execution_count": 24, 689 | "metadata": { 690 | "colab": { 691 | "base_uri": "https://localhost:8080/", 692 | "height": 297 693 | }, 694 | "colab_type": "code", 695 | "executionInfo": { 696 | "elapsed": 1279, 697 | "status": "ok", 698 | "timestamp": 1581056402110, 699 | "user": { 700 | "displayName": "Sharoon Saxena", 701 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64", 702 | "userId": "01167841530696023488" 703 | }, 704 | "user_tz": -330 705 | }, 706 | "id": "G2aklW5KQ_ow", 707 | "outputId": "3a64f669-809f-41b6-a077-06cf350af978" 708 | }, 709 | "outputs": [ 710 | { 711 | "data": { 712 | "text/html": [ 713 | "
\n", 714 | "\n", 727 | "\n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | "
customer_idvintageagedependentscitycustomer_nw_categorybranch_codecurrent_balanceprevious_month_end_balanceaverage_monthly_balance_prevQ...previous_month_creditcurrent_month_debitprevious_month_debitcurrent_month_balanceprevious_month_balancechurndoy_ls_tranwoy_ls_tranmoy_ls_trandow_ls_tran
count28382.00000028382.00000028382.00000025919.00000027579.00000028382.00000028382.0000002.838200e+042.838200e+042.838200e+04...2.838200e+042.838200e+042.838200e+042.838200e+042.838200e+0428382.00000025159.00000025159.00000025159.00000025159.000000
mean15143.5086672091.14410548.2083360.347236796.1095762.225530925.9750197.380552e+037.495771e+037.496780e+03...3.261694e+033.658745e+033.339761e+037.451133e+037.495177e+030.185329295.04570939.11630010.1422553.042728
std8746.454456272.67677517.8071630.997661432.8721020.660443937.7991294.259871e+044.252935e+044.172622e+04...2.968889e+045.198542e+042.430111e+044.203394e+044.243198e+040.38857186.28435615.8897972.7886711.712724
min1.00000073.0000001.0000000.0000000.0000001.0000001.000000-5.503960e+03-3.149570e+031.428690e+03...1.000000e-021.000000e-021.000000e-02-3.374180e+03-5.171920e+030.0000001.0000001.0000001.0000000.000000
25%7557.2500001958.00000036.0000000.000000409.0000002.000000176.0000001.784470e+031.906000e+032.180945e+03...3.300000e-014.100000e-014.100000e-011.996765e+032.074408e+030.000000270.00000033.0000009.0000001.000000
50%15150.5000002154.00000046.0000000.000000834.0000002.000000572.0000003.281255e+033.379915e+033.542865e+03...6.300000e-019.193000e+011.099600e+023.447995e+033.465235e+030.000000335.00000047.00000012.0000003.000000
75%22706.7500002292.00000060.0000000.0000001096.0000003.0000001440.0000006.635820e+036.656535e+036.666887e+03...7.492350e+021.360435e+031.357553e+036.667958e+036.654693e+030.000000354.00000050.00000012.0000005.000000
max30301.0000002476.00000090.00000052.0000001649.0000003.0000004782.0000005.905904e+065.740439e+065.700290e+06...2.361808e+067.637857e+061.414168e+065.778185e+065.720144e+061.000000365.00000052.00000012.0000006.000000
\n", 949 | "

8 rows × 22 columns

\n", 950 | "
" 951 | ], 952 | "text/plain": [ 953 | " customer_id vintage age dependents city \\\n", 954 | "count 28382.000000 28382.000000 28382.000000 25919.000000 27579.000000 \n", 955 | "mean 15143.508667 2091.144105 48.208336 0.347236 796.109576 \n", 956 | "std 8746.454456 272.676775 17.807163 0.997661 432.872102 \n", 957 | "min 1.000000 73.000000 1.000000 0.000000 0.000000 \n", 958 | "25% 7557.250000 1958.000000 36.000000 0.000000 409.000000 \n", 959 | "50% 15150.500000 2154.000000 46.000000 0.000000 834.000000 \n", 960 | "75% 22706.750000 2292.000000 60.000000 0.000000 1096.000000 \n", 961 | "max 30301.000000 2476.000000 90.000000 52.000000 1649.000000 \n", 962 | "\n", 963 | " customer_nw_category branch_code current_balance \\\n", 964 | "count 28382.000000 28382.000000 2.838200e+04 \n", 965 | "mean 2.225530 925.975019 7.380552e+03 \n", 966 | "std 0.660443 937.799129 4.259871e+04 \n", 967 | "min 1.000000 1.000000 -5.503960e+03 \n", 968 | "25% 2.000000 176.000000 1.784470e+03 \n", 969 | "50% 2.000000 572.000000 3.281255e+03 \n", 970 | "75% 3.000000 1440.000000 6.635820e+03 \n", 971 | "max 3.000000 4782.000000 5.905904e+06 \n", 972 | "\n", 973 | " previous_month_end_balance average_monthly_balance_prevQ ... \\\n", 974 | "count 2.838200e+04 2.838200e+04 ... \n", 975 | "mean 7.495771e+03 7.496780e+03 ... \n", 976 | "std 4.252935e+04 4.172622e+04 ... \n", 977 | "min -3.149570e+03 1.428690e+03 ... \n", 978 | "25% 1.906000e+03 2.180945e+03 ... \n", 979 | "50% 3.379915e+03 3.542865e+03 ... \n", 980 | "75% 6.656535e+03 6.666887e+03 ... \n", 981 | "max 5.740439e+06 5.700290e+06 ... \n", 982 | "\n", 983 | " previous_month_credit current_month_debit previous_month_debit \\\n", 984 | "count 2.838200e+04 2.838200e+04 2.838200e+04 \n", 985 | "mean 3.261694e+03 3.658745e+03 3.339761e+03 \n", 986 | "std 2.968889e+04 5.198542e+04 2.430111e+04 \n", 987 | "min 1.000000e-02 1.000000e-02 1.000000e-02 \n", 988 | "25% 3.300000e-01 4.100000e-01 4.100000e-01 \n", 989 | "50% 6.300000e-01 9.193000e+01 1.099600e+02 \n", 990 | "75% 7.492350e+02 1.360435e+03 1.357553e+03 \n", 991 | "max 2.361808e+06 7.637857e+06 1.414168e+06 \n", 992 | "\n", 993 | " current_month_balance previous_month_balance churn \\\n", 994 | "count 2.838200e+04 2.838200e+04 28382.000000 \n", 995 | "mean 7.451133e+03 7.495177e+03 0.185329 \n", 996 | "std 4.203394e+04 4.243198e+04 0.388571 \n", 997 | "min -3.374180e+03 -5.171920e+03 0.000000 \n", 998 | "25% 1.996765e+03 2.074408e+03 0.000000 \n", 999 | "50% 3.447995e+03 3.465235e+03 0.000000 \n", 1000 | "75% 6.667958e+03 6.654693e+03 0.000000 \n", 1001 | "max 5.778185e+06 5.720144e+06 1.000000 \n", 1002 | "\n", 1003 | " doy_ls_tran woy_ls_tran moy_ls_tran dow_ls_tran \n", 1004 | "count 25159.000000 25159.000000 25159.000000 25159.000000 \n", 1005 | "mean 295.045709 39.116300 10.142255 3.042728 \n", 1006 | "std 86.284356 15.889797 2.788671 1.712724 \n", 1007 | "min 1.000000 1.000000 1.000000 0.000000 \n", 1008 | "25% 270.000000 33.000000 9.000000 1.000000 \n", 1009 | "50% 335.000000 47.000000 12.000000 3.000000 \n", 1010 | "75% 354.000000 50.000000 12.000000 5.000000 \n", 1011 | "max 365.000000 52.000000 12.000000 6.000000 \n", 1012 | "\n", 1013 | "[8 rows x 22 columns]" 1014 | ] 1015 | }, 1016 | "execution_count": 24, 1017 | "metadata": {}, 1018 | "output_type": "execute_result" 1019 | } 1020 | ], 1021 | "source": [ 1022 | "data.describe(include=[int, float])" 1023 | ] 1024 | }, 1025 | { 1026 | "cell_type": "markdown", 1027 | "metadata": {}, 1028 | "source": [ 1029 | "* count represents total number of data 
points.\n", 1030 | "* mean represents average value\n", 1031 | "* std represents standard deviation\n", 1032 | "* min represents the minimum value of the column\n", 1033 | "* 25% represents 25th percentile that means 25% data fall below this value.\n", 1034 | "* 50% represents 50th percentile that means 50% data fall below this value. This is __Median__\n", 1035 | "* 75% represents the 75th percentile here\n", 1036 | "* max represents the maximum value of the column" 1037 | ] 1038 | }, 1039 | { 1040 | "cell_type": "code", 1041 | "execution_count": 25, 1042 | "metadata": { 1043 | "colab": {}, 1044 | "colab_type": "code", 1045 | "id": "RFier7inQ_o1" 1046 | }, 1047 | "outputs": [], 1048 | "source": [ 1049 | "churn_age = data[data['churn']==1]['age']" 1050 | ] 1051 | }, 1052 | { 1053 | "cell_type": "markdown", 1054 | "metadata": {}, 1055 | "source": [ 1056 | "A __Histogram__ visualises the distribution of data over a continuous interval. \n", 1057 | "Each bar in a histogram represents the tabulated __frequency__ at each __interval/bin__. \n", 1058 | "In simple words height represents the frequency for respective bin (interval)" 1059 | ] 1060 | }, 1061 | { 1062 | "cell_type": "code", 1063 | "execution_count": 26, 1064 | "metadata": { 1065 | "colab": { 1066 | "base_uri": "https://localhost:8080/", 1067 | "height": 573 1068 | }, 1069 | "colab_type": "code", 1070 | "executionInfo": { 1071 | "elapsed": 1734, 1072 | "status": "ok", 1073 | "timestamp": 1581058880316, 1074 | "user": { 1075 | "displayName": "Sharoon Saxena", 1076 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64", 1077 | "userId": "01167841530696023488" 1078 | }, 1079 | "user_tz": -330 1080 | }, 1081 | "id": "gi_dN-BNQ_o7", 1082 | "outputId": "97e6261e-6b33-4050-a41b-74a0ec8852c0" 1083 | }, 1084 | "outputs": [ 1085 | { 1086 | "data": { 1087 | "text/plain": [ 1088 | "" 1089 | ] 1090 | }, 1091 | "execution_count": 26, 1092 | "metadata": {}, 1093 | "output_type": "execute_result" 1094 | }, 1095 | { 1096 | "data": { 1097 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA9kAAAIcCAYAAADi5m1ZAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAVhwAAFYcBshnuugAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAgAElEQVR4nOzde7xWVZ348c8XQThxp0QhVDQtnXC8oCajjpImdtXMUnMqYzCLRglLxelHgWZhhKg5eY0ca7SmHLPRwlsiqeAFxtRSNBVTERIQEEIUWL8/9j7w8PA85wL7nOecw+f9eu3XPmevtfZaez+bw/metddakVJCkiRJkiRtvU61boAkSZIkSR2FQbYkSZIkSQUxyJYkSZIkqSAG2ZIkSZIkFcQgW5IkSZKkghhkS5IkSZJUEINsSZIkSZIKYpAtSZIkSVJBDLIlSZIkSSqIQbYkSZIkSQUxyJYkSZIkqSAG2ZIkSZIkFcQgW5IkSZKkghhkS1IbExEp3wbXui2SWk5EzM//rR9Zdvy0/PiM2rRMkrQ1DLIlSZIkSSqIQbYkSVLbshyYB/y11g2RJDVf51o3QJIkSRullG4Bbql1OyRJW8aebEmSJEmSCmKQLUktLDLHR8StEbEgItZExKKImB0R4yNiQANl94mIX+T510TEvIj4VkRsXyHvkflkSfMbON/1eZ4JDZWNiFMi4r6IWFo6MVNETMi/vz4itouIsRHxeET8PSJej4jbImLolt2pzdq6f0RcEBEPRMTLEfFWRCyJiN9HxOcjIhoo2ysiLo6I5yLizYh4JSJ+HBGDGptUKr+uL0bEPRGxOK/3lYj4r4jYt4hrK6tvQ3vyZ+XfIuKxiFgVEQsj4qcRsXNJ/qMi4o78XqyKiD9ExBGN1HFARPxnPtHWmxGxLCJm5nVv9rtA3o4PR8QVEfF/EfFa/vy9EhG/iojDG6hrw8R9zXl+t0ZbvIcl5Y6MiDvz/G9ExCMRMbKp11Mh7b0RMS4i7o2IF0va8mBEnFXt3kbJv/2IqIuIifnn8Wb++f4iIvZsqF1boux5ODh/fhZGxLrIfw7V8nmLiKGR/WxeEtnPsccj+7nWKX+eUkScVqXsgIj4fkQ8GREr82fp8Yj4dkT03Np7J6kdSym5ubm5ubXQBtQBvwZSvi0GHgH+AryVHzutrEx93i8Bq4E3gEeBV0rSflWhriPztPkNtOf6PM+EamWBqfnXC4GH83qPzPNNyNN+BtyRf/0s8BjwZv7934GDCrh3j+bnWwY8ld+30ntwY5Vy7wL+VJLvz8Dc/H6/BkzMj8+oULYvMLOk7Ct52RX5928BJ1ept77Mac28ztPq2wPcWHJPHy95Rl4A3gmMBtbnn82c/NlIwBrg0CrnPycvk/LreAx4qaS9twDblZXpkaetB/4G/DEvt7Tk+JcbuQ/Nen638llpc/cwLzeqpNyy/BlekH8/lezfWyL/91Xpeiqc81d52hv5NT5ccp76e7B9A//2L8nbv57s38aTwNts/Pm0a8GfTX27vp7XU/88zAO+XcvnDTiu5PlYmX8+z+ff/zK/lxX/TQNH5Z9p/bPzdL6tzY89DQws8l66ubm1n63mDXBzc3PryFvJL7avAycCnUrS6oDPAYeVlan/xfAt4GKgW0naZ0t+aR9eVu5Itj7IXksWLJ8KRJ4WQNf86wklbXsBGFpyjncBD+Tp9xVw7z4LDKlw/CDgmbyezQJe4L/ztBeBfUuODwD+UPJL9YwKZX+bp/2htG6yN7++BqzLf5F/b4WyWxtkvwW8CgwrSds9v88JuJXsDxijSj6b7mz8Y8f9Fc59Usnz9/my5+8gsiAtAePLym1PFrQMLDu+HfAZYFXe3p0buA/Nen638llpi/dwb7LgKwGXsfHfUOT3di0bg9sjq1xPpWf0OODg+vaXHN8LmJWXG9fAv/23yP64sEdJ2m5kQWEC/rOoz6XseVgL/KDseair1fNG9vOgPkj+GdCjJO1YsmC92h9C92DjH96+U1Z2ABt/jtxT5L10c3NrP1vNG+Dm5ubWUTfgH0t+AfxgM8rVl7m7Svpv8vRLyo4fydYH2Qn49wbKTyjJd3iF9APY2PPUuwXv7dF5Pb8rO757yS/Vh1Uo17/kl+MZVc75ItCnSr2X53l+VCHt5Xz7dDOv5bSSe7pZWbKe1/r0/6iQ/v6S9D4lxzuzsYfzk1XqHprfr9ep0PvZQJu/k5/3vKKe3618HtrcPQR+nJebW6XcNSV1HlnlemY0dN0VzrlHXu6pCmnX52lvAu+pkH5Cnr60qM+l7Hm4YyvOUfjzxsY3Wp4GOlco9+WSc59Wlvaz/PhlVersmf8sSMDBRd5PNze39rE5u7gktZwT8v2slNLvt6D8f1Q5Pgv4ONkv1C3huibkeTyl9IcKx/+PrPeuK/Aeslett1hE7AqcTBa8vys/LyX7/cuKjCDrKZyXUrq//Hwppb9FxK/J3iAod1K+vymltKxKk24GziR7VbT83IMauJSmeD2l9MsKx+eUfH1NhXr/FBFvAt3I7nl9/g8AuwILUzZb9WZSSnMi4kVgMFmwOKs0PSIOBj5J1ivbh42rkvTP9+X3v1Qtnt+2dA8/ku9/WKWtlwKnV7+U6iKiP9m/i4OBHfN2l85R8L6IqEspra5Q/I6U0nMVjte3u29E9EspLd2StjXgx41laOXn7dh8f31KaW2Fcv9J9hl1LT0YEV3yNgJcWanClNIbEXEX2R9LjiJ7pV/SNsQgW5Jazj75/sEtLP9MleOL8n1LTKyzOKX0tybkq9i2lFKKiL8BO7OV7YuIs4DJZK+SVvPOsu/fl+8fa6DM/1E5yK6f1OyEiDisStlu+X7nKulbo1LgA9kY1Xp/aSDPLmRjW+vVX09dRGz2B4cS9fdwZ/JAKyI6A9OofJ8qla2kFs9vm7iHEdEb2Ck/9qcqZeaRvULdrN/FIuJE4Cdl7dwsG9CPbFxyucY+F8g+m6KD7Gr3oVbPW4M/K1JKqyNiHtkbSaX2BN6Rf31dVJ9/cdd83xI/KyS1cQbZktRyeuX7ar2ijVlV5fj6fF/1t7utUK3O5uTb6vZFxDCycayQ9VD9J9nY1zdSSusiYneygKr8/7H6wOONBk5fLa1vvt8z3xpS10j6lqh2T9OGL1JqLE/pPa+/nt7AoU2o/x0lX3+DLOB5EzifbMzyX4G/539IGUnWM9mlgfO1pee3te9haUC3qFLG/DleQtYT3SQRMZjsVeWuZHMPXE72uvPylNLafJbzdXn2ap9NxetPKa0vCRhb+2dLLZ63Lf1Z0bfk6+b+u5K0jTDIlqSWsyLf92ml+ioFCeW6t0ZDCvCFfP+rlNK/VUiv1qO1Mt831EtaLa2+7MiU0k8aaV97UH89M1NKRzSz7Gn5/hsppUqv4TbUo9iRbOk9LA3OdiQb57+JiNiO5t/Hk8kC7IeBU1JK68vS2+vnclq+b83nbSXZH0+a+7Oi/plIQJeU0roKeSRt41wnW5JazuP5/p9aqb76npz+DeR5b2s0pAC75fuZVdIPqX
J8Xr5vaD3r/aocfyLfl78e2l7VX8/7G1rHuYotvf8dzRbdw5TScrIlwgD+oUq299H8zo76z+X+CgE2tN/PpRbPW4M/KyKijso/L58hm3cigCEt0C5JHYBBtiS1nJvJejuGRcSRrVDfX/L6ukXEZhMERcShtJ8A8u/5fkB5QkR0I5t8rJL6pZj2iojN/rgREe8Cjq9S9r/z/ecjosmv8LZh95OtyfxO4F+bWbah+78X2URS24KtuYe/y/fVntUxW9Cehj6XIHvtuj2qxfM2Pd9/IR8TXu5zbJyDYYN8Mrnb8m/PaYF2SeoADLIlqYWklJ4kWzYH4OaI+GSUDnqM6BYR/9LAJFvNrW8Z2frOAJdFxIZXLPOg+waydXnbg/vy/eiIOKj+YD6r8q+oMplQSul5oH526f+KiH1Kyu5E9oePiuOpU0q3AXeSTRh1b6XPJSJ2j4hzI2JUhbT5+XZiUy6wpaWU3mJjEPDDiPha3ju3QUT0iIhPRUT5jPL19/+7ETGgJP++wP+ycdxvoSJiQkSkiJjfEudvrq28h1PI/r0dEBGXRMT2ef6IiH8lC9orzWrdkPrP5dMR8dGSNvQkWxXg4Gaer0kiYnD+uaQW+oNhLZ63q4DlZDOZ/zgiNgyliYhjyNb0rvbz8ptkQwJOjYhr8p8tG0RE54g4IiKmRcS7W6Dtkto4g2xJallfJVuntR/wP8BrEfFwRDxLNmb7pxS7lNE5ZJMHHQ68HBGPRcQzZEtpPcvG3tq27lrgKbIxkw9FxLyImEu29uzRQKVx2vW+mpcdDPwxIv6Ul/0r2XrI38vzVfrF/STgbrJfvP8QEYvyz2tOPmv6c8DFQKXlunbNt4ZmfW5VKaUbga8B2wFTgaX5MzE7Iv5CFmT8iuyelhpPNvxgKPBCRPwxIp4mm4m5K3BBa11DrW3pPUwp/YmsFzsBY4FFEfEw2TN8HdnSXpVm/27Ib4AZZK+Z3xYRz0fEo2Svpp8GfGlLrrENaPXnLaX0KtncD2uBzwMLI+KRiHiO7I2YO9m4rNm6srLzgE8Ai8mWYXslIp6OiFkR8STZuO0ZwBdpeLI2SR2UQbYktaD81cLjgc+QvT66jmxMcE+ytXj/HxtfWyyivoeBw4DbyYLt9wFvAecCH6P5PWc1kc8AfTjZOrSvko3ZHADcQtZbd08DZReTjeGcDMwn+yPGTsCNZOttv5ZnXVGh7DKytbY/DdzKxs9r7zz/TcApwCVbd4WtJ6V0Gdlycv8BvEB2P4aS/TFgJnAe8KGyMk8Aw8juwWqy56gL2WzW+5N9Ji2hvhdzq9ZXL9qW3MO83NVkwffdZEH6P5C9fj4qpTR2C9qxnmz97Ul5OwaRLTt2L/DBlNJPm3vOJqr/XN4E/lz0yWv1vKWUbiX7WfG/ZL3WQ/L6zyH7g1t973alnxUzgL2AbwGPkP2MOQh4N/Ak2c+fQ6kw6Z2kji9SSo3nkiSpg4iI/wBGA1NTSmfXuj3aKCKeIgtcDk4pPVLr9igTEeeRBfY/Sil9tdbtaQ357O9LyZZi3Del9HgjRSRpA3uyJUnbjIjoTdZLDdVnMlYNRMQOZAH2PQbYbc7hZG/BTK51Q1rRZ8gC7CW0QO+9pI7NIFuS1KFERF1EXBARg8qO7072OuoOZK/a3l6L9qmq+onmJtW0FdpEvnTZocAvUkrza9ycQkXEsRFxUkR0LTkWEfFJ4Ef5oStTSu1imI2ktsPXxSVJLSoi7m9mkRNTSgsbz1a1vh5kM/8CPE82Brsf2TjaAF4HPpxSemhL65DU/kXEv5FNQPcm2frXbwK7A+/Ks9xL9rNiTW1aKKm9MsiWJLWoiGjufzS7bU2PWT6W8hyyCczeS7bG8XqyCYjuAKaklF7a0vNL6hjyNbi/ChxJNrlbb7I/0D1BNsnhj1NK7WXZQ0ltiEG2JEmSJEkFcUy2JEmSJEkFMciWJEmSJKkgBtmSJEmSJBXEIFuSJEmSpIJ0rnUDVF1EPA30JVuCRpIkSZLUenYHXk8p7dWcQgbZbVvfHj169B8yZEj/WjdEkiRJkrYlTz75JCtXrmx2OYPstu35IUOG9J81a1at2yFJkiRJ25Rhw4Yxe/bsZr9V7JhsSZIkSZIKYpAtSZIkSVJBDLIlSZIkSSqIQbYkSZIkSQUxyJYkSZIkqSAG2ZIkSZIkFcQgW5IkSZKkgrhOtiRJkqR2af369SxZsoQ33niDtWvXsn79+lo3SW1Up06d6Ny5Mz179uSd73wnnTq1XH+zQbYkSZKkdmfNmjX89a9/Ze3atRuORUQNW6S2bN26daxbt441a9awbNkydtllF7p27doidRlkS5IkSWp3Fi9ezNq1a6mrq2PHHXeka9euLdo7qfZt/fr1rFmzhkWLFrF69WoWL17Mu9/97hapy6dQkiRJUruzcuVKAAYNGkRdXZ0BthrUqVMn6urqGDRoELDx+WmRulrszJIkSZLUQlJKRASdO/tyrpquc+fORAQppRarwyBbkiRJkqSCGGRLkiRJklQQg2xJkiRJkgpikC1JkiRJUkEMsiVJkiRJKohT8UmS1IjB426vdRParfmTPlrrJkiS1KoMsiVJkiSpmtWr4dFHYcUK6NULDjwQ6upq3Sq1YQbZkiRJklTulVdg6lSYNg1ef33j8b59YeRIOPtsGDiwdu1Tm+WYbEmSJEkq9dhjMHQoTJmyaYAN2fdTpsABB2T5amjGjBlEBBMmTODBBx9k+PDh9OzZkx122IHRo0ezevVqAG6//XaGDRtG9+7d2XHHHTn33HNZu3btZue79dZbOeqoo+jbty/dunVjyJAh/OAHP2DdunWb5Fu+fDkXX3wxRxxxBAMHDmT77bdn4MCBfP7zn+e5557b7LwTJkwgIpgxYwY33ngj++23H3V1dQwYMIAxY8ZsaGdHYZAtSZIkSfVeeQWOPRYWLWo436JFWb4FC1qnXQ146KGHOOqoo+jduzdnnHEGu+yyC1deeSWnn346v/jFLzjxxBPZddddOeOMM+jTpw+TJ0/mu9/97ibnOP/88zn++OOZN28eJ5xwAqNHj6auro5zzjmHk08+eZO8Tz31FN/61reoq6vjk5/8JF/72tc48MADufHGGzn44IN58cUXK7bziiuu4Etf+hLvf//7+cpXvkLfvn25/PLLGTVqVIvdm1rwdXFJkiRJqjd1auMBdr1Fi7L8kye3bJsaMX36dH79619z3HHHAfD2229vCHrvuOMOZs6cyUEHHQTAxIkT2WOPPbjssss4//zz6dKlC3fddReTJk1ixIgR3HzzzXTv3h2AlBKjR4/mqquu4uabb+ZTn/oUAHvvvTevvvoq/fr126Qd9957L0cffTTf+c53uPbaazdr5913382cOXN43/veB8BFF13Efvvtx89//nMmT57MwA7y+r092ZIkSZIE2SRn06Y1r8y0aVm5Gho+fPiGABugS5cunHjiiaSU+PjHP74hwAbo2bMnH/vYx1i6dCkvv/wykPUwA1xzzTUbAmyAiGDSpElEBDfddNOG4717994swK5vx/vf/37uvvvuiu0cM
2bMhgAboK6ujlNOOYX169czZ86cLbz6tseebEmSJEmCbBbx8jHYjVm6FObMgcMOa5k2NcF+++232bEBAwY0mrZgwQJ22203Zs+eTffu3ZlW5Q8MdXV1PP3005scmzFjBpdeeikPPfQQixcv3mSM9/bbb1/xPEOHDt3s2KBBgwBYtmxZxTLtkUG2JEmSJEG2TNeWWL682HY0U69evTY71rlz50bT3n77bQCWLl3K2rVrmThxYtU6Vq1ateHrX/7yl5x00kn06NGDESNGMHjwYN7xjncQEVx//fVVx2Q31JbyydXaM4NsSZIkSYJsHewt0bt3se1oZb169SIiWLx4cZPyT5gwgW7dujFnzhz23HPPTdJ+/vOft0QT2xXHZEuSJEkSwIEHZutgN0e/ftlyX+3YBz7wAZYsWcKzzz7bpPzPPfcce++992YB9quvvsrzzz/fEk1sVwyyJUmSJAmgrg5GjmxemZEjs3Lt2FlnnQXAyJEjWbJkyWbpCxcu5Kmnntrw/a677spf/vIXFpXMwv7mm2/yla98ZcMr6Nsyg2xJkiRJqnf22bDjjk3Lu9NOMHZsy7anFRx77LGMHz+e+++/nz322INTTjmFcePGcfrppzN8+HAGDRrErbfeuiH/mWeeyYoVK9h///0566yzGD16NPvssw9PPvkk++67bw2vpG0wyJYkSZKkegMHwvTpjQfaO+2U5esgaztfcMEF3HXXXRx++OHcc889XHLJJdx2222sWbOGCRMmcOqpp27I+9WvfpWrrrqKfv36ce2113LLLbdwxBFHMGvWLPr06VPDq2gbIqVU6zaoioiYdcghhxwya9asWjdFkrZpg8fdXusmtFvzJ3201k2Q1EHVLym11157tUwFCxbA1KnZOthLl2483q9f9or42LEdJsDe1jT12Rk2bBizZ8+enVIa1pzzO7u4JEmSJJUbOBAmT4YLLsjWwV6+PJtFfOjQdj8GWy3LIFuSJEmSqqmrg8MOq3Ur1I44JluSJEmSpIIYZEuSJEmSVBCDbEmSJEmSCmKQLUmSJElSQQyyJUmSJEkqiEG2JEmSJEkFMciWJEmSJKkgBtmSJEmSJBXEIFuSJEmSpIIYZEuSJEmSVBCDbEmSJEmSCmKQLUmSJEkqxIwZM4gIJkyYsMnxwYMHM3jw4Jq0qbUZZEuSJEmSVJDOtW6AJEmSJLVVq99ezaMLHmXFmhX06tqLAwceSF2Xulo3q9255557at2EVmOQLUmSJEllXlnxClNnT2Xa/03j9Tdf33C8b7e+jNx/JGcPO5uBPQfWsIXty3ve855aN6HV+Lq4JEmSJJV4bOFjDL1mKFNmTdkkwAZ4/c3XmTJrCgdcfQCPLXysRi3MlI5/fvDBBxk+fDg9e/Zkhx12YPTo0axevRqA22+/nWHDhtG9e3d23HFHzj33XNauXbvZ+W699VaOOuoo+vbtS7du3RgyZAg/+MEPWLdu3WZ5V69ezbhx49h555035L322murtrXSmOwFCxbw7W9/m0MOOYT+/fvTtWtXBg8ezOjRo/nb3/622TlOO+00IoIXXniByy+/nL322ouuXbuy6667MnHiRNavX9/MO9gyDLIlSZIkKffKilc49mfHsmjVogbzLVq1iGN/diwL3ljQSi2r7qGHHuKoo46id+/enHHGGeyyyy5ceeWVnH766fziF7/gxBNPZNddd+WMM86gT58+TJ48me9+97ubnOP888/n+OOPZ968eZxwwgmMHj2auro6zjnnHE4++eRN8q5fv55PfOITXHzxxfTt25cxY8ZwyCGHMHbsWKZMmdLkds+cOZMpU6aw4447csopp3DmmWfynve8hyuvvJJhw4axfPnyiuXOOeccLrzwQoYNG8aXv/xlACZMmMD48eObeedahq+LS5IkSVJu6uypjQbY9RatWsTUWVOZfMzkFm5Vw6ZPn86vf/1rjjvuOADefvttDjzwQG688UbuuOMOZs6cyUEHHQTAxIkT2WOPPbjssss4//zz6dKlC3fddReTJk1ixIgR3HzzzXTv3h2AlBKjR4/mqquu4uabb+ZTn/oUADfccAN33303xx57LLfddhvbbbcdAGPGjOHAAw9scrs/+MEPsnDhQnr06LHJ8RtuuIEvfOELXHHFFXzzm9/crNzcuXN5/PHHGTBgAADjx49nzz335Ic//CHf/va32X777Zt5B4tlT7YkSZIkkU1yNu3/pjWrzLTHprH67dUt1KKmGT58+IYAG6BLly6ceOKJpJT4+Mc/viHABujZsycf+9jHWLp0KS+//DIAV1xxBQDXXHPNhgAbICKYNGkSEcFNN9204fgNN9wAwEUXXbQhwAbYZ599+NznPtfkdvfv33+zABvgc5/7HL169eLuu++uWG78+PEbAmyAd73rXRx33HG88cYbzJs3r8n1txR7siVpGzF43O21boIkSW3aowse3WwMdmOWrl7KnFfncNguh7VQqxq33377bXasPghtKG3BggXstttuzJ49m+7duzNtWuU/MNTV1fH0009v+P6Pf/wj3bt354ADDtgs7+GHH86Pf/zjJrf9f/7nf7j66quZO3cur7/++ibjvxcsqPwq/tChQzc7NmjQIACWLVvW5LpbSrsKsiPiX4DDgaHAPsD2wBdTStdXyd8LmAB8CtgJeBX4JTAxpbSyQv5OwFeBLwF7ACuBu4FvppSer1LHCODfgQOABMwBvpNS2nbmqJckSZI6gBVrVmxRueVvVh473Fp69eq12bHOnTs3mvb2228DsHTpUtauXcvEiROr1rFq1aoNXy9fvpydd965Yr4dd9yxye2eMmUK3/jGN9hhhx045phjGDRoEHV12fJol156KWvWrKlYrqFrqjRJW2trV0E28B1gV2AxWcC8a7WMEdEduA/YD7gTuAnYH/gGcERE/HNK6c2yYlcDo4A/AZcDA4HPAMdExCEppWfL6vgX4KfAa8D1+eGTgLsi4jMppV9t+aVKkiRJak29um4evDVF7269C25J6+rVqxcRweLFi5uUv3fv3rz22msV0xYtatp49rVr13LhhRcyYMAAHnvsMfr3778hLaXE97///Sadpy1qb2OyRwGDU0o7AFc1kvdcsgD74pTSiJTSuJTSCOBi4CBgbGnmiBien38mcEBK6byU0ueA44F+wBVl+fsCPyQL+A9IKZ2ZUjqTrEd7CXBlRPTcusuVJEmS1FoOHHggfbv1bVaZfnX9GDpg89eX25MPfOADLFmyhGeffbbxzMC+++7LqlWrmDt37mZpf/jDH5p0jsWLF7N8+XKGDRu2SYAN8Oijj25Yfqw9aldBdkrp7pTSi43li4ggC5hXAheWJV+YHx9Vdvz0fD8+pfRWSZ2/A2aQ9WbvUpL/00Af4IcppZdL8r9MFpC/C/hkEy5LkiRJUhtQ16WOkfuPbFaZkfuNpK5LXQu1qHWcddZZAIwcOZIlS5Zslr5w4UKeeuqpDd/XT272zW9+c5PXs5944gl++tOfNqnO/v37U1dXx9y5c/n73/++4fjrr7/OmWeeuUXX0Va0qyC7GfYk
e9X7gZTSqtKE/PsHgN0jonQgwZFAfVq5O/L9EWX5IXsVvSn5JUmSJLVxZw87mx27N21c8U49dmLssLGNZ2zjjj32WMaPH8/999/PHnvswSmnnMK4ceM4/fTTGT58OIMGDeLWW2/dkP8LX/gCRx99NNOnT2f//ffnvPPO4/TTT2fYsGEcc8wxTaqzU6dOjB49mvnz57Pvvvty9tlnM2rUKIYMGUKnTp0YOHBgS11ui+vIQTZAtfcdni3Nl4/fHgC8kFKqNFJ+k/xNqKNS/qoiYlalDRjSlPKSJEmSijGw50Cm/8v0RgPtnXrsxPRTpzOwZ/sNBktdcMEF3HXXXRx++OHcc889XHLJJdx2222sWbOGCRMmcOqpp27I26lTJ2699VbOPfdcli5dymWXXcaDDz7I1KlT+frXv97kOr/3ve9x0UUXERH86Ec/4q677uKUU07hzons33wAACAASURBVDvvpEuXLi1xma0iUkq1bsMWiYhxwPeoMLt4RHwW+C/gopTS/6tQ9iKyGcFPSCndEhEDgVfIer43m3s/Ij5E1mN9eUppTH7sGbIguktKaW1Z/i7AW8DjKaV9m3Ats6okDTnkkEN6zJpVLVmSms4lvFQL8yd9tNZNkNRB1S8ptddee7XI+Re8sYCps6Yy7bFpLF29dMPxfnX9GLnfSMYOG9thAuxtTVOfnWHDhjF79uzZKaVhzTl/e5tdvEOq9qHlwfchrdwcSZIkaZs3sOdAJh8zmQuGX8CcV+ew/M3l9O7Wm6EDhrb7MdhqWR01yK5fqK7aXPq9yvI1N395mfLZASrllyRJktTO1HWp47BdNnvZVaqqo47JbmxM9CbjqfPJ0F4FdouI7RrL34Q6GhsTLkmSJEnqgDpykL0AODSf1GyD/PtDySY5e6kk6T6gPq3ciHw/syw/QKXp80aU5ZEkSZIkbQM6ZJCdstncrgN6AOPLksfnx68tO35Nvr8wIravPxgRHyZbruvOsjW6/5vsdfAzI2JQSf5BwL8Bi4FbtvpiJEmSJEntRrsakx0Ro4D6ARH75PtREXFk/vX9KaXr8q+/DxwHnBcR+wNzgQPIep4fAS4tPXdK6d6IuA4YBcyNiNvJlvU6CVgKnFmW//WI+Dfgp3n+X+RJJwHvBE5KKb2x9VctSZIkSWov2lWQTRZgf6Hs2KFs+or3dZCNs46II4AJwKeA4WTjrqcAE1NKqyuc/wzgCeBLwBhgJVlv9DdTSs+VZ04p/SwiFpMtB/ZFIAFzgO+klO7ewmuUJEmS1IiIYP369aSUiIhaN0ftREqJlBKdOrXcS93tKshOKZ0GnNaM/MuBsfnWlPzrgcvzral1TAemNzW/JEmSpK3XtWtXVq9ezapVq+jRo0etm6N2YtWqVUD2/LSUDjkmW5IkSVLH1rNnTwAWLlzIypUryaZlkipLKbFy5UoWLlwIQK9evRopseXaVU+2JEmSJAH07duXVatWsWrVKl56KVs0yNfGVU3pH2G6d+9Onz59Wqwug2xJkiRJ7U6nTp0YNGgQy5YtY8WKFaxZs8bebFXVqVMnunbtSq9evejTp49jsiVJkiSpXKdOnejXrx/9+vWrdVOkDRyTLUmSJElSQezJliRJLWbwuNtr3YR2af6kj9a6CZKkLWRPtiRJkiRJBTHIliRJkiSpIAbZkiRJkiQVxCBbkiRJkqSCGGRLkiRJklQQg2xJkiRJkgpikC1JkiRJUkEMsiVJkiRJKohBtiRJkiRJBTHIliRJkiSpIAbZkiRJkiQVxCBbkiRJkqSCGGRLkiRJklQQg2xJkiRJkgpikC1JkiRJUkEMsiVJkiRJKohBtiRJkiRJBTHIliRJkiSpIAbZkiRJkiQVxCBbkiRJkqSCGGRLkiRJklQQg2xJkiRJkgpikC1JkiRJUkEMsiVJkiRJKohBtiRJkiRJBTHIliRJkiSpIAbZkiRJkiQVxCBbkiRJkqSCGGRLkiRJklQQg2xJkiRJkgpikC1JkiRJUkEMsiVJkiRJKohBtiRJkiRJBTHIliRJkiSpIAbZkiRJkiQVxCBbkiRJkqSCGGRLkiRJklQQg2xJkiRJkgpikC1JkiRJUkEMsiVJkiRJKohBtiRJkiRJBTHIliRJkiSpIAbZkiRJkiQVxCBbkiRJkqSCGGRLkiRJklQQg2xJkiRJkgpikC1JkiRJUkE6dJAdmRMi4t6IeDUi/h4R8yLi6ojYvUL+XhFxSUS8GBFrImJ+REyOiB5Vzt8pIs6MiCciYnVEvBYRN1U6tyRJkiSp4+vQQTbwA+Bm4H3Ar4EfAi8ApwOPRcSQ+owR0R24DxgLPA1MBeYB3wB+HxHdKpz/auByIPL9dOAE4JGI2LOFrkmSJEmS1EZ1rnUDWkpE7AR8DXgR2DeltLwkbSxwCXA2MDI/fC6wH3BxSmlcSd5JwHlkwff3So4PB0YBM4EPpZTeyo/fCPwWuAIY0VLXJ0mSJElqezpyT/Zgsut7oDTAzt2W73eA7LVysoB5JXBhWd4L8+Ojyo6fnu/H1wfYACml3wEzgGMiYpetuwRJkiRJUnvSkYPsZ4G3gEMjoldZ2sfy/T35fk9gIFlAvqo0Y/79A8DuEbFzSdKRQH1auTvy/RFb3HpJkiRJUrvTYV8XTyktiYhxwBTg6Yi4FVgB7At8EPgR2SvdkAXZkAXmlTxL9ur3nsBL+fjtAcCTKaV1VfKXnrdBETGrStKQKsclSZIkSW1Qhw2yAVJKUyPiFeA64MslSfcDN6aU1ubf98735a+V11tRlq+5+SVJkiRJ24CO/Lo4EfEt4GfAd4GdgZ7A4UA3YEZEfKKGzdsgpTSs0gY8Weu2SZIkSZKarsMG2RFxNDARuCKlNCml9HJKaWVK6X7g48DbZK+Sw8Ye6Wo9z73K8jU3vyRJkiRpG9Bhg2zgw/n+3vKElNJCsrWw94iIHjQ+hnqTMdv5ZGivArtFxHaN5ZckSZIkbRs6cpC9fb7foUr6DsB6sh7tZ4EFZDORdy/NlH9/KPBCSumlkqT7gPq0cvXrY8/csqZLkiRJktqjjhxk1y+tdXZEbPJad0R8GRgEzEoprUkpJbLJ0XoA48vOMz4/fm3Z8Wvy/YURUR/QExEfJlve686U0otFXIgkSZIkqX3oyLOL/xL4CvDPwDMR8RtgGXAA2RJeq4GzS/J/HzgOOC8i9gfm5nmPAR4BLi09eUrp3oi4DhgFzI2I28mW9ToJWAqc2XKXJkmSJElqizpsT3a+fvUxwPnAK8Bnga8B7yObcXxoSunhkvyrgCPIgum9ga8De5FNjnZUSml1hWrOAMbkX48BPgLcAhycUnqmBS5LkiRJktSGdeSebFJKa4BJ+daU/MuBsfnWlPzrgcvzTZIkSZK0jeuwPdmSJEmSJLU2g2xJkiRJkgpikC1JkiRJUkEMsiVJkiRJKohBtiRJkiRJBTHIliRJkiSpIAbZkiRJkiQVxCBbkiRJkqSCGGRLkiRJklQQg2xJkiRJkgpikC1JkiRJUkEMsiVJkiRJKohBtiRJkiRJBTHIliRJkiSpIAbZkiRJkiQVxCB
bkiRJkqSCGGRLkiRJklQQg2xJkiRJkgrSudYNkCRJ0qYGj7u91k1ot+ZP+mitmyBpG2dPtiRJkiRJBTHIliRJkiSpIAbZkiRJkiQVxCBbkiRJkqSCGGRLkiRJklQQg2xJkiRJkgpikC1JkiRJUkEKCbIj4ryI6F/EuSRJkiRJaq+K6sn+HvBSRPwqIkYUdE5JkiRJktqVooLsUcBc4ATgtxExPyLGR8Sggs4vSZIkSVKbV0iQnVKallIaBgwBLge6AxOBFyLifyPiExHh+G9JkiRJUodWaOCbUvpzSmksMBA4BZgBfAS4hex18u9ExO5F1ilJkiRJUlvRIr3LKaW3U0q/SCl9CDgMeBUYAPw78ExE/DYiPtASdUuSJEmSVCstEmRH5iMRcQtZb/ZA4EXgO8B0YATwQER8viXqlyRJkiSpFjoXebKI2AX4V+CLwLuB9cDtwNXA9JRSyvP9A3Ab8C3ghiLbIEmSJElSrRQSZEfEiWQzjB9N1jv+CnABcF1K6ZXy/CmlP0fET8leH5ckSZIkqUMoqif7v8l6re8ArgJuTymtb6TM08D9BdUvSZIkSVLNFRVkXwRcm1L6a1MLpJRuAm4qqH5JkiRJkmqukCA7pTS+iPNIkiRJktSeFTK7eET8U0RcEhE7VUkfkKcfUkR9kiRJkiS1RUUt4fV14OMppYWVElNKrwIfA8YWVJ8kSZIkSW1OUUH2QTQ+idlMwJ5sSZIkSVKHVVSQ3Z9s2a6GLMzzSZIkSZLUIRUVZC8Ddmkkz67AyoLqkyRJkiSpzSkqyJ4NfDIidq6UGBG7AMcDDxZUnyRJkiRJbU5RQfYlwDuAByLi8xExADbMKv4F4AGgDphSUH2SJEmSJLU5Ra2TPTMiziYLon8CEBEJiDzLemBMSmlmEfVJkiRJktQWFRJkA6SULouIe4Evk8023ptsrPbDwFUppSeLqkuSJEmSpLaosCAbIKX0ODC6yHNKkiRJktReFDUmW5IkSZKkbV6hPdkRsRMwFOgDbFcpT0rphiLrlCRJkiSprSgkyI6IbsC1wMlU7x0PIAEG2ZIkSZKkDqmonuxJwKnAM8BNwMvA2oLOLUmSJElSu1BUkP0Z4M/A0JTSmoLOKUmSJElSu1LUxGd9gOkG2JIkSZKkbVlRQfY8YMeCzlW4iPhkRNwVEUsi4s2IeCEiboqIncvy9YqISyLixYhYExHzI2JyRPSoct5OEXFmRDwREasj4rX8vLu3zpVJkiRJktqSooLsycBxEbFHQecrRGSuBv4H2A34OXAp8Afgn4BdS/J2B+4DxgJPA1PJ/njwDeD3+eRu5a4GLieb1O1yYDpwAvBIROzZQpclSZIkSWqjihqT/TJwB/BwRFwKzAVWVMqYUppZUJ1NcRbwJeBHwFkppXWliRFRev3nAvsBF6eUxpXkmQScRxZ8f6/k+HBgFDAT+FBK6a38+I3Ab4ErgBEtcE2SJEmSpDaqqCB7BtnyXAFMyL+upuL62UWLiDrg28DzwJjyABsgpbQ2zxtkAfNK4MKybBcCX83Tv1dy/PR8P74+wM7P+buImAEcExG7pJT+WswVSZIkSZLauqKC7AtoOLCuhWOAvsBPgO0i4hPAe4FlwN0ppb+U5N0TGAjckVJaVXqSlNKqiHgAGBERO6eUXsqTjgRWAQ9UqPuOPP0I4KeNNTQiZlVJGtJYWUmSJElS21FIkJ1SmlDEeQo2NN+vAx4nC7DrrY+IqSmlb+Tf14+ffrbKuZ4le/V7T+ClfPz2AODJSj3kJedxXLYkSZIkbUOK6slui/rn+7PJxogfDDwF7A9cA3w9Ip5LKV0J9M7zLq9yrvrx5b3L9k3N36CU0rBKx/Me7kOacg5JkiRJUu0VNbs4ABGxf0R8PyJ+ExF3lxzfNSI+ExH9iqyvEfXX9hZwfErpkZTSypTSH4BPA+uBr7dieyRJkiRJHVxhPdkR8X2yoDXyQ6VjtAO4MU+/rKg6G1Hfy/xoSmlBaUJK6cmIeB7YIyL6lOSt1vPcq+yczc0vSZIkSdoGFNKTHRFfJFtP+jbgH9l0Fm5SSvOBh4FPFFFfE83L98uqpNcfr6PxMdSbjNnOJ0d7FdgtIirNlt7YGG9JkiRJUgdU1Ovio8nGO38qpfQk2Sva5Z6mdScCuzff712eEBFdgD3IZgd/jSwYXgAcmk9qVpq3O3Ao8ELJzOIA9wH1aeXq18duzTXBJUmSJEk1VlSQ/Q/AXfXrTlexiI2TkbW4lNJzwJ1kr4SPKkseB/QBbkkprU0pJeA6oAcwvizv+Pz4tWXHr8n3F0bE9vUHI+LDZMt33ZlSerGIa5EkSZIktQ9FjcleC2zfSJ6BwMqC6muq0cCDwLURcTxZb/r+wAeBF4FzSvJ+HzgOOC8i9iebkfwAsvW2HwEuLT1xSuneiLgOGAXMjYjbyZb1OglYCpzZgtclSZIkSWqDiurJfgL4YJXxyUTEO4CjgTkF1dckeW/2gcD1ZOtmn0X2yvp/AAenlBaW5F0FHEEWTO9NNknbXsAU4KiU0uoKVZwBjMm/HgN8BLglP/czLXBJkiRJkqQ2rKie7Glkr1tfFRH/VpoQEb3ytJ3YGJC2mnwc9RebmHc5MDbfmpJ/PXB5vkmSJEmStnGFBNkppWkRcTTwr2SvSy8DiIiHyXqFuwPXp5R+VUR9kiRJkiS1RUW9Lk5K6bNkr0+/ALybbG3sA4G/Al9JKY0sqi5JkiRJktqiol4XByCldC3ZJGN1QF9gRUqptSc7kyRJkiSpJgoNsuvlk4RVmihMkiRJkqQOq7DXxSVJkiRJ2tYV0pMdEeuB1ISsKaXUIr3nkiRJkiTVWlEB70wqB9m9ydal7g78kXzWcUmSJEmSOqKilvA6slpaRLwDmAQcC3yoiPokSZIkSWqLWnxMdkrp7ymls4DlwOSWrk+SJEmSpFppzYnP/gB8tBXrkyRJkiSpVbVmkL0D0KMV65MkSZIkqVW1+EzfEdEJOBU4CXi0peuTJEmSJKlWilrC6/kGzt8f6AK8DZxfRH2SJEmSJLVFRfVkd6LyEl5vA08CjwBXpJT+VFB9kiRJkiS1OUUt4TW4iPNIkiRJktSetebEZ5IkSZIkdWgG2ZIkSZIkFaSoic9+v4VFU0rpqCLaIEmSJElSrRU18dmR+T4BUSG9oeOSJEmSJHUIRb0uXgfcBjwDfA4YnB8bDHw+P/6/QF1KqVPJtl1B9UuSJEmSVHNFBdkTgX2Ag1JK/5VS+mtKaU2+/xnwAWDfPJ8kSZIkSR1SUUH2Z4GbU0orKyWmlFYANwOnFFSfJEmSJEltTlFB9g5Al0bydAb6F1SfJEmSJEltTlFB9nPApyPinZUSI2IH4DPAXwqqT5IkSZKkNqeoIPtSYCdgbkSMiYihEbFzvv8aMIesF3tqQfVJkiRJktTmFLKEV0rpuogYAIwHLilLDmAdMCGlNK2I+iRJkiRJaouKWieblNKFEXEjcCrwj0BvYDnwR+DGlNJzRdUlSZIkSVJbVFiQDZAH0hcUeU5JkiRJktqLosZkbyIi+kXEzi1xbkmSJEmS2q
rCguyI6B0Rl0XEIuA14IWStA9ExG8jYmhR9UmSJEmS1NYUEmRHRD/gIeBM4CXgKbIJz+o9DhxKNl5bkiRJkqQOqaie7AnAe4GTU0oHAr8sTUwprQbuAz5YUH2SJEmSJLU5RQXZnwBuSyn9dwN55gODCqpPkiRJkqQ2p6ggewDw50byrAG6F1SfJEmSJEltTlFB9hKgsdnE9wJeLag+SZIkSZLanKLWyZ4JHBcRg1JKL5cnRsQ/AMcCPymoPkmSJGkzg8fdXusmtFvzJ3201k2QOoSierIvArYDHoiIU4F3AUTE3hHxr8DvyV4Xn1xQfZIkSZIktTmF9GSnlJ6IiJOAnwI35IcDeDLfvwF8JqX0bBH1SZIkSZLUFhX1ujgppd9ExG7AF4APAP2AFWTrZ/8kpbS4qLokSZIkSWqLCguyAVJKS4GpRZ5TkiRJkqT2opAx2RGxLiL+q4hzSZIkSZLUXhU18dkK4KWCziVJkiRJUrtUVJD9MLBvQeeSJEmSJKldKirIngB8MCI+X9D5JEmSJElqd4qa+OxDwAzgJxFxJvAIsAhIZflSSunCguqUJEmSJKlNKSrInlDy9dB8qyQBBtmSJEmSpA5pi4LsiPgE8HRK6Zn80PDimiRJkiRJUvu0pT3ZtwATgQvy738CXJpSuryQVkmSJEmS1A5t6cRnbwNdSr4fDPTZ6tZIkiRJktSObWmQ/VfgsIjYruRY+SRnkiRJkiRtU7b0dfEbgW8BSyNiSX5sbER8sZFyKaX0ni2sU5IkSZKkNm1Lg+zvAG8CHwUGkvViR741pLF0SZIkSZLarS0KslNKa4FJ+UZErAemppQuaLCgJEmSJEkd2JaOyS43EZhR0LlaVEScFxEp3w6pkN4rIi6JiBcjYk1EzI+IyRHRo8r5OkXEmRHxRESsjojXIuKmiNi95a9GkiRJktSWFBJkp5QmppRmFnGulhQRQ8j+ILCqSnp34D5gLPA0MBWYB3wD+H1EdKtQ7GrgcrJX4S8HpgMnAI9ExJ5FX4MkSZIkqe0qqie7zYuILsB/Ao+RrfNdybnAfsDFKaURKaVxKaURwMXAQWTBd+k5hwOjgJnAASml81JKnwOOB/oBV7TIxUiSJEmS2qRtJsgGvgm8HxgJrCtPjIggC5hXAheWJV+YHx9Vdvz0fD8+pfRW/cGU0u/IXp8/JiJ2KaLxkiRJkqS2b5sIsiPiALIge2JK6c9Vsu1JNlP6AymlTV4nz79/ANg9InYuSTqS7NXzByqc7458f8RWNF2SJEmS1I5s6RJe7UZEdAVuIHtN/PsNZK0fP/1slfRngRF5vpfy8dsDgCdTSpv1jJecp9Fx2RExq0rSkMbKSpIkSZLajg4fZAMXkAW6Q6sEw/V65/vlVdJXlOVrbn5JkiRJUgfXoYPsiBhGNjP4hJTSk7VuTzUppWGVjuc93JstMyZJkiRJaps6bJAdEZ3JZhN/HJjUhCL1PdLVep57leVrbn5JBRg87vZaN0GSJEmqqsMG2UAPNo6HfiubPHwzs/LjnwTqJ0SrNoZ6kzHbKaVVEfEqsFtEbFfhVfTGxnhLkiRJkjqYjhxkrwF+XCXtn8mC4N8ArwHzyYLhBcChEdG9dIbxfJKzQ4EXUkovlZznPuDkPG1mWR0j8n35cUmSJElSB9Vhg+yU0mo2X9cagIi4nizI/l5KaXbJ8euAbwHjgXElRcaT9Yx/t+xU15AF2RdGxIfq18qOiA+TLe91Z0rpxSKuR5IkSZLU9nXYIHsLfR84DjgvIvYH5gIHAMcAjwCXlmZOKd2bB+ajgLkRcTvZsl4nAUuBM1ux7ZIkSZKkGutU6wa0Jfkr4keQBdN7A18H9gKmAEflvePlzgDG5F+PAT4C3AIcnFJ6psUbLUmSJElqM7bJnuyU0mnAaVXSlgNj860p51oPXJ5vkiRJkqRtmD3ZkiRJkiQVxCBbkiRJkqSCGGRLkiRJklQQg2xJkiRJkgpikC1JkiT9//buPdiyqr4T+PcnIHRoaSGjNG0URZmYDBnQ9kEH5GEZ1BKTKI4PNMJQqFRFR8EewagRNaOog46WGaMYnxmRGCNxBIHxHQgM2AwJZBQZFVABMSrPNO81f+x99XC4l+57e/c9p29/PlW79u291l5nna5V99zvWXvtDTAQIRsAAAAGImQDAADAQIRsAAAAGIiQDQAAAAMRsgEAAGAgQjYAAAAMRMgGAACAgQjZAAAAMBAhGwAAAAYiZAMAAMBAhGwAAAAYiJANAAAAAxGyAQAAYCBCNgAAAAxEyAYAAICBCNkAAAAwECEbAAAABiJkAwAAwECEbAAAABiIkA0AAAADEbIBAABgIEI2AAAADETIBgAAgIEI2QAAADAQIRsAAAAGImQDAADAQIRsAAAAGIiQDQAAAAMRsgEAAGAgQjYAAAAMRMgGAACAgQjZAAAAMBAhGwAAAAYiZAMAAMBAhGwAAAAYiJANAAAAAxGyAQAAYCBCNgAAAAxEyAYAAICBCNkAAAAwECEbAAAABiJkAwAAwEC2nXQHAACAyXvkCWdMugtbrCtPetaku8AUMZMNAAAAAxGyAQAAYCAuFwcAANgELrVfuKV4qb2ZbAAAABiIkA0AAAADWbIhu6oeVlWvqapzqurqqrqjqq6rqs9V1ZPnOGenqnpPVV1VVbdX1ZVV9e6qWj5H/QdU1auq6tKqWl9VP62qU6tqj8377gAAAJhGSzZkJ3lVkvcm2SPJOUlOTnJukj9I8g9V9YLRylW1Y5JvJDk2yXf6cy9PsjbJV6tqh1le40NJ3p+k+v1ZSZ6b5KKq2nMzvCcAAACm2FK+8dmFSQ5qrX1j9GBVPSXJV5J8sKpOb63d3he9Lsk+Sd7ZWjthpP5JSY5PF77fMXL84CRHJ/lmkt9rrd3RH/90kjOTfCDJ0zfTewMAAGAKLdmZ7Nba344H7P743yf5WpKdk/xOklRVpQvMtyR529gpb+uPHz12/GX9/k0zAbtv/0tJvp7kkKp6xKa/EwAAALYUSzZkb8Cd/f6ufr9nklVJzmut3Tpasf/3eUn2qKqHjxQdlGSmbNzZ/f7AoToMAADA9FvKl4vPqp9dflqSa5Nc2h+eWT99xRynXZHu0u89k/ywX7+9W5LLWmt3z1F/tN0N9en8OYr22pjzAQAAmA5b1Ux2VW2X5FNJtk9y/EhAXtHvb5zj1JvG6s23PgAAAFuBrWYmu6oekOTjSQ5Ickpr7VOT7dGvtNbWzHa8n+Hed5G7AwAAwAJtFTPZfcD+aJLDk/xVkmPGqszMSM8187zTWL351gcAAGArsORDdh+wP5bkiCSnJjmytXbPWLUNraG+15rt/mZo1yZ5VFVts6H6AAAAbB2WdMgeCdgvTXJakj+6nxuVXZNkv/6mZqNt7JhkvyQ/aK39cKToG0lmysbNPB/7m5v2DgAAANiSLNmQPXKJ+EuTfDbJS+YI2GmttSQfSbI8yZvGit/UHz9l7PiH+/3bquqBI6/7zHSP9zqntXbVJr4NAAAAtiBL+cZnf5ruEvFbknw3yRurarzO6a21S/qf35XkD5IcX
1WPS3JxkscnOSTJRUn+2+iJrbWvVdVHkhyd5OKqOiPdY71ekOTnSV61Od4UAAAA02sph+xH9vvlSd4wR50rk1ySdOusq+rAJCcmOSzJwenWXZ+c5C2ttfWznP+KdM/afnmSV6cL9J9P8obW2veGeBMAAABsOZZsyG6tHZnkyHmec2OSY/ttY+rfk+T9/QYAAMBWbsmuyQYAAIDFJmQDAADAQIRsAAAAGIiQDQAAAAMRsgEAAGAgQjYAAAAMRMgGAACAgQjZAAAAMBAhGwAAAAYiZAMAAMBAhGwAAAAYiJANAAAAAxGyAQAAYCBCNgAAAAxEyAYAAICBCNkAAAAwECEbAAAABiJkAwAAwECEbAAAABiIkA0AAAADEbIBAABgIEI2AAAADETIBgAAgIEI2QAAADAQIRsAAAAGImQDAADAQIRsAAAAGIiQDQAAAAMRsgEAAGAgQjYAAAAMRMgGAACAgQjZAAAAMBAhGwAAAAYiZAMAAMBAhGwAAAAYiJANAAAAAxGyAQAAYCDbTroDsDV65AlnTLoLAADAZmAmGwAAAAYiZAMAAMBAhGwAAAAYiJANAAAAAxGyAQAAYCBCNgAAAAzEI7xYMI+hAgAAuDcz2QAAADAQIRsAAAAGImQDAADAQIRsAAAAGIiQDQAAAAMRsgEAAGAgQjYAAAAMRMgGAACAgQjZAAAAMBAhewBV9cSqOrOqbqiqW6vqgqp6/qT7Gqs6HAAAERtJREFUBQAAwOLadtId2NJV1cFJzk5yW5LPJLk5yWFJTquqh7fWTp5k/wAAAFg8ZrI3QVVtm+SUJPckOaC19vLW2muT7J3ku0neXlW7T7KPAAAALB4he9M8Ncmjk3y6tXbJzMHW2o1J3p7kgUmOmFDfAAAAWGQuF980B/X7c2YpO7vfH7ihRqrq/DmK9lpAnwAAAJgQIXvT7NnvrxgvaK1dV1W3jNRZcq486VmT7gIAi+yM756RQ089dN7nffFFX8yz/u39fG6ccUZy6PzbzRe/mDzL5xEA00PI3jQr+v2Nc5TfNFJnTq21NbMd72e4911Y1wBgeDttv9OCzluxwwY+DndaWLtZscGPWQBYVNZkAwAb7QmrnpCdd9h5XufssmyXrN5t9QYafkKy8/zazS67JKs30C4ALDIhe9PMzGDP9TX6Tpl7lhsAtjjLtluWox531LzOOWqfo7Jsu2UbaHhZctT82s1RR3XnAcAUEbI3zcxa7Pusu66qlUmWZ5b12gCwJTtuzXHZdcddN6ruyuUrc+yaYzey4eOSXTeu3axcmRy7ke0CwCISsjfNN/r9IbOUPX2sDgAsCasetCpnveSsDQbtlctX5qwXn5VVD1q1kQ2vSs46a8NBe+XKrt6qjWwXABaRkL1pvpLk+0kOr6p9Zg5W1Yokf5LkjiSfnFDfAGCz2WflPrn4FRdn7Zq12WXZLvcq22XZLlm7Zm3WvXxd9l659zwb3ie5+OJk7dpuzfW9Gt6lO75uXbL3PNsFgEVSrbVJ92GLVlUHp3sm9m1JPpPk5iSHJdk9ydrW2smb0Pb5++67777nnz/XY7QBYPLW37k+665dlxtvuzErdliR1but3vAa7I1qeH0XqG+8sbuL+OrV1mADsGjWrFmTCy644IK5ngY1F4/w2kStta9V1f5J3pLkBUm2S3JpkuNba6dNtHMAsAiWbbcs+z9i/83Q8LJk/83QLgBsRkL2AFprFyZ55qT7AQAAwGRZkw0AAAADEbIBAABgIEI2AAAADETIBgAAgIEI2QAAADAQIRsAAAAGImQDAADAQIRsAAAAGIiQDQAAAAOp1tqk+8Acquony5cvf+hee+016a4AAABsVS677LLccsst17fWdp3PeUL2FKuq7yTZOcn3F/mlZ1L9ZYv8ujAUY5ilwDhmS2cMs6UzhtkjyS9aa4+dz0lCNvdRVecnSWttzaT7AgthDLMUGMds6YxhtnTGMAtlTTYAAAAMRMgGAACAgQjZAAAAMBAhGwAAAAYiZAMAAMBA3F0cAAAABmImGwAAAAYiZAMAAMBAhGwAAAAYiJANAAAAAxGyAQAAYCBCNgAAAAxEyAYAAICBCNn8UlU9sarOrKobqurWqrqgqp4/6X7BjKp6WFW9pqrOqaqrq+qOqrquqj5XVU+e45ydquo9VXVVVd1eVVdW1buravli9x/mUlXHV1Xrt31nKTeOmUpV9Zyq+l9V9bOquq2qflBVp1bVw8fqGcNMleo8t6q+VlXXVtW/VtXlVfWhqtpjlvrGMButWmuT7gNToKoOTnJ2ktuSfCbJzUkOS7J7krWttZMn2D1IklTVSUmOT/K9JF9P8tMkeyb5wySV5PDW2mkj9XdMcm6SfZKck+T/JHlckkOSXJTkgNbabYv4FuA+qmqvJN9KcleSHZOsaa1dMFJuHDN1qqqS/EWSl6f7nXx2ur8dViU5MMmLW2vn9nWNYaZOVZ2c5Lgk1yb5uyQ3Jdk73bi8JcnvttYu6+saw8xPa822lW9Jtk3y/9IF7H1Gjq9IcnmS25PsPul+2mxJnpvkwFmOPyXJHUl+nmT7keNvSdKSnDRW/6T++Osn/Z5sW/eWZLsk65JckORT/bjcd6yOcWybui3Jq/vx9+dJtpmlfNuRn41h21RtSVYmuTvJlUlWjJUd24/Lj44cM4Zt89rMZJOqOiTdN9Afa60dNVZ2RJKPJ3lza+2tE+gebJSqOjvdN8pPbK19q59l+VGSnZKsbK3dOlJ3xyTXJbm+tfboiXQYklTViUlOSPL4JK9LckRGZrKNY6ZRVS1L8uMkv0jym621u+6nrjHM1OmX5Zyf5NOttRePle2Z5LtJvthae7YxzEJYk02SHNTvz5ml7Ox+f+DidAUW7M5+P/PH3p7pLls8b/QDMUn6f5+XZI/xdYOwWKrq8UnekOQtrbX/O0c145hpdEiSnZOcnmSbfl3rCVV1TFU9ZqyuMcw0uiLdFXD7VdVOY2WH9vuv9HtjmHkTskm6Xx5J9wvnXlpr16Vbl7LneBlMi6p6RJKnpVtXdWl/eM5xPXbc2GbRVdX2ST6Z5JIk77qfqsYx02h1v787yT8l+VySdyT5YJLLq+q/jtQ1hpk6rbWfpbuK6BFJvlNVH6yqd1bVWUnemeS/J/lAX90YZt62nXQHmAor+v2Nc5TfNFIHpkpVbZduLev2SY5vrd3dF23MuB6tB4vpren+IFs9MmZnYxwzjR7a749LcnGSJyX5drobQX04yWur6nuttQ/GGGZKtdbeW1U/TvKRJMeMFJ2b7jLymSvjjGHmzUw2sMWqqgeku2fAAUlOaa19arI9gg2rqjVJ1ib5s9bfuRa2MDN/P96R5A9baxe11m5prf19kv+Q5J4kr51Y72AjVNWfJvmrJG9P8vAkD0p3I9Udkny9qn5/gt1jCydkk/zqm7m5voHbKXN/ewcT0QfsjyY5PN2H5DFjVTZmXI/Wg82uqrZN8ol0l9ietBGnGMdMo5nx9q3W2jWjBf0XR99P8uiqenCMYaZQVT0t3R3DP9BaO6m19qP+i6Jzkzw73X1eZh5fawwz
by4XJ7n3WpJ1owVVtTLJ8iQXLnanYC59wP5YkpcmOTXJka21e8aqbWiN1IbWWMHmsDy/Gnt3dDetvY/z++PPSTJzQzTjmGlyeb+/YY7ymePL4ncx0+mZ/f5r4wWtteuq6jtJHldVy2MMswBCNknyjSSvT3e30M+MlT19pA5M3FjAPi3JH82xpvWKJNeku3PojrM8cmO/JD9orf1wEboNM25P8pdzlB2Q7o+1LyT5abrntxrHTKOZYPJb4wX9fTIek+TWdOP4uhjDTJ8H9vuHzFH+kHTLHu6M38MsgMvFSbpHFHw/yeFVtc/MwapakeRP0q25+uSE+ga/NHKJ+EuTfDbJS+a6aVRrraW7mcnyJG8aK35Tf/yUzddbuK/W2vrW2tGzbUn+oa/2jv7YJcYx06i19r10j/18TFUdPVZ8QpIHJ/l8a+0uY5gpdV6/P67/e/eXquqYJL+R5PzW2u3GMAtR3bhha1dVB6d7JvZt6Wazb05yWJLdk6xtrZ18P6fDoqiqE5O8Od1j5d6XXz0Te9TprbVL+vo7pvsg3TvdH4QXJ3l8uqs2LkpyYGtt/ebvOWxYVX08yRFJ1rTWLhg5bhwzdarq0em+GHpokjOSfCfd3cWfmuSqJPv2jwE1hpk6VbVNkq+mu4Lo+nRXEN2Qblw+Ncn6JAe11i7s6xvDzIuQzS9V1ZPS3QTid5Nsl+55w+9prZ020Y5BbySE3J//2Fr7+Mg5K5KcmO5Lo5XpnqX92SRvaa3dvFk6CgswV8juy4xjpk5VPTzd4+iekeTX010a/oUkb22tXT9W1xhmqlTV9kmOTfL8JL+Z7hLyn6RbDvH21tq3x+obw2w0IRsAAAAGYk02AAAADETIBgAAgIEI2QAAADAQIRsAAAAGImQDAADAQIRsAAAAGIiQDQAAAAMRsgEAAGAgQjYAAAAMRMgGAACAgQjZAAAAMBAhGwC2YFV1YlW1qjpo0n0BAIRsAAAAGIyQDQAAAAMRsgEAAGAgQjYATLGqOqCqTq+qn1TV7VX1w6r626raf5a6h1fVJVW1vqqurar3VdWysTpH9mu4j5zl/IP6shPHjreq+npVPayqPllV11XVPTPrwKvqyn5b3r/mNX1f/6mqnrcJ7311VX2gqi6rqhv793VpVZ1QVdvNcc6BVfXNqrq1qn5WVadV1cP7/rdZ6ldVHVVV51XVTVX1r1X1rao6aqH9BmDrtu2kOwAAzK6qXp3kvUnWJ/l8kquTPCzJ/kmel+TckeqvTPKMJH+X5Kv9z/8pyb9J8uIBuvPrSc5P8vMkn0myQ5KbRsq3S3JOkp2TfC7JryV5YZK/rqpntNbOWcBrvizJs5N8M8mZfZsHJXlHkicmOWy0clUdkuSMJHcnOS3JNUkOTvf/9IvxxquqkvyPJC9KckWSTye5I8nvJfnLqvrt1traBfQbgK2YkA0AU6iq9k7yniTXJtmvtXblSFkl2W3slKclWd1au7yv84YklyR5YVX959baNZvYpb2SfCzJy1prd89SvirJRUkOaq3d0ffh00m+nOS4dAF8vt6e5I9HX69/7x9JclRV7ddaO68/vk2SDyfZJsnBrbVzR875RJKXztL+0ekC9seSvKK1dmdf/4FJ/ibJa6vq1NbaugX0HYCtlMvFAWA6vSLd5/QbRwN2krTOeGh+30zA7uusT3Jq38bqAfpzR5LXzRGwZxw7E7D7PnwlyVXpZp3nrbV29fjrtdZakj/v//m0kaL9k+ye5H+OBuzeG9PNbo97ZZJb0wX5O0de444kb+j/+aKF9B2ArZeZbACYTk/q9xs7AzzbbOuP+v2DN707+UFr7V/up/yG1toP5ujDmoW8YD+j/Mp0l50/NsnyJDVSZdXIz3v3+/GAndbaD6vq6iSPGmn715L8TrpLyo/vJsjvZWbN92MX0ncAtl5CNgBMpxVJWrrLxTfGTbMcu6vfbzNAf36ygfIb5zh+VxZ+5dzfpFuT/d10a6yvT3Jnui8NXp1k+5G6O/X76+do6ycZCdnp1o5XujXub76fPuw4714DsFUTsgFgOt2QLgTuluTHA7Z7T7+f7W+AFfdz3n3uzL05VdUT0wXss5M8a2xd9r7pQvaomS8ZHjpHk7vOUX9da+0Jm9hdAPgla7IBYDpd2O8PGbjdmbtsP2yWsscN/Fqb4tH9/oxZ1oE/ZZb6/9jv9xsvqKrfSPKI0WOttZuTfDvJb1XVEJfTA0ASIRsAptVfpLtZ159V1e6jBf2znVfNftoGrUs3K/3CqtphpM09c9/Z4Um6qt/f63ngVfXvkrx+lvrnpnvE2bOranwN+Nsy+yXz70/3WLBTquo+l4VX1aOq6pHz6zYAWzuXiwPAFGqtXVpVr0kXBP+5qk5PFzxXJjkg3fOgX7OAdq+pqlOTHJ5kXVWdle4S6+ckOStjz56eoAv77flVtVuSC9LNRv9+uvf+vNHKrbW7q+qYJF9I8tWqOi3devYD083a/2OSfz/2Gh9Ksm+SI5LsV1VfTncjtF3T3fDsyen+n67cDO8PgCVKyAaAKdVa+0BVXZbktUmeme7u2tcn+d9J/noTmj46yb8keUGSP05yeZKXpwuYUxGy+9B8aJKTkjwj3WPArkiyNsmXMhay+3O+VFWHJHlrkucnWZ/kK+ne55kZuzlc/ziwI6vqzCQvS3JofvV/PPNaX94c7w+Apau6zxcAgKWpqh6U7u7il7bWnjzp/gCwtFmTDQAsCVW1Yx+oR49tk+TdSZYlOX0iHQNgq2ImGwBYEqpqn3Q3QDs7yfeTPCjdnch/O8k/J3lya+3WyfUQgK2BkA0ALIqqOijJQRtR9ZLW2rxnnavqIUnele5mZ7umu/fM1elmsP9La+2G+bYJAPMlZAMAi6KqTkzy5o2o+onW2pGbtzcAsHkI2QAAADAQNz4DAACAgQjZAAAAMBAhGwAAAAYiZAMAAMBAhGwAAAAYiJANAAAAAxGyAQAAYCBCNgAAAAxEyAYAAICBCNkAAAAwECEbAAAABiJkAwAAwED+P8eeSpzelo4tAAAAAElFTkSuQmCC\n", 1098 | "text/plain": [ 1099 | "
" 1100 | ] 1101 | }, 1102 | "metadata": { 1103 | "needs_background": "light" 1104 | }, 1105 | "output_type": "display_data" 1106 | } 1107 | ], 1108 | "source": [ 1109 | "# setting image resolution\n", 1110 | "plt.figure(figsize = (8,4), dpi = 140)\n", 1111 | "\n", 1112 | "# Plotting histogram and descriptive summary\n", 1113 | "plt.scatter(churn_age.mean(), 0, label = 'mean', color = 'red')\n", 1114 | "plt.scatter(churn_age.median(), 0, label = 'median', color = 'green')\n", 1115 | "plt.hist(churn_age,bins=10)\n", 1116 | "\n", 1117 | "# axes labels\n", 1118 | "plt.xlabel('churn_age')\n", 1119 | "plt.ylabel('frequency')\n", 1120 | "plt.title('churn_age: mean, median, range')\n", 1121 | "plt.legend()" 1122 | ] 1123 | }, 1124 | { 1125 | "cell_type": "code", 1126 | "execution_count": null, 1127 | "metadata": {}, 1128 | "outputs": [], 1129 | "source": [ 1130 | "* Mean Value is higher than the Median Value\n", 1131 | "* The peak occurs at the interval (36,45) that means highest number of customers age lie in this interval\n", 1132 | "* Most customer age between 30 and 65\n" 1133 | ] 1134 | } 1135 | ], 1136 | "metadata": { 1137 | "colab": { 1138 | "collapsed_sections": [], 1139 | "name": "Mean_Variance.ipynb", 1140 | "provenance": [] 1141 | }, 1142 | "kernelspec": { 1143 | "display_name": "Python 3", 1144 | "language": "python", 1145 | "name": "python3" 1146 | }, 1147 | "language_info": { 1148 | "codemirror_mode": { 1149 | "name": "ipython", 1150 | "version": 3 1151 | }, 1152 | "file_extension": ".py", 1153 | "mimetype": "text/x-python", 1154 | "name": "python", 1155 | "nbconvert_exporter": "python", 1156 | "pygments_lexer": "ipython3", 1157 | "version": "3.6.9" 1158 | } 1159 | }, 1160 | "nbformat": 4, 1161 | "nbformat_minor": 1 1162 | } 1163 | --------------------------------------------------------------------------------