├── README.md
├── Intro-Prgrm.py
├── 06.Read_Write.py
├── 18.hc.py
├── 05.DataSummarization.py
├── 11.Simple Linear Regression.py
├── 03.Apply_Functions.py
├── 12.Multiple Linear Regression.py
├── 17.kmeans.py
├── 07.Joins.py
├── 08.Index_Select_Filter.py
├── 16.RF.py
├── 15.DecisionTree.py
├── 09.MissingValues.py
├── 04.Loops.py
├── 02.Functions_Basics.py
├── 01.DataStructures.py
├── 19.MarketBasketAnalysis_AprioriAlgo.py
├── 13.multiple_linear_regression_BackwardElimination.py
├── 10.Graphs.py
├── 14.logistic_regression.py
├── 31.Reading Files into Python.ipynb
├── 32.Min_Max_Range_Updated.ipynb
└── 33.Mean_Variance.ipynb
/README.md:
--------------------------------------------------------------------------------
1 | # Data Science with Python
2 |
--------------------------------------------------------------------------------
/Intro-Prgrm.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Sun Apr 25 16:45:56 2021
4 |
5 | @author: pc
6 | """
7 |
8 | V = [1,2,3,4,5]
9 | print(V)
10 |
11 | import matplotlib.pyplot as plt
12 | plt.plot(V)
13 |
--------------------------------------------------------------------------------
/06.Read_Write.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Sat Aug 28 15:58:59 2021
4 |
5 | @author: Admin
6 | """
7 | #-------------------------Reading & Writing data in Files----------------------
8 |
9 | import pandas
10 |
11 | # Reading CSV Files with Pandas:
12 | df = pandas.read_csv('F:/WORK/pyWork/AnalyticsEdge_Python/pyData/User_Data.csv')
13 | print(df)
14 |
15 | # Writing CSV Files with Pandas:
16 | df.to_csv('F:/WORK/pyWork/AnalyticsEdge_Python/pyData/IIT-B.csv')
17 |
18 | # Reading Excel Files with Pandas
19 | df1 = pandas.read_excel('F:/WORK/pyWork/AnalyticsEdge_Python/pyData/User_Data.xlsx')
20 |
21 | df1 = pandas.read_excel('User_Data.xlsx')
22 | print(df1)
23 |
24 | # Writing Excel Files with Pandas
25 | df1.to_excel('IIT-B.xlsx')
26 | df2 = pandas.DataFrame(df1)
27 | print (df2)
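# An extra illustration (an added sketch; assumes the same files as above): by default
# to_csv() also writes the row index as an extra column, and read_csv() can limit how
# much of a file is loaded. 'IIT-B_no_index.csv' is just a hypothetical output name.
df.to_csv('IIT-B_no_index.csv', index=False)          # write without the index column
df_head = pandas.read_csv('User_Data.csv', nrows=5)   # read only the first 5 rows
print(df_head)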
28 |
--------------------------------------------------------------------------------
/18.hc.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Tue Oct 19 19:56:10 2021
4 |
5 | @author: Admin
6 | """
7 | # Hierarchical Clustering
8 |
9 | # Importing the libraries
10 | import matplotlib.pyplot as plt
11 | import pandas as pd
12 |
13 | # Importing the dataset
14 | dataset = pd.read_csv('F:/WORK/pyWork/pyData/Mall_Customers.csv')
15 | X = dataset.iloc[:, [3, 4]].values
16 |
17 | # Using the dendrogram to find the optimal number of clusters
18 | import scipy.cluster.hierarchy as sch
19 | dendrogram = sch.dendrogram(sch.linkage(X, method = 'ward'))
20 | plt.title('Dendrogram')
21 | plt.xlabel('Customers')
22 | plt.ylabel('Euclidean distances')
23 |
24 | #Cut the dendrogram with a horizontal line at a height where the line can pass without
25 | #intersecting a merge point. Here the ideal number of clusters is 5.
26 |
27 | # Fitting Hierarchical Clustering to the dataset
28 | from sklearn.cluster import AgglomerativeClustering
29 | hc = AgglomerativeClustering(n_clusters = 5, affinity = 'euclidean', linkage = 'ward')   #in scikit-learn >= 1.2 the 'affinity' parameter is renamed 'metric'
30 | y_hc = hc.fit_predict(X)
31 |
32 | # Visualising the clusters
33 | plt.scatter(X[y_hc == 0, 0], X[y_hc == 0, 1], s = 100, c = 'red', label = 'Cluster 1')
34 | plt.scatter(X[y_hc == 1, 0], X[y_hc == 1, 1], s = 100, c = 'blue', label = 'Cluster 2')
35 | plt.scatter(X[y_hc == 2, 0], X[y_hc == 2, 1], s = 100, c = 'green', label = 'Cluster 3')
36 | plt.scatter(X[y_hc == 3, 0], X[y_hc == 3, 1], s = 100, c = 'cyan', label = 'Cluster 4')
37 | plt.scatter(X[y_hc == 4, 0], X[y_hc == 4, 1], s = 100, c = 'magenta', label = 'Cluster 5')
38 |
39 | plt.title('Clusters of customers')
40 | plt.xlabel('Annual Income (k$)')
41 | plt.ylabel('Spending Score (1-100)')
42 | plt.legend()
43 |
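# Optional check (an added sketch, not part of the original script): the silhouette
# score gives a numeric sanity check for the choice of 5 clusters read off the
# dendrogram; values closer to 1 mean better-separated clusters.
from sklearn.metrics import silhouette_score
print(silhouette_score(X, y_hc))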
--------------------------------------------------------------------------------
/05.DataSummarization.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Sat Aug 28 15:58:01 2021
4 |
5 | @author: Admin
6 | """
7 | #-------------------------------Data Summary-------------------------------
8 | #describe() - used to get summary statistics in Python.
9 | #For numeric data it gives the count, mean, std, min, quartiles (IQR) and max values.
10 | #It analyzes both numeric and object Series, as well as DataFrame column sets of mixed data types.
11 | # creation of DataFrame
12 | import pandas as pd
13 | import numpy as np
14 |
15 | #Example 1:
16 | a1 = pd.Series([1, 2, 3,4])
17 | a1
18 | a1.describe()
19 |
20 | a2 = pd.Series(['q', 'r', 'r', 'r','q','s','p'])
21 | a2
22 | a2.describe()
23 |
24 | info = pd.DataFrame({'numeric': [1, 2, 3, 4],
25 | 'object': ['p', 'q', 'r','e']
26 | })
27 | info
28 |
29 | info.describe(include=[np.number])
30 | info.describe(include=[object])   #np.object was removed from NumPy; the built-in object works
31 | info.describe()
32 |
33 | #Example 2:
34 | #Create a Dictionary of series
35 | d = {'Name':['Cathrine','Alisa','Bobby','Madonna','Rocky','Sebastian','Jaqluine',
36 | 'Rahul','David','Andrew','Ajay','Teresa'],
37 | 'Age':[26,27,25,24,31,27,25,33,42,32,51,47],
38 | 'Score':[89,87,67,55,47,72,76,79,44,92,99,69]}
39 |
40 | #Create a DataFrame
41 | df = pd.DataFrame(d)
42 | df
43 |
44 | #Descriptive or Summary Statistic of the numeric columns:
45 | #Summary statistics
46 | print(df.describe())
47 |
48 | #Descriptive or Summary Statistic of the character columns:
49 | #Summary statistics of character column
50 | print(df.describe(include='object'))
51 |
52 | #Descriptive or Summary Statistic of all the columns
53 | #Summary statistics of both - character & numerical columns
54 | print(df.describe(include='all'))
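#A further illustration (an added sketch, using the same df as above): describe() also
#accepts a custom list of percentiles, and value_counts() summarizes a single
#categorical column.
print(df.describe(percentiles=[0.1, 0.5, 0.9]))
print(df['Name'].value_counts())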
55 | #---------------------------------------------------------------------------------------------------------------
56 |
--------------------------------------------------------------------------------
/11.Simple Linear Regression.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Sat Sep 18 19:04:28 2021
4 |
5 | @author: Admin
6 | """
7 | # Simple Linear Regression
8 |
9 | # Importing the libraries
10 | import matplotlib.pyplot as plt
11 | import pandas as pd
12 |
13 | # Importing the dataset
14 | dataset = pd.read_csv('F:/pyWork/pyData/stud_reg.csv')
15 | print(type(dataset))
16 |
17 | X = dataset.iloc[:,:-1].values
18 | y = dataset.iloc[:, 1].values
19 |
20 | # Splitting the dataset into the Training set and Test set
21 | from sklearn.model_selection import train_test_split
22 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 1/3, random_state = 0)
23 |
24 | #Note: The parameter 'random_state' seeds the random split of the dataset into training
25 | #and testing sets, so the same split (and therefore the same results) can be reproduced
26 | #on every run. It is not a knob to be tuned by hit & trial for maximum accuracy.
27 |
28 | # Fitting Simple Linear Regression to the Training set
29 | from sklearn.linear_model import LinearRegression
30 | regressor = LinearRegression()
31 | regressor.fit(X_train, y_train)
32 |
33 | #Calculating the coefficients:
34 | print(regressor.coef_)
35 |
36 | #Calculating the intercept:
37 | print(regressor.intercept_)
38 |
39 | # Predicting the Test set results
40 | y_pred = regressor.predict(X_test)
41 |
42 | # Accuracy of the model
43 |
44 | #Calculating the r squared value:
45 | from sklearn.metrics import r2_score
46 | r2_score(y_test,y_pred)
47 |
48 | #Create a DataFrame
49 | df1 = {'Actual Applicants':y_test,
50 | 'Predicted Applicants':y_pred}
51 | df1 = pd.DataFrame(df1,columns=['Actual Applicants','Predicted Applicants'])
52 | print(df1)
53 |
54 | # Visualising the predicted results
55 | line_chart1 = plt.plot(X_test,y_pred, '--', c ='red')
56 | line_chart2 = plt.plot(X_test,y_test, ':', c='blue')
57 |
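# An alternative visualisation (an added sketch; assumes X holds the single predictor
# used above): scatter the actual test points and draw the fitted regression line.
plt.scatter(X_test, y_test, c='blue', label='Actual')
plt.plot(X_test, y_pred, c='red', label='Fitted line')
plt.legend()
plt.show()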
58 | #--------------------------------------------------------
59 |
--------------------------------------------------------------------------------
/03.Apply_Functions.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Sat Aug 21 15:33:29 2021
4 |
5 | @author: Admin
6 | """
7 | #-----------------Apply Family of Functions--------------------------------
8 | #To apply our own functions to a dataset, pandas provides three functions from the
9 | #apply family of functions: pipe(), apply(), applymap()
10 |
11 | # pipe():Table wise Function Application.
12 | # It performs the custom operation for the entire dataframe.
13 | import pandas as pd
14 | # own function
15 | def adder(adder1,adder2):return adder1+adder2
16 |
17 | #Create a Dictionary of series
18 | d = {'Score_Math':pd.Series([66,57,75,44,31,67,85,33,42,62,51,47]),
19 | 'Score_Science':pd.Series([89,87,67,55,47,72,76,79,44,92,93,69])}
20 |
21 | print(type(d))
22 | print(d)
23 | df = pd.DataFrame(d)
24 | print (df)
25 | print (df.pipe(adder,2))
26 |
27 | # apply():Row or Column Wise Function Application.
28 | # It performs the custom operation for either row wise or column wise.
29 | import numpy as np
30 | #Create a DataFrame
31 | d = {'Score_Math':pd.Series([66,57,75,44,31,67,85,33,42,62,51,47]),
32 | 'Score_Science':pd.Series([89,87,67,55,47,72,76,79,44,92,93,69])}
33 |
34 | df = pd.DataFrame(d)
35 | print (df)
36 | #Row Wise Fxn Application:
37 | #row wise mean
38 | print (df.apply(np.mean,axis=1))
39 |
40 | #Column Wise Fxn Application:
41 | #column wise mean
42 | print (df.apply(np.mean,axis=0))
43 |
44 | # applymap(): Element-wise Function Application.
45 | # It performs the specified operation on every element of the dataframe.
46 |
47 |
48 |
49 | #Create a DataFrame
50 | d = {'Score_Math':pd.Series([66,57,75,44,31,67,85,33,42,62,51,47]),
51 | 'Score_Science':pd.Series([89,87,67,55,47,72,76,79,44,92,93,69])}
52 |
53 | df = pd.DataFrame(d)
54 | print (df)
55 |
56 | #Example 1:
57 | print (df.applymap(lambda x:x*2))
58 | #Example2:
59 | import math as m
60 | print (df.applymap(lambda x:m.sqrt(x)))
61 |
--------------------------------------------------------------------------------
/12.Multiple Linear Regression.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Sun Sep 19 15:26:33 2021
4 |
5 | @author: Admin
6 | """
7 | #Multiple Regression
8 |
9 | # Importing the libraries
10 | import pandas as pd
11 | import seaborn as sns
12 |
13 | # Importing the dataset
14 | dataset = pd.read_csv('F:/WORK/pyWork/AnalyticsEdge_Python/pyData/stud_reg_2.csv')
15 | print(type(dataset))
16 |
17 | #Data Visualization: heatmap of the correlation matrix
18 | sns.heatmap(dataset.corr(), annot=True)
19 |
20 | X = dataset.iloc[:, :-1].values
21 | y = dataset.iloc[:,2].values
22 |
23 | # Splitting the dataset into the Training set and Test set
24 | from sklearn.model_selection import train_test_split
25 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 1)
26 |
27 | #Note: The parameter 'random_state' seeds the random split of the dataset into training
28 | #and testing sets, so the same split (and therefore the same results) can be reproduced
29 | #on every run. It is not a knob to be tuned by hit & trial for maximum accuracy.
30 |
31 | # Fitting Linear Regression to the Training set
32 | from sklearn.linear_model import LinearRegression
33 | regressor = LinearRegression()
34 | regressor.fit(X_train, y_train)
35 |
36 | #Calculating the coefficients:
37 | print(regressor.coef_)
38 |
39 | #Calculating the intercept:
40 | print(regressor.intercept_)
41 |
42 | # Predicting the Test set results
43 | y_pred = regressor.predict(X_test)
44 |
45 | # Accuracy of the model
46 |
47 | #Calculating the r squared value:
48 | from sklearn.metrics import r2_score
49 | r2_score(y_test,y_pred)
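#Optional addition (a sketch, not in the original): adjusted R-squared penalises the
#extra predictors, which is more informative than plain R-squared when there is more
#than one independent variable.
r2 = r2_score(y_test, y_pred)
n, k = X_test.shape                       # observations and predictors in the test set
adj_r2 = 1 - (1 - r2) * (n - 1) / (n - k - 1)
print(adj_r2)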
50 |
51 | #Create a DataFrame
52 | df1 = {'Actual Applicants':y_test,
53 | 'Predicted Applicants':y_pred}
54 | df1 = pd.DataFrame(df1,columns=['Actual Applicants','Predicted Applicants'])
55 | print(df1)
56 |
57 | # Visualising the predicted results
58 | import matplotlib.pyplot as plt
59 | line_chart1 = plt.plot(y_pred,X_test, '--',c='green')
60 | line_chart2 = plt.plot(y_test,X_test, ':', c='red')
61 | plt.show()
62 | #------------------------------
63 |
--------------------------------------------------------------------------------
/17.kmeans.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Sun Oct 17 09:41:39 2021
4 |
5 | @author: Admin
6 | """
7 | # K-Means Clustering
8 | #Projects: Customer Segmentation
9 | #A company wants to identify segments of customers for targeted marketing.
10 |
11 | # Importing the libraries
12 | import matplotlib.pyplot as plt
13 | import pandas as pd
14 |
15 | # Importing the dataset
16 | dataset = pd.read_csv('D:/SkillEdge/Python/Final/Codes/pyData/Mall_Customers.csv')
17 | X = dataset.iloc[:, [3,4]].values
18 |
19 | # Using the elbow method to find the optimal number of clusters
20 | from sklearn.cluster import KMeans
21 | help(KMeans())
22 | wcss = []
23 | for i in range(1, 11):
24 | kmeans = KMeans(n_clusters = i, init = 'k-means++', random_state = 0)
25 | kmeans.fit(X)
26 | wcss.append(kmeans.inertia_)
27 | plt.plot(range(1, 11), wcss)
28 | plt.title('The Elbow Method')
29 | plt.xlabel('Number of clusters')
30 | plt.ylabel('WCSS')
31 | #if you want to save the figure, use plt.savefig (or the savefig method of a figure object).
32 | plt.savefig('output.png')
33 |
34 | # Fitting K-Means to the dataset
35 | kmeans = KMeans(n_clusters = 5, init = 'k-means++', random_state = 42)
36 | y_kmeans = kmeans.fit_predict(X)
37 |
38 | clusters = pd.DataFrame(y_kmeans, columns=['Cluster'])   #use a separate name so the fitted kmeans model is not overwritten
39 | dataset_1 = pd.concat([dataset, clusters], axis=1)
40 |
41 | # Visualising the clusters
42 | plt.scatter(X[y_kmeans == 0, 0], X[y_kmeans == 0, 1], s = 100, c = 'red', label = 'Cluster 1')
43 | plt.scatter(X[y_kmeans == 1, 0], X[y_kmeans == 1, 1], s = 100, c = 'blue', label = 'Cluster 2')
44 | plt.scatter(X[y_kmeans == 2, 0], X[y_kmeans == 2, 1], s = 100, c = 'green', label = 'Cluster 3')
45 | plt.scatter(X[y_kmeans == 3, 0], X[y_kmeans == 3, 1], s = 100, c = 'cyan', label = 'Cluster 4')
46 | plt.scatter(X[y_kmeans == 4, 0], X[y_kmeans == 4, 1], s = 100, c = 'magenta', label = 'Cluster 5')
47 | #plt.scatter(X[y_kmeans == 5, 0], X[y_kmeans == 5, 1], s = 100, c = 'yellow', label = 'Cluster 3')
48 | #plt.scatter(X[y_kmeans == 6, 0], X[y_kmeans == 6, 1], s = 100, c = 'black', label = 'Cluster 4')
49 | #plt.scatter(X[y_kmeans == 7, 0], X[y_kmeans == 7, 1], s = 100, c = 'orange', label = 'Cluster 5')
50 |
51 |
52 |
53 |
54 | #plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s = 300, c = 'yellow', label = 'Centroids')
55 | plt.title('Clusters of customers')
56 | plt.xlabel('Annual Income (k$)')
57 | plt.ylabel('Spending Score (1-100)')
58 | plt.legend()
59 | plt.show()
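# Optional check (an added sketch; uses y_kmeans and the fitted kmeans model from above):
# how many customers fall in each segment, and where the segment centroids lie.
print(pd.Series(y_kmeans).value_counts())
print(kmeans.cluster_centers_)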
60 |
--------------------------------------------------------------------------------
/07.Joins.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Tue Aug 31 18:44:17 2021
4 |
5 | @author: Admin
6 | """
7 | #---------------------------------Joins----------------------------------------
8 | #We can merge two data frames in python by using the merge() function of pandas
9 | #Create dataframe:
10 | import pandas as pd
11 |
12 | # Example 1:
13 |
14 | # data frame 1
15 | d1 = {'Customer_id':pd.Series([1,2,3,4,5,6]),
16 | 'Product':pd.Series(['Oven','Oven','Oven','Television','Television','Television'])}
17 | df1 = pd.DataFrame(d1)
18 | print(df1)
19 |
20 | # data frame 2
21 | d2 = {'Customer_id':pd.Series([2,4,6]),
22 | 'State':pd.Series(['California','California','Texas'])}
23 | df2 = pd.DataFrame(d2)
24 | print(df2)
25 |
26 | #Inner join using pandas:
27 | #Returns only the rows whose keys appear in both tables.
28 | print (pd.merge(df1, df2, on='Customer_id', how='inner'))
29 |
30 | #Full join using pandas
31 | #Returns all rows from both tables.
32 |
33 | print (pd.merge(df1, df2, on='Customer_id', how='outer'))
34 | #Keys present in only one table appear with NaN in the other table's columns.
35 |
36 | #Left Join using pandas
37 | #Returns all rows from left table and any rows with matching keys from right table.
38 | print (pd.merge(df1, df2, on='Customer_id', how='left'))
39 |
40 | #Right Join using pandas
41 | #Returns all rows from right table and any rows with matching keys from left table.
42 | print (pd.merge(df1, df2, on='Customer_id', how='right'))
43 |
44 | #Example 2:
45 |
46 | # Dataset 1
47 | emp_1 = {"Name": ["Penn", "Smith", "William", "Parker"],
48 | "Age": [21, 32, 29, 28]}
49 | EmpList_1 = pd.DataFrame(emp_1)
50 | print(EmpList_1)
51 |
52 | # Dataset 2
53 | emp_2 = {"Name": ["Penn", "Suzzane", "William"],
54 | "Education-Level": ["Under-Grad", "PG", "Grad"]}
55 | EmpList_2 = pd.DataFrame(emp_2)
56 | print(EmpList_2)
57 |
58 | #Inner join using pandas:
59 | print (pd.merge(EmpList_1, EmpList_2, on='Name', how='inner'))
60 |
61 | #Full join using pandas
62 | print (pd.merge(EmpList_1, EmpList_2, on='Name', how='outer'))
63 | #Keys present in only one table appear with NaN in the other table's columns.
64 |
65 | #Left Join using pandas
66 | print (pd.merge(EmpList_1, EmpList_2, on='Name', how='left'))
67 |
68 | #Right Join using pandas
69 | #Returns all rows from right table and any rows with matching keys from left table.
70 | print (pd.merge(EmpList_1, EmpList_2, on='Name', how='right'))
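#A small extra illustration (an added sketch): passing indicator=True to merge() adds a
#'_merge' column that records whether each row came from the left table, the right
#table, or both.
print(pd.merge(EmpList_1, EmpList_2, on='Name', how='outer', indicator=True))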
71 |
--------------------------------------------------------------------------------
/08.Index_Select_Filter.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Tue Aug 31 18:48:19 2021
4 |
5 | @author: Admin
6 | """
7 | #------------------------Index, Select & Filter--------------------------------
8 | #Create dataframe :
9 | import pandas as pd
10 |
11 | #Create a DataFrame
12 | d = {'Name':['Alisa','Bobby','Cathrine','Alisa','Bobby','Cathrine',
13 | 'Alisa','Bobby','Cathrine','Alisa','Bobby','Cathrine'],
14 | 'Exam':['Semester 1','Semester 1','Semester 1','Semester 1','Semester 1','Semester 1',
15 | 'Semester 2','Semester 2','Semester 2','Semester 2','Semester 2','Semester 2'],
16 | 'Subject':['Mathematics','Mathematics','Mathematics','Science','Science','Science',
17 | 'Mathematics','Mathematics','Mathematics','Science','Science','Science'],
18 | 'Score':[62,47,55,74,31,77,85,63,42,67,89,81]}
19 |
20 | df = pd.DataFrame(d,columns=['Name','Exam','Subject','Score'])
21 | df
22 |
23 | #View a column of the dataframe in pandas:
24 | df['Name']
25 |
26 | #View selected columns of the dataframe in pandas:
27 | df[['Name','Score','Exam']]
28 |
29 | #View first two rows of the dataframe in pandas:
30 | df[0:2]
31 |
32 | #-------Filter in Pandas dataframe:--------------
33 | #View all rows where score greater than 70
34 | df['Score'] > 70
35 | df[df['Score'] > 70]
36 |
37 | #View all the rows where score greater than 70 and less than 85
38 | df[(df['Score'] > 70) & (df['Score'] < 85)]
39 |
40 |
41 | #-----------------Select in Pandas dataframe-----------------------------------
42 | #select rows by using row numbers in pandas with .iloc
43 | #.iloc[row positions, column positions] selects by integer position (0-based),
44 | #e.g. df.iloc[0:m, 0:n] gives the first m rows and the first n columns
45 |
46 | # select first 2 rows
47 | df.iloc[:2]
48 | # or
49 | df.iloc[:2,]
50 |
51 | #select 3rd to 5th rows
52 | df.iloc[2:5]
53 | # or
54 | df.iloc[2:5,]
55 |
56 | #select all rows starting from third row
57 | df.iloc[2:]
58 | # or
59 | df.iloc[2:,]
60 |
61 | #Select column by using column number in pandas with .iloc
62 | # select first 2 columns
63 | df.iloc[:,:2]
64 | #select the 3rd and 5th rows together with the 1st and 4th columns
65 | df.iloc[[2,4],[0,3]]
66 |
67 | #Select value by using row name and column name in pandas with .loc:
68 | #.loc [[Row_names],[ column_names]] –used to select or index rows or columns based on their name
69 |
70 | #select value by row label and column label using loc
71 | df.loc[[1,2,4,8,11],['Name','Score']]
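#A couple of further filtering sketches (added; same df as above): isin() keeps rows
#whose value is in a given list, and .loc can combine a boolean condition with a
#column selection in one step.
df[df['Subject'].isin(['Science'])]
df.loc[df['Score'] > 70, ['Name', 'Score']]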
72 |
--------------------------------------------------------------------------------
/16.RF.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Tue Oct 12 19:15:52 2021
4 |
5 | @author: Admin
6 | """
7 | #------------------------------Random Forest--------------------------------
8 | # Random Forest Classification
9 |
10 | # Importing the libraries
11 | import pandas as pd
12 |
13 | # Importing the dataset
14 | dataset = pd.read_csv('Purchase_History.csv')
15 | X = dataset.iloc[:, [2, 3]].values
16 | y = dataset.iloc[:, 4].values
17 |
18 | # Splitting the dataset into the Training set and Test set
19 | from sklearn.model_selection import train_test_split
20 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 0)
21 |
22 |
23 | # Fitting Random Forest Classification to the Training set
24 | from sklearn.ensemble import RandomForestClassifier
25 | classifier = RandomForestClassifier(n_estimators = 10, criterion = 'entropy',max_depth = 3, min_samples_leaf=5)
26 | classifier.fit(X_train, y_train)
27 |
28 | #To see no. of decision trees created
29 | len(classifier.estimators_)
30 |
31 | #To see the decision trees created
32 | classifier.estimators_
33 |
34 | #To access a particular decision tree, we can use indexing
35 | classifier.estimators_[0]
36 |
37 | # Predicting the Test set results
38 | y_pred = classifier.predict(X_test)
39 |
40 | # Making the Confusion Matrix
41 | from sklearn.metrics import confusion_matrix
42 | cm = confusion_matrix(y_test, y_pred)
43 | cm
44 | #Accuracy = 96%
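#Optional (an added sketch): the same accuracy figure can be computed directly from the
#predictions with sklearn's accuracy_score.
from sklearn.metrics import accuracy_score
print(accuracy_score(y_test, y_pred))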
45 |
46 | # Random Forest visualization
47 |
48 | #Since a Random Forest is quite big and clumsy to draw due to its large number of decision trees,
49 | #it is not practical to visualize an entire RF on a small system like our laptop.
50 | #Hence, we visualize individual DTs from this RF.
51 |
52 | # Decision Tree -1 visualization-----------------
53 | from sklearn import tree
54 | #Lets create a blank chart of desired size using matplotlib library and place our Decision tree there.
55 | import matplotlib.pyplot as plt
56 | fig, axes= plt.subplots(nrows = 1,ncols = 1,figsize = (4,4), dpi=300)
57 | cn=['0','1']
58 | tree.plot_tree(classifier.estimators_[0],class_names=cn,filled = True)
59 |
60 | #if you want save figure, use savefig method in returned figure object.
61 | fig.savefig('RF-DT-1.png')
62 |
63 | # Decision Tree-2 visualization-----------------
64 | from sklearn import tree
65 | #Lets create a blank chart of desired size using matplotlib library and place our Decision tree there.
66 | import matplotlib.pyplot as plt
67 | fig, axes= plt.subplots(nrows = 1,ncols = 1,figsize = (4,4), dpi=300)
68 | cn=['0','1']
69 | tree.plot_tree(classifier.estimators_[1],class_names=cn,filled = True)
70 |
71 | #if you want save figure, use savefig method in returned figure object.
72 | fig.savefig('RF-DT-2.png')
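# Optional addition (a sketch): feature_importances_ shows how much each of the two
# input columns contributed to the forest's splits (the values sum to 1).
print(classifier.feature_importances_)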
73 |
74 | #-----------
75 |
--------------------------------------------------------------------------------
/15.DecisionTree.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Sat Oct 9 16:48:32 2021
4 |
5 | @author: Admin
6 | """
7 |
8 | # Importing the libraries
9 | import pandas as pd
10 |
11 | # Importing the dataset
12 | dataset = pd.read_csv('F:/WORK/pyWork/AnalyticsEdge_Python/pyData/Purchase_History.csv')
13 |
14 | #Method-1 (Handling Categorical Variables)
15 | pd.get_dummies(dataset["Gender"])
16 | pd.get_dummies(dataset["Gender"],drop_first=True)
17 | S_Dummy = pd.get_dummies(dataset["Gender"],drop_first=True)
18 | S_Dummy.head(5)
19 | #Now, lets concatenate these dummy var columns in our dataset.
20 | dataset = pd.concat([dataset,S_Dummy],axis=1)
21 | dataset.head(5)
22 | dataset.tail(2)
23 | #dropping the columns whose dummy var have been created
24 | dataset.drop(["Gender",],axis=1,inplace=True)
25 | dataset.head(5)
26 | #------------------------------------------------------------------------------
27 |
28 | #Obtaining DV & IV from the dataset
29 | X = dataset.iloc[:, [1,2,4]].values
30 | y = dataset.iloc[:, 3].values
31 |
32 | # Splitting the dataset into the Training set and Test set
33 | from sklearn.model_selection import train_test_split
34 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 2)
35 |
36 |
37 | # Fitting Decision Tree Classification to the Training set
38 | from sklearn.tree import DecisionTreeClassifier
39 | #classifier = DecisionTreeClassifier(criterion = 'entropy')
40 | #If desired we can supply extra parameters to decision trees fxn, but
41 | #it may or may not give better accuracy.
42 | classifier = DecisionTreeClassifier(criterion = 'entropy',max_depth = 3, min_samples_leaf=5)
43 |
44 | classifier.fit(X_train, y_train)
45 |
46 | # Predicting the Test set results
47 | y_pred = classifier.predict(X_test)
48 |
49 | # Making the Confusion Matrix
50 | from sklearn.metrics import confusion_matrix
51 | cm = confusion_matrix(y_test, y_pred)
52 | print(cm)
53 | #Accuracy = 91%
54 |
55 | # Decision Tree visualization-----------------
56 | from sklearn import tree
57 |
58 | #Simple Decision Tree
59 | tree.plot_tree(classifier)
60 | #image is quite blurred
61 |
62 | #Lets try to make decision tree more interpretable by adding filling colors.
63 | tree.plot_tree(classifier,filled = True)
64 | #Although the decision tree now shows class names and the leaves are colored, its view is still blurred.
65 |
66 | #Lets create a blank chart of desired size using matplotlib library and place our Decision tree there.
67 | import matplotlib.pyplot as plt
68 | fig, axes = plt.subplots(nrows = 1,ncols = 1,figsize = (4,4), dpi=300)
69 | #The above line is used to set the pixels of the Decision Trees nodes so that
70 | #the content mentioned in each node of Decision tree is visible.
71 | cn=['0','1']
72 | tree.plot_tree(classifier,class_names=cn,filled = True)
73 |
74 | #if you want save figure, use savefig method in returned figure object.
75 | fig.savefig('Skilledge-Python-April-batch.png')
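#Optional addition (a sketch): export_text() prints the same tree as plain text, which
#is handy when the rendered image is too blurred to read.
from sklearn.tree import export_text
print(export_text(classifier))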
76 |
--------------------------------------------------------------------------------
/09.MissingValues.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Thu Sep 2 20:05:47 2021
4 |
5 | @author: Admin
6 | """
7 | #--------------------------Handling Missing Values----------------------------
8 |
9 | #Counting the Missing Values---------------------------
10 | import pandas as pd
11 | import numpy as np
12 |
13 | #Create a DataFrame
14 | df1 = {'Subject':['semester1','semester2','semester3','semester4','semester1',
15 | 'semester2','semester3'],
16 | 'Score':[62,47,np.nan,74,np.nan,77,85]}
17 |
18 | df1 = pd.DataFrame(df1,columns=['Subject','Score'])
19 | print(df1)
20 |
21 | '''Is there any missing values in dataframe '''
22 | df1.isnull()
23 | df1.notnull()
24 |
25 | '''Is there any missing values across columns'''
26 | df1.isnull().any()
27 |
28 | '''How many missing values are there across each column'''
29 | df1.isnull().sum()
30 |
31 | #Dropping rows with Missing Values-----------------------
32 |
33 | #Create a DataFrame
34 | df1 = {'Name':['George','Andrea','micheal','maggie','Ravi','Xien','Jalpa',np.nan],
35 | 'State':['Arizona','Georgia','Newyork','Indiana','Florida','California',np.nan,np.nan],
36 | 'Gender':["M","F","M","F","M","M",np.nan,np.nan],
37 | 'Score':[63,48,56,75,np.nan,77,np.nan,np.nan]}
38 |
39 | df1 = pd.DataFrame(df1,columns=['Name','State','Gender','Score'])
40 | print(df1)
41 |
42 | #Drop all rows that have any NaN (missing) values
43 | df1.dropna()
44 |
45 | #Drop only if entire row has NaN values
46 | df1.dropna(how='all')
47 |
48 | #Keep only rows that have at least 2 non-NaN values (thresh counts the required non-missing values)
49 | df1.dropna(thresh=2)
50 |
51 | #Drop NaN in a specific column
52 | df1.dropna(subset=['Gender'])
53 | df2 = df1.dropna(subset=['Gender','Score'])
54 | df2
55 | #Dropping rows using axis values:
56 | df1
57 | df1.dropna(axis=0)
58 |
59 | #Dropping columns using axis values:
60 | df1.dropna(axis=1)
61 |
62 | #------------------Creating Data Frame Again-----------------------------------
63 | df1 = {'Name':['George','Andrea','micheal','maggie','Ravi','Xien','Jalpa',np.nan],
64 | 'State':['Arizona','Georgia','Newyork','Indiana','Florida','California',np.nan,np.nan],
65 | 'Gender':["M","F","M","F","M","M",np.nan,np.nan],
66 | 'Score':[63,48,56,75,np.nan,77,np.nan,np.nan]}
67 |
68 | df1 = pd.DataFrame(df1,columns=['Name','State','Gender','Score'])
69 | print(df1)
70 | #------------------Replacing Missing Values with Zero--------------------------
71 |
72 | df1
73 | df1.fillna(0)
74 |
75 | #-----------------Replacing Missing Values with Mean of the column-------------
76 |
77 | df1
78 | df1["Score"].fillna(df1["Score"].mean(),inplace=True)
79 | print(df1)
80 |
81 | #----------------Replacing Missing Value with Median of the column-------------
82 | df1["Score"].fillna(df1["Score"].median(), inplace=True)
83 | print(df1)
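#A further sketch (added; assumes df1 from above): gaps in a categorical column such as
#Gender are usually filled with the most frequent value (the mode) rather than a mean.
df1["Gender"] = df1["Gender"].fillna(df1["Gender"].mode()[0])
print(df1)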
84 |
85 | #Replace Missing (or) Generic Values using replace() method
86 | #Many times, we have to replace a generic value with some specific value.
87 | #We can achieve this by applying the replace method.
88 | df = pd.DataFrame({'one':[10,20,30,40,50,2000], 'two':[1000,0,30,40,50,60]})
89 | print(df)
90 |
91 | print (df.replace({1000:10,2000:60}))
92 |
93 | #------------------Handling Duplicate Values--------------------------------
94 |
95 | #The drop_duplicates() function performs common data cleaning task that deals with duplicate values
96 | #in the DataFrame. This method helps in removing duplicate values from the DataFrame.
97 |
98 | emp = {"Name": ["Parker", "Smith", "William", "Parker"],
99 | "Age": [21, 32, 29, 21]}
100 | info = pd.DataFrame(emp)
101 | print(info)
102 | info = info.drop_duplicates()
103 | print(info)
104 |
105 |
106 | emp = {"Name": ["Parker", "Smith", "William", "Parker"],
107 | "Age": [21, 32, 29, 22]}
108 | info = pd.DataFrame(emp)
109 | print(info)
110 | info = info.drop_duplicates()
111 | print(info)
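#Optional addition (a sketch): drop_duplicates(subset=['Name']) treats rows as
#duplicates when only the Name matches, keeping the first occurrence by default.
print(info.drop_duplicates(subset=['Name']))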
112 |
--------------------------------------------------------------------------------
/04.Loops.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Tue Aug 24 18:51:52 2021
4 |
5 | @author:
6 | """
7 | #--------------------------------Loops----------------------------------------
8 | #-------------For Loop---------------
9 | # A for loop executes a block of code once for each item in a sequence (or range).
10 | # It is better to use a for loop when the number of iterations is known in advance.
11 | #It is frequently used to traverse data structures like lists, tuples, or dictionaries.
12 | #Example1:
13 | i=0
14 | for i in range(0,10):
15 | print(i,end =',')
16 |
17 | #Example2:printing the table of the given number
18 | i=1
19 | num = int(input("Enter a number:"))
20 | for i in range(1,11):
21 | print("%a X %a = %a" %(num,i,num*i))
22 |
23 | #Example3:Nested For loop
24 | n = int(input("Enter the number of rows you want to print?"))
25 | i,j=0,0
26 | for i in range(0,n):
27 | print()
28 | for j in range(0,i+1):
29 | print("*",end="")
30 |
31 | #Example4: Else statement with For loop
32 | for i in range(0,5):
33 | print(i)
34 | else:print("for loop completely exhausted, since there is no break.")
35 |
36 | #------------While Loop-------------
37 | # A while loop is used when we don't know the number of iterations in advance.
38 | #The block of statements inside the while loop is executed repeatedly as long as the
39 | #condition specified in the while loop remains true.
40 | #Example1:
41 | i=1;
42 | while i<=10:
43 | print(i);
44 | i=i+1;
45 |
46 | #Example2:
47 | i=1
48 | number=0
49 |
50 | number = int(input("Enter the number?"))
51 | while i<=10:
52 | print("%a X %a = %a \n"%(number,i,number*i));
53 | i = i+1;
54 |
55 | #Example3:Infinite while loop
56 | var = 1
57 | while var != 2:
58 | i = int(input("Enter the number?"))
59 | print ("Entered value is %d"%(i))
60 |
61 | while (1):
62 | print("Hi! we are inside the infinite while loop");
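#Added example (a sketch): 'break' is the usual way to leave an otherwise infinite loop.
while True:
    i = int(input("Enter a number (0 to stop)?"))
    if i == 0:
        break
    print("Entered value is %d" % (i))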
63 |
64 | # A for loop runs a finite number of times, even if the range contains only one value
65 | for i in range(0,1):
66 | print("Hi! we are inside the finite for loop");
67 |
68 | #Example4: Using else with while loop
69 | i=1;
70 | while i<=5:
71 | print(i)
72 | i=i+1;
73 | else:print("The while loop exhausted");
74 |
75 | #-------------If Statement----------------
76 | #The if statement is used to test a specific condition.
77 | #If the condition is true, a block of code (if-block) will be executed.
78 | #Example1:
79 | num = int(input("enter the number?"))
80 | if num%2 == 0:
81 | print("Number is even")
82 |
83 | #Example2:
84 | a = int(input("Enter a? "));
85 | b = int(input("Enter b? "));
86 | c = int(input("Enter c? "));
87 | if a>b and a>c:
88 | print("a is largest");
89 |
90 | if b>a and b>c:
91 | print("b is largest");
92 |
93 | if c>a and c>b:
94 | print("c is largest");
95 |
96 | #-----------If Else Statement-------------
97 | #If the condition provided in the if statement is false, then the else statement will be executed.
98 | #Example1:
99 | age = int (input("Enter your age? "))
100 | if age>=18:
101 | print("You are eligible to vote !!");
102 | else:
103 | print("Sorry! you have to wait !!");
104 |
105 | #Example2:
106 | num = int (input("enter the number?"))
107 | if num%2 == 0:
108 | print("Number is even...")
109 | else:
110 | print("Number is odd...")
111 |
112 | #-------Elif Statement------------------
113 | #The elif statement enables us to check multiple conditions and execute the specific block of
114 | #statements depending upon the true condition among them.It works like if-else-if ladder statement.
115 | #Example:
116 | number = int(input("Enter the number?"))
117 | if number==10:
118 | print("number is equals to 10")
119 | elif number==50:
120 | print("number is equal to 50");
121 | elif number==100:
122 | print("number is equal to 100");
123 | else:
124 | print("number is not equal to 10, 50 or 100");
125 |
--------------------------------------------------------------------------------
/02.Functions_Basics.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Sat Aug 21 15:32:07 2021
4 |
5 | @author: Admin
6 | """
7 | #----------------------Python Built-in Functions-------------------------------
8 |
9 |
10 | #abs(): Returns the absolute value of a number.
11 | # integer number
12 |
13 | integer = -20
14 | abs(integer)
15 | print('Absolute value of -20 is:', abs(integer))
16 |
17 | # floating number
18 |
19 | floating = -20.83
20 | print('Absolute value of -20.83 is:', abs(floating))
21 |
22 | #all(): It returns true if all items passed in iterable object are true.
23 | #Otherwise, it returns False.
24 | #This fxn accepts an iterable object (such as list, dictionary, etc.).
25 | # all values true
26 |
27 | k = [1, 3, 4, 6]
28 | print(all(k))
29 |
30 | # all values false
31 |
32 | k = [0, False]
33 | print(all(k))
34 |
35 | # one false value
36 | k = [1, 3, 7, 0]
37 | print(all(k))
38 |
39 | # empty iterable
40 | k = []
41 | print(all(k))
42 |
43 | #------------------------------------------------------------------------------------
44 |
45 | #bool(): Converts a value to boolean(True or False)
46 | test1 = []
47 | print(test1,'is',bool(test1))
48 |
49 | test1 = [0]
50 | print(test1,'is',bool(test1))
51 |
52 | test1 = None
53 | print(test1,'is',bool(test1))
54 |
55 | test1 = 'Easy string'
56 | print(test1,'is',bool(test1))
57 |
58 | #sum(): Used to get the sum of numbers of an iterable, i.e., list.
59 |
60 | list_1 = [1,2,4]
61 | s = sum(list_1)
62 | print(s)
63 |
64 | s = sum(list_1, 10)
65 | print(s)
66 |
67 | #len(): Returns the length (the number of items) of an object.
68 |
69 | strA = 'Python'
70 | print(len(strA))
71 |
72 | #list() creates a list in python.
73 | # empty list
74 |
75 | Gaurav = list()
76 | print(Gaurav)
77 |
78 | #Converting string to list
79 | String = 'abcde'
80 | print(list(String))
81 |
82 | #divmod(): Used to get quotient and remainder of two numbers.
83 | #This function takes two numeric arguments and returns a tuple.
84 | #Both arguments are required and numeric
85 | # Calling function
86 | result = divmod(10,2)
87 | # Displaying result
88 | print(result)
89 |
90 | #dict(): Its a constructor which creates a dictionary.
91 | # Calling function
92 | result = dict() # returns an empty dictionary
93 | print(result)
94 |
95 | result2 = dict(a=1,b=2)
96 | # Displaying result
97 | print(result2)
98 |
99 | #set(): It is used to create a new set using elements passed during the call.
100 | #It takes an iterable object as an argument and returns a new set object.
101 | # Calling function
102 | result = set() # empty set
103 | result2 = set('12')
104 | result3 = set('javatpoint')
105 | result4 = {1,2}
106 | print (result4)
107 | # Displaying result
108 | print(result)
109 | print(result2)
110 | print(result3)
111 |
112 | #pow(): Used to compute the power of a number.
113 | # positive x, positive y (x**y)
114 | print(pow(4, 2))
115 |
116 | # negative x, positive y
117 | print(pow(-4, 2))
118 |
119 | #tuple(): Used to create a tuple object.
120 | t1 = tuple()
121 | print('t1=', t1)
122 |
123 | # creating a tuple from a list
124 | l = [1, 6, 9]
125 | t2 = tuple(l)
126 | print('t2=', t2)
127 |
128 | # creating a tuple from a string
129 | t1 = tuple('Java')
130 | print('t1=',t1)
131 |
132 | #----------------------------------------------------------------------
133 | #lambda()- Helps creating anonymous functions.
134 | #Lambda functions can accept any number of arguments,
135 | #but they can return only one value in the form of expression.
136 |
137 | #Multiple arguments to Lambda function
138 | x = lambda a,b:a+b
139 | # a and b are the arguments and a+b is the expression which gets evaluated and returned.
140 | print("Addition = ",x(20,10))
141 |
142 | #Program to filter out the list which contains numbers divisible by 3.
143 | List = [1,2,3,4,10,123,22]
144 | Oddlist = list(filter(lambda x:(x%3 == 0),List))
145 | # the list contains all the items of the list for which the lambda function evaluates to true
146 | print(Oddlist)
147 |
148 | #program to triple each number of the list using map
149 | List = [1,2,3,4,10,123,22]
150 | new_list = list(map(lambda x:x*3,List))
151 | # this will return the triple of each item of the list and add it to new_list
152 | print(new_list)
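#One more illustration (an added sketch): a lambda is also handy as the key of sorted().
pairs = [(2, 'b'), (1, 'c'), (3, 'a')]
print(sorted(pairs, key=lambda p: p[1]))   #sorts the tuples by their second element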
153 |
--------------------------------------------------------------------------------
/01.DataStructures.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Sat April 4 01:46:06 2020
4 |
5 | @author: Admin
6 | """
7 |
8 | print("hello world")
9 |
10 | #Numbers
11 | #a=3 , b=5 #a and b are number objects
12 |
13 | #String
14 | str1 = 'Hello Students' #string str1
15 | str2 = ' how are you' #string str2
16 | str1
17 | str2
18 | print (str1[0:5]) #printing the first five characters using the slice operator
19 | (str1[0:5])
20 | print (str1[4]) #printing 5th character of the string
21 | print (str1*2) #printing the string twice
22 | print (str1 + str2) #printing the concatenation of str1 and str2
23 |
24 | #Lists
25 | l = [1, "hi", "python", True]
26 | print (l[3:])
27 | print (l[0:2])
28 | print (l)
29 | print (l + l)
30 | print (l * 3)
31 | print (type(l))
32 | #Lets try mutation
33 | l[1] = "Bye"
34 | print (l)
35 |
36 | #Tuple
37 | t = ('hi', 'python', 2, 4)
38 | t
39 | print (t[1:]);
40 | print (t[0:3]);
41 | print (t);
42 | print (t + t)
43 | print (t * 3)
44 | print (type(t))
45 | #Lets try mutation - tuples are immutable, so the next line raises a TypeError
46 | t[1] = "Bye"
47 | print (t)
48 |
49 | #Dictionary
50 | d = {1:"Jimmy", 2:'Alex', 3:'john', 4:'mike'}
51 | d
52 | print("1st name is "+d[1])
53 | print("2nd name is "+ d[4])
54 | print (d);
55 | print (d.keys());
56 | print (d.values());
57 |
58 | #----ADVANCED----
59 | #list
60 | #ordered collection of items; sequence of items in a list
61 | shoplist =['apple','carrot','mango', 'banana']
62 | shoplist
63 | len(shoplist)
64 | print(shoplist)
65 |
66 | #add item to list
67 | shoplist.append('rice')
68 | shoplist
69 |
70 | #sort
71 | shoplist.sort() #inplace sort
72 | shoplist
73 |
74 | #index/select
75 | shoplist[0]
76 | shoplist[0:4]
77 |
78 | #delete item
79 | del shoplist[0]
80 | shoplist
81 |
82 | #Tuple
83 | #Used to hold multiple object; similar to lists; less functionality than list
84 | #immutable - cannot modify- fast ; ( )
85 | zoo = ('python','lion','elephant','bird')
86 | zoo
87 | len(zoo)
88 | languages = 'c', 'java', 'php' , 1 #better to put (), this also works
89 | languages
90 | type(languages)
91 |
92 | #Dictionary - like an addressbook. use of associate keys with values
93 | #key-value pairs { 'key1':value1, 'key2':value2} ; { } bracket, :colon
94 |
95 | student = {'A101': 'Abhinav', 'A102': 'Ravi', 'A103':'Prafull', 'A104': 'Karan'}
96 | student
97 | student['A103']
98 | print('Name of rollno A103 is ' +student['A103'])
99 | del student['A104']
100 | student
101 | len(student)
102 |
103 | #for rollno, name in student.items():
104 | #    print('name of {} is {}'.format(rollno, name))
105 |
106 | #Lets test Mutation:
107 | #adding a value
108 | student['A104'] = 'Hitesh'
109 | student
110 |
111 | #Set
112 | Anubhav = {1,2,3,4,5}
113 | Anubhav
114 | Aman_1 = set()
115 | Aman_1
116 |
117 | #Sets are unordered collections of unique objects; set([ ... ]) builds one from a list
118 | teamA = set(['india','england','australia','sri lanka','ireland'])
119 | teamA
120 | teamB = set(['pakistan', 'south africa','bangladesh','ireland'])
121 | teamB
122 |
123 | #Checking whether a data value exists in a set or not.
124 | 'india' in teamA
125 | 'india' in teamB
126 |
127 | #Adding values in a set
128 | teamA.add('China')
129 | teamA #note: sets are unordered, so the displayed order is arbitrary
130 | teamA.add('india')
131 | teamA #no duplicates
132 | teamA.remove('australia')
133 | teamA
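#Set operations (an added sketch using the two teams defined above):
print(teamA | teamB)   #union - countries appearing in either set
print(teamA & teamB)   #intersection - countries common to both sets
print(teamA - teamB)   #difference - countries only in teamA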
134 |
135 | #Create dataframe :
136 | import pandas as pd
137 |
138 | #Create a DataFrame
139 | d = {'Name':['Alisa','Bobby','Cathrine','Alisa','Bobby','Cathrine',
140 | 'Alisa','Bobby','Cathrine','Alisa','Bobby','Cathrine'],
141 | 'Exam':['Semester 1','Semester 1','Semester 1','Semester 1','Semester 1','Semester 1',
142 | 'Semester 2','Semester 2','Semester 2','Semester 2','Semester 2','Semester 2'],
143 | 'Subject':['Mathematics','Mathematics','Mathematics','Science','Science','Science',
144 | 'Mathematics','Mathematics','Mathematics','Science','Science','Science'],
145 | 'Score':[62,47,55,74,31,77,85,63,42,67,89,81]}
146 |
147 | d
148 |
149 | df = pd.DataFrame(d,columns=['Name','Exam','Subject','Score'])
150 | df
151 |
152 | #View a column of the dataframe in pandas:
153 | df['Name']
154 |
155 | #View selected columns of the dataframe in pandas:
156 | df[['Name','Score','Exam']]
157 |
158 | #View first two rows of the dataframe in pandas:
159 | df[0:2]
160 |
161 |
162 |
163 |
164 |
165 |
--------------------------------------------------------------------------------
/19.MarketBasketAnalysis_AprioriAlgo.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Tue Oct 26 14:46:14 2021
4 |
5 | @author: Admin
6 | """
7 | #Import Libraries----------
8 | import numpy as np
9 | import pandas as pd
10 | from mlxtend.frequent_patterns import apriori, association_rules
11 |
12 | #Loading and exploring the data-----------------
13 | #Loading the Data
14 | data = pd.read_excel('Online_Retail_Store.xlsx')
15 | data.head()
16 | data.info()
17 | # Exploring the columns of the data
18 | data.columns
19 | # Exploring the different regions of transactions
20 | data.Country.unique()
21 |
22 | #Cleaning the Data-----------------
23 | #Identifying missing values:
24 | '''Is there any missing values across columns'''
25 | data.isnull().any()
26 |
27 | '''How many missing values are there across each column'''
28 | data.isnull().sum()
29 |
30 | # Dropping the rows without any invoice number
31 | data.dropna(axis = 0, subset =['InvoiceNo'], inplace = True)
32 | data.isnull().sum()
33 |
34 | # Dropping all transactions which were done on credit ('C')
35 | data.info()
36 | #data = data[~data['InvoiceNo'].str.contains('C')]   #this fails while 'InvoiceNo' still contains non-string values
37 | #For the above cmd to work, we need to ensure that we convert the column "InvoiceNo" to string form first.
38 | data['InvoiceNo'] = data['InvoiceNo'].astype('str')
39 | data = data[~data['InvoiceNo'].str.contains('C')]
40 | #Hence, now we have been able to remove the rows with credit (C) type billing.
41 |
42 | # Stripping extra spaces in the description
43 | data['Description'] = data['Description'].str.strip()
44 |
45 | #Splitting the data according to the region of transaction-------
46 | # Transactions done in France
47 | basket_France = (data[data['Country'] =="France"]
48 | .groupby(['InvoiceNo', 'Description'])['Quantity']
49 | .sum().unstack().reset_index()
50 | .fillna(0)
51 | .set_index('InvoiceNo'))
52 |
53 | # Transactions done in the United Kingdom
54 | basket_UK = (data[data['Country'] =="United Kingdom"]
55 | .groupby(['InvoiceNo', 'Description'])['Quantity']
56 | .sum().unstack().reset_index().fillna(0)
57 | .set_index('InvoiceNo'))
58 |
59 | # Transactions done in Portugal
60 | basket_Por = (data[data['Country'] =="Portugal"]
61 | .groupby(['InvoiceNo', 'Description'])['Quantity']
62 | .sum().unstack().reset_index().fillna(0)
63 | .set_index('InvoiceNo'))
64 |
65 | basket_Sweden = (data[data['Country'] =="Sweden"]
66 | .groupby(['InvoiceNo', 'Description'])['Quantity']
67 | .sum().unstack().reset_index().fillna(0)
68 | .set_index('InvoiceNo'))
69 |
70 | #Hot encoding the Data------------
71 | # Defining the hot encoding function to make the data suitable
72 | # for the concerned libraries
73 | def hot_encode(x):
74 | if(x<= 0):
75 | return 0
76 | if(x>= 1):
77 | return 1
78 |
79 | # Encoding the datasets
80 | basket_encoded = basket_France.applymap(hot_encode)
81 | basket_France = basket_encoded
82 |
83 | basket_encoded = basket_UK.applymap(hot_encode)
84 | basket_UK = basket_encoded
85 |
86 | basket_encoded = basket_Por.applymap(hot_encode)
87 | basket_Por = basket_encoded
88 |
89 | basket_encoded = basket_Sweden.applymap(hot_encode)
90 | basket_Sweden = basket_encoded
91 |
92 | #Building the models and analyzing the results-----------------
93 |
94 | #France:
95 | # Building the model
96 | frq_items = apriori(basket_France, min_support = 0.15, use_colnames = True)
97 | frq_items
98 |
99 | # Collecting the inferred rules in a dataframe
100 | rules = association_rules(frq_items, metric ="lift", min_threshold = 1)
101 | print(rules.head())
102 | France_rules=pd.DataFrame(rules)
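# A common next step (an added sketch; 'confidence' and 'lift' are columns produced by
# mlxtend's association_rules): sort the rules so the strongest ones appear first.
print(France_rules.sort_values(['confidence', 'lift'], ascending=False).head())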
103 |
104 | #Portugal
105 | frq_items = apriori(basket_Por, min_support = 0.15, use_colnames = True)
106 | rules = association_rules(frq_items, metric ="lift", min_threshold = 1)
107 | print(rules.head())
108 | Portugal_rules=pd.DataFrame(rules)
109 |
110 | #Sweden
111 | frq_items = apriori(basket_Sweden, min_support = 0.10, use_colnames = True)
112 | rules = association_rules(frq_items, metric ="lift", min_threshold = 1)
113 | print(rules.head())
114 | Sweden_rules=pd.DataFrame(rules)
115 |
116 | #UK
117 | frq_items = apriori(basket_UK, min_support = 0.09, use_colnames = True)
118 | rules = association_rules(frq_items, metric ="lift", min_threshold = 1)
119 | print(rules.head())
120 | UK_rules=pd.DataFrame(rules)
121 |
122 | #An empty DataFrame here signifies that none of the rules for the UK satisfy the support and
123 | #lift thresholds used for the frequent itemsets above.
--------------------------------------------------------------------------------
/13.multiple_linear_regression_BackwardElimination.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Tue Sep 21 18:49:36 2021
4 |
5 | @author: Admin
6 | """
7 | # Multiple Linear Regression
8 |
9 | # Importing the libraries
10 |
11 | import matplotlib.pyplot as plt
12 | import pandas as pd
13 |
14 | # Importing the dataset
15 | dataset = pd.read_csv('D:/SkillEdge/Python/Final/Codes/pyData/50_Startups.csv')
16 |
17 | #Method-1 (Handling Categorical Variables)
18 | pd.get_dummies(dataset["State"])
19 | pd.get_dummies(dataset["State"],drop_first=True)
20 | S_Dummy = pd.get_dummies(dataset["State"],drop_first=True)
21 | S_Dummy.head(5)
22 | #Now, lets concatenate these dummy var columns in our dataset.
23 | dataset = pd.concat([dataset,S_Dummy],axis=1)
24 | dataset.head(5)
25 | #dropping the columns whose dummy var have been created
26 | dataset.drop(["State",],axis=1,inplace=True)
27 | dataset.head(5)
28 | #------------------------------------------------------------------------------
29 |
30 | #Obtaining DV & IV from the dataset
31 | X = dataset.iloc[:,[0,1,2,4,5]].values
32 | y = dataset.iloc[:,3].values
33 |
34 | # Splitting the dataset into the Training set and Test set
35 | from sklearn.model_selection import train_test_split
36 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
37 |
38 |
39 | # Fitting Multiple Linear Regression to the Training set
40 | from sklearn.linear_model import LinearRegression
41 | regressor = LinearRegression()
42 | regressor.fit(X_train, y_train)
43 |
44 | # Predicting the Test set results
45 | y_pred = regressor.predict(X_test)
46 |
47 | # Accuracy of the model
48 |
49 | #Calculating the r squared value:
50 | from sklearn.metrics import r2_score
51 | r2_score(y_test,y_pred)
52 |
53 | #Coefficient
54 | regressor.coef_
55 |
56 | # Intercept
57 | regressor.intercept_
58 |
59 | #The above score tells us that the model explains about 93% of the variance in the test dataset.
60 |
61 | #--------------------------Backward Elimination--------------------------------
62 | #Backward elimination is a feature selection technique used while building a machine learning model.
63 | #It removes features that do not have a significant effect on the dependent variable or on the prediction of the output.
64 |
65 | #Step: 1- Preparation of Backward Elimination:
66 |
67 | #Importing the library:
68 | import statsmodels.api as sm
69 |
70 | #Adding a column in matrix of features:
71 | import numpy as nm
72 | X = nm.append(arr = nm.ones((50,1)).astype(int), values=X, axis=1)
73 |
74 | #Applying backward elimination process now
75 | #Firstly we will create a new feature vector x_opt, which will only contain a set of
76 | #independent features that are significantly affecting the dependent variable.
77 | x_opt=X[:, [ 0,1,2,3,4,5]]
78 |
79 | #for fitting the model, we will create a regressor_OLS object of new class OLS of
80 | #statsmodels library. Then we will fit it by using the fit() method.
81 | regressor_OLS=sm.OLS(endog = y, exog=x_opt).fit()
82 |
83 | #We will use summary() method to get the summary table of all the variables.
84 | regressor_OLS.summary()
85 |
86 | #In the above summary table, we can clearly see the p-values of all the variables.
87 | #Here x1, x2 are dummy variables, x3 is R&D spend, x4 is Administration spend, and x5 is Marketing spend.
88 |
89 | #Now, since x5 has the highest p-value (greater than 0.05), we remove the x5 variable
90 | #from the feature set and refit the model.
91 | x_opt= X[:, [0,1,2,3,4]]
92 | regressor_OLS=sm.OLS(endog = y, exog=x_opt).fit()
93 | regressor_OLS.summary()
94 |
95 | #Now x4 has the highest p-value greater than 0.05, so we remove the x4 variable
96 | #from the feature set and refit the model.
97 | x_opt= X[:, [0,1,2,3]]
98 | regressor_OLS=sm.OLS(endog = y, exog=x_opt).fit()
99 | regressor_OLS.summary()
100 |
101 | #Next we remove the Administration spend variable (x2), which has a p-value of 0.602,
102 | #and refit the model again.
103 | x_opt= X[:, [0,1,3]]
104 | regressor_OLS=sm.OLS(endog = y, exog=x_opt).fit()
105 | regressor_OLS.summary()
106 |
107 | #Finally, we remove one more variable - Marketing spend - whose p-value of about 0.60
108 | #is still above the significance level of 0.05.
109 | x_opt= X[:, [0,1]]
110 | regressor_OLS=sm.OLS(endog = y, exog=x_opt).fit()
111 | regressor_OLS.summary()
112 |
113 | #Hence,only R&D independent variable is a significant variable for the prediction.
114 | #So we can now predict efficiently using this variable.
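#The manual steps above can also be automated (an added sketch, not part of the original
#walk-through): repeatedly drop the column with the largest p-value until every
#remaining p-value is at or below 0.05.
cols = list(range(X.shape[1]))           #column 0 is the intercept added earlier
while True:
    ols = sm.OLS(endog=y, exog=X[:, cols]).fit()
    worst = int(nm.argmax(ols.pvalues))
    if ols.pvalues[worst] <= 0.05:
        break
    del cols[worst]
print(cols)            #indices of the retained columns
print(ols.summary())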
115 |
116 | #----------Building Multiple Regression model by only using R&D spend:-----------------
117 | #importing datasets
118 | data_set= pd.read_csv('F:/WORK/pyWork/pyData/50_Startups.csv')
119 | #Extracting Independent and dependent Variable
120 | x_BE= data_set.iloc[:,:-4].values
121 | y_BE= data_set.iloc[:,4].values
122 | # Splitting the dataset into training and test set.
123 | from sklearn.model_selection import train_test_split
124 | x_BE_train, x_BE_test, y_BE_train, y_BE_test= train_test_split(x_BE, y_BE, test_size= 0.2, random_state=0)
125 |
126 | #Fitting the MLR model to the training set:
127 | from sklearn.linear_model import LinearRegression
128 | regressor= LinearRegression()
129 | regressor.fit(x_BE_train, y_BE_train)
130 |
131 | #Predicting the Test set result;
132 | y_pred= regressor.predict(x_BE_test)
133 |
134 | #Cheking the score
135 | #Calculating the r squared value:
136 | from sklearn.metrics import r2_score
137 | r2_score(y_BE_test,y_pred)
138 | #The above score tells us that the model is now more accurate on the test dataset, with
139 | #an R-squared of about 95%.
140 |
141 | #Calculating the coefficients:
142 | print(regressor.coef_)
143 |
144 | #Calculating the intercept:
145 | print(regressor.intercept_)
146 |
147 | #Regression Eq'n: Profit = 48416 + 0.85*R&D_Spend
148 |
149 |
--------------------------------------------------------------------------------
/10.Graphs.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Sun Sep 5 17:32:15 2021
4 |
5 | @author: Admin
6 | """
7 | import matplotlib.pyplot as plt
8 | #-----------------------------------GRAPHS---------------------------------
9 |
10 | #--------------------------Bar Chart------------------------------------------
11 | #Vertical Bar Chart
12 | import numpy as np
13 |
14 | city=['Delhi','Beijing','Washington','Tokyo','Moscow']
15 | Happiness_Index=[60,40,70,65,85]
16 |
17 | plt.bar(city,Happiness_Index,color='pink',edgecolor='red')
18 | plt.xlabel('City', fontsize=16)
19 | plt.ylabel('Happiness_Index', fontsize=16)
20 | plt.title('Barchart - Happiness index across cities',fontsize=20)
21 |
22 | #Horizontal Bar Chart
23 |
24 | city=['Delhi','Beijing','Washington','Tokyo','Moscow']
25 | Happiness_Index=[60,40,70,65,85]
26 |
27 | plt.barh(city,Happiness_Index,color='blue',edgecolor='black')
28 | plt.xlabel('Happiness_Index', fontsize=16)
29 | plt.ylabel('City', fontsize=16)
30 | plt.title('Horizontal Barchart - Happiness index across cities',fontsize=20)
31 |
32 | #Stacked Bar Chart in Python with legends:
33 |
34 | city=['Delhi','Beijing','Washington','Tokyo','Moscow']
35 | Gender=['Male','Female']
36 | Happiness_Index_Male=[60,40,70,65,85]
37 | Happiness_Index_Female=[30,60,70,55,75]
38 |
39 | plt.bar(city,Happiness_Index_Male,color='blue',edgecolor='black')
40 | plt.bar(city,Happiness_Index_Female,color='pink',edgecolor='black',bottom=Happiness_Index_Male)
41 | #bar() function plots the Happiness_Index_Female on top of Happiness_Index_Male with the help of
42 | #argument bottom=Happiness_Index_Male.
43 | plt.xlabel('City', fontsize=16)
44 | plt.ylabel('Happiness_Index', fontsize=16)
45 | plt.title('Stacked Barchart - Happiness index across cities',fontsize=18)
46 | plt.legend(Gender,loc=2)
47 |
48 | #--------------------------Histogram-------------------------------------------
49 | #Histogram with no Fills:
50 |
51 | values = [82,76,24,40,67,62,75,78,71,32,98,89,78,67,72,82,87,66,56,52]
52 | plt.hist(values,5, histtype='step', align='mid', color='green', label='Test Score Data')
53 | #Here, the second argument is the number of bins;
54 | #histtype='step' plots the histogram in step format,
55 | #aligned to mid, with the color chosen as green.
56 | plt.legend(loc=2)
57 | #argument loc=2 plots the legend on the top left corner.
58 | plt.title('Histogram of score')
59 |
60 | #Histogram with bar Filled:
61 |
62 | values = [82,76,24,40,67,62,75,78,71,32,98,89,78,67,72,82,87,66,56,52]
63 | plt.hist(values,10, histtype='bar', color='cyan', label='Test score Data',edgecolor='black')
64 | #Argument histtype='bar' plots the histogram with filled bars.
65 | plt.legend()
66 | plt.title('Histogram of score')
67 |
68 | #----------------------------Box Plot------------------------------------------
69 | #Box Plot
70 |
71 | value1=[82,76,24,40,67,62,75,78,71,32,98,89,78,67,72,82,87,66,56,52]
72 | value2=[62,5,91,25,36,32,96,95,3,90,95,32,27,55,100,15,71,11,37,21]
73 | value3=[23,89,12,78,72,89,25,69,68,86,19,49,15,16,16,75,65,31,25,52]
74 | value4=[59,73,70,16,81,61,88,98,10,87,29,72,16,23,72,88,78,99,75,30]
75 |
76 | box_plot_data=[value1,value2,value3,value4]
77 | plt.boxplot(box_plot_data)
78 |
79 | #Box plot with fills and labels:
80 |
81 | value1 = [82,76,24,40,67,62,75,78,71,32,98,89,78,67,72,82,87,66,56,52]
82 | value2=[62,5,91,25,36,32,96,95,3,90,95,32,27,55,100,15,71,11,37,21]
83 | value3=[23,89,12,78,72,89,25,69,68,86,19,49,15,16,16,75,65,31,25,52]
84 | value4=[59,73,70,16,81,61,88,98,10,87,29,72,16,23,72,88,78,99,75,30]
85 |
86 | box_plot_data=[value1,value2,value3,value4]
87 | plt.boxplot(box_plot_data,patch_artist=True,labels=['course1','course2','course3','course4'])
88 | #The argument patch_artist=True fills the boxes, and the labels argument supplies the labels to be plotted.
89 |
90 | #Horizontal box plot in python with different colors:
91 |
92 | value1 = [82,76,24,40,67,62,75,78,71,32,98,89,78,67,72,82,87,66,56,52]
93 | value2=[62,5,91,25,36,32,96,95,3,90,95,32,27,55,100,15,71,11,37,21]
94 | value3=[23,89,12,78,72,89,25,69,68,86,19,49,15,16,16,75,65,31,25,52]
95 | value4=[59,73,70,16,81,61,88,98,10,87,29,72,16,23,72,88,78,99,75,30]
96 |
97 | box_plot_data=[value1,value2,value3,value4]
98 | box=plt.boxplot(box_plot_data,vert=0,patch_artist=True,
99 | labels=['course1','course2','course3','course4'],)
100 | #Adding argument vert =0 plots the horizontal box plot.
101 | colors = ['cyan', 'lightblue', 'lightgreen', 'tan']
102 | for patch, color in zip(box['boxes'], colors):
103 | patch.set_facecolor(color)
104 | #The colors list holds four different colors, which are applied to the four boxes of the boxplot
105 | #using the patch.set_facecolor() function.
106 | #-------------------Line plot or Line chart --------------------
107 |
108 | values = [1, 5, 8, 9, 7, 11, 8, 12, 14, 9]
109 | plt.plot(values)
110 |
111 |
112 | #Multiple Line charts with legends and Labels:
113 | #Let's take an example of units sold in 2016 and 2017 to demonstrate line charts.
114 |
115 | sales1 = [1, 5, 8, 9, 7, 11, 8, 12, 14, 9, 5]
116 | sales2 = [3, 7, 9, 6, 4, 5, 14, 7, 6, 16, 12]
117 | line_chart1 = plt.plot( sales1,range(1,12))
118 | line_chart2 = plt.plot( sales2,range(1,12))
119 | plt.title('Monthly sales of 2016 and 2017')
120 | plt.xlabel('Sales')
121 | plt.ylabel('Month')
122 | plt.legend(['year 2016', 'year 2017'], loc=4)
123 |
124 |
125 | #Charts with different line styles:
126 |
127 | sales1 = [1, 5, 8, 9, 7, 11, 8, 12, 14, 9, 5]
128 | sales2 = [3, 7, 9, 6, 4, 5, 14, 7, 6, 16, 12]
129 | line_chart1 = plt.plot(range(1,12), sales1,'--')
130 | line_chart2 = plt.plot(range(1,12), sales2,':')
131 | plt.title('Monthly sales of 2016 and 2017')
132 |
133 |
134 | #---------------------Pie Chart--------------------------------------------
135 | #Pie chart in Python with legends:
136 |
137 | values = [60, 80, 90, 55, 10, 30]
138 | Col = ['b', 'g', 'r', 'c', 'm', 'y']
139 | labels = ['US', 'UK', 'India', 'Germany', 'Australia', 'South Korea']
140 | Exp = (0.5, 0, 0, 0, 0, 0)
141 | plt.pie(values, colors=Col, labels= values,explode=Exp,counterclock=False, shadow=True)
142 | plt.title('Population Density Index')
143 | plt.legend(labels,loc=3)
144 |
145 | #Pie chart in Python with percentage values:
146 |
147 | values = [60, 80, 90, 55, 10, 30]
148 | colors = ['b', 'g', 'r', 'c', 'm', 'y']
149 | labels = ['US', 'UK', 'India', 'Germany', 'Australia', 'South Korea']
150 | explode = (0.2, 0, 0, 0, 0, 0)
151 | plt.pie(values, colors=colors, labels=labels,
152 | explode=explode, autopct='%1.1f%%', shadow=True)
153 | plt.title('Population Density Index')
154 |
155 | #-------------------------------Scatter Plot----------------------------------
156 | # Scatter plot in Python:
157 |
158 | weight1=[63.3,57,64.3,63,71,61.8,62.9,65.6,64.8,63.1,68.3,69.7,65.4,66.3,60.7]
159 | height1=[156.3,100.7,114.8,156.3,237.1,123.9,151.8,164.7,105.4,136.1,175.2,137.4,164.2,151,124.3]
160 | plt.scatter(weight1,height1,c='r',marker='*')
161 | plt.xlabel('weight', fontsize=16)
162 | plt.ylabel('height', fontsize=16)
163 | plt.title('scatter plot - height vs weight',fontsize=20)
164 |
165 | #Scatter plot for three different groups
166 |
167 | weight1=[57,58.2,58.6,59.6,59.8,60.2,60.5,60.6,60.7,61.3,61.3,61.4,61.8,61.9,62.3]
168 | height1=[100.7,195.6,94.3,127.1,111.7,159.7,135,149.9,124.3,112.9,176.7,110.2,123.9,161.9,107.8]
169 |
170 | weight2=[62.9,63,63.1,63.2,63.3,63.4,63.4,63.4,63.5,63.6,63.7,64.1,64.3,64.3,64.7,64.8,65]
171 | height2=[151.8,156.3,136.1,124.2,156.3,130,181.2,255.9,163.1,123.1,119.5,179.9,114.8,174.1,108.8,105.4,141.4]
172 |
173 |
174 | weight3=[69.2,69.2,69.4,69.7,70,70.3,70.8,71,71.1,71.7,71.9,72.4,73,73.1,76.2]
175 | height3=[166.8,172.9,193.8,137.4,162.4,137.1,169.1,237.1,189.1,179.3,174.8,213.3,198,191.1,220.6]
176 |
177 | import numpy as np
178 | weight=np.concatenate((weight1,weight2,weight3))
179 | height=np.concatenate((height1,height2,height3))
180 |
181 | color_array = ['b'] * 15 + ['g'] * 17 + ['r'] * 15
182 |
183 | plt.scatter(weight, height, marker='*', c=color_array)
184 |
185 | plt.xlabel('weight', fontsize=16)
186 | plt.ylabel('height', fontsize=16)
187 | plt.title('grouped scatter plot - height vs weight',fontsize=20)
188 |
--------------------------------------------------------------------------------
/14.logistic_regression.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Thu Sep 30 19:49:04 2021
4 |
5 | @author: Admin
6 | """
7 | # Logistic Regression
8 |
9 | #-------------Logistic Regression------------------------------
10 | #Import Libraries
11 | import pandas as pd
12 | import seaborn as sns
13 | import matplotlib.pyplot as plt
14 |
15 |
16 | #Import data
17 | titanic_data = pd.read_csv("D:/SkillEdge/Python/Final/Codes/pyData/titanic.csv")
18 | titanic_data.head(5)
19 | titanic_data.tail(5)
20 |
21 | print("No. of passengers in original dataset:" +str(len(titanic_data.index)))
22 |
23 | #Analyzing Data
24 | sns.countplot(x="survived",data=titanic_data)
25 |
26 | sns.countplot(x="survived",hue="sex",data=titanic_data)
27 |
28 | sns.countplot(x="survived",hue="pclass",data=titanic_data)
29 |
30 | #CHECKING DATA TYPE OF A VARIABLE AND CONVERTING IT INTO ANOTHER TYPE-----
31 | titanic_data.info()
32 | titanic_data["age"].plot.hist()
33 | plt.hist(titanic_data["age"])
34 |
35 |
36 | #Converting var "age" from object type to float type
37 | titanic_data["age"] = pd.to_numeric(titanic_data.age, errors='coerce')
38 | titanic_data.info()
39 | #The parameter errors='coerce' in the above function replaces non-numeric entries (like "?"),
40 | #if any, in the "age" column with NaN values.
41 |
42 | titanic_data["age"].plot.hist()
43 |
44 | #Converting var "fare" from object type to float type
45 | titanic_data["fare"] = pd.to_numeric(titanic_data.fare, errors='coerce')
46 | titanic_data.info()
47 | #The parameter errors='coerce' in the above function replaces non-numeric entries (like "?"),
48 | #if any, in the "fare" column with NaN values.
49 |
50 | titanic_data["fare"].plot.hist()
51 |
52 | #Identifying/Finding missing values if any----
53 | titanic_data.isnull()
54 | titanic_data.isnull().sum()
55 |
56 | sns.heatmap(titanic_data.isnull(),yticklabels=False, cmap="viridis")
57 |
58 | #Note:
59 | #Since the missing values in "fare" are quite few, we can delete those rows.
60 | #Since the missing values in "age" are many, it is better to impute them.
61 |
62 | sns.boxplot(x="age",data=titanic_data)
63 | sns.boxplot(x="fare",data=titanic_data)
64 |
65 | #From the boxplot we observe that the number of outliers in "age" is quite small; hence,
66 | #if we plan to impute "age", mean imputation is a reasonable choice.
67 |
68 | #Handling Missing Values------------
69 | titanic_data.head(5)
70 |
71 | #Dropping all the rows which have a missing value in the "fare" column
72 | #Drop NaN in a specific column
73 | titanic_data.dropna(subset=['fare'],inplace=True)
74 | sns.heatmap(titanic_data.isnull(),yticklabels=False)
75 |
76 | #Imputing missing values in column (Age) with mean imputation
77 | titanic_data["age"].fillna(titanic_data["age"].mean(), inplace=True)
78 | sns.heatmap(titanic_data.isnull(),yticklabels=False)
79 |
80 | #Hence, we do not have any missing values in the dataset now.
81 | titanic_data.isnull().sum()
82 |
83 | #Note:
84 | #A heat map is usually drawn for either continuous or categorical variables.
85 | #Let's take a few continuous variable columns and draw the heat map.
86 | #Cont = titanic_data[:,[5,6,7]]
87 | #sns.heatmap(Cont)
88 |
89 | #There are a lot of string-valued variables in the dataset which have to be converted to
90 | #numerical values before applying a machine learning algorithm. Hence, we will now convert
91 | #the string variables to numerical (dummy) variables.
92 | titanic_data.info()
93 | pd.get_dummies(titanic_data["sex"])
94 |
95 | pd.get_dummies(titanic_data["sex"],drop_first=True)
96 |
97 | Sex_Dummy = pd.get_dummies(titanic_data["sex"],drop_first=True)
98 | Sex_Dummy.head(5)
99 |
100 | pd.get_dummies(titanic_data["embarked"])
101 | Embarked_Dummy = pd.get_dummies(titanic_data["embarked"],drop_first=True)
102 | Embarked_Dummy.head(5)
103 |
104 | pd.get_dummies(titanic_data["pclass"])
105 | PClass_Dummy = pd.get_dummies(titanic_data["pclass"],drop_first=True)
106 | PClass_Dummy.head(5)
107 |
108 | #Now, lets concatenate these dummy var columns in our dataset.
109 | titanic_data = pd.concat([titanic_data,Sex_Dummy,PClass_Dummy,Embarked_Dummy],axis=1)
110 | titanic_data.head(5)
111 |
112 | #dropping the columns whose dummy var have been created
113 | titanic_data.drop(["sex","embarked","pclass","Passenger_id","name","ticket"],axis=1,inplace=True)
114 | titanic_data.head(5)
115 |
116 | #Splitting the dataset into Train & Test dataset
117 | x=titanic_data.drop("survived",axis=1)
118 | y=titanic_data["survived"]
119 |
120 | from sklearn.model_selection import train_test_split
121 | X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.25, random_state = 0)
122 |
123 | # Fitting Logistic Regression to the Training set
124 | from sklearn.linear_model import LogisticRegression
125 | help(LogisticRegression())
126 | logmodel = LogisticRegression(solver='liblinear') #It is the default solver for Scikit-learn versions earlier than 0.22.0.
127 | logmodel.fit(X_train, y_train)
128 |
129 | predictions = logmodel.predict(X_test)
130 |
131 | from sklearn.metrics import confusion_matrix
132 | confusion_matrix(y_test,predictions)
133 |
134 | confusion_matrix(predictions,y_test)
135 |
136 | #Hence, accuracy = (165+84)/(165+84+30+44) = 249/323, which is roughly 77%
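# A minimal sketch (not part of the original script): the same figure can be obtained
# directly with scikit-learn's accuracy_score instead of computing it by hand.
from sklearn.metrics import accuracy_score
print(accuracy_score(y_test, predictions))   # ~0.77 for this split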
137 |
138 | #Calculating the coefficients:
139 | print(logmodel.coef_)
140 |
141 | #Calculating the intercept:
142 | print(logmodel.intercept_)
143 |
144 | #----To improve the accuracy of the model, let's go with the Backward Elimination method &
145 | # rebuild the logistic model again with fewer independent variables--------
146 | titanic_data_1 = titanic_data
147 | titanic_data_1.head(5)
148 |
149 | #--------------------------Backward Elimination--------------------------------
150 | #Backward elimination is a feature selection technique used while building a machine learning model. It
151 | #removes the features that do not have a significant effect on the dependent variable or the predicted output.
152 |
153 | #Step 1: Preparation for Backward Elimination:
154 | #Importing the library:
155 | import statsmodels.api as sm
156 |
157 | #Adding a constant (ones) column to the matrix of features for the intercept term:
158 | x1=titanic_data_1.drop("survived",axis=1)
159 | y1=titanic_data_1["survived"]
160 | import numpy as nm
161 | x1 = nm.append(arr = nm.ones((x1.shape[0],1)).astype(int), values=x1, axis=1)
162 |
163 | #Applying backward elimination process now
164 | #Firstly we will create a new feature vector x_opt, which will only contain a set of
165 | #independent features that are significantly affecting the dependent variable.
166 | x_opt= x1[:, [0,1,2,3,4,5,6,7,8,9,10]]
167 |
168 | #for fitting the model, we will create a regressor_OLS object of new class OLS of statsmodels library.
169 | #Then we will fit it by using the fit() method.
170 | regressor_OLS=sm.OLS(endog = y1, exog=x_opt).fit()
171 |
172 | #We will use summary() method to get the summary table of all the variables.
173 | regressor_OLS.summary()
174 |
175 | #In the above summary table, we can clearly see the p-values of all the variables.
176 | #And remove the ind var with p-value greater than 0.05
177 | x_opt= x1[:, [0,1,2,4,5,6,7,8,9,10]]
178 | regressor_OLS=sm.OLS(endog = y1, exog=x_opt).fit()
179 | regressor_OLS.summary()
180 |
181 | x_opt= x1[:, [0,1,2,4,5,6,7,9,10]]
182 | regressor_OLS=sm.OLS(endog = y1, exog=x_opt).fit()
183 | regressor_OLS.summary()
184 |
185 | x_opt= x1[:, [0,1,2,5,6,7,9,10]]
186 | regressor_OLS=sm.OLS(endog = y1, exog=x_opt).fit()
187 | regressor_OLS.summary()
188 |
189 | x_opt= x1[:, [0,1,2,5,6,7,10]]
190 | regressor_OLS=sm.OLS(endog = y1, exog=x_opt).fit()
191 | regressor_OLS.summary()
192 | #Hence, the independent variables age, sibsp, sex, pclass & embarked are significant
193 | #for predicting the value of the dependent variable "survived".
194 | #So we can now predict efficiently using these variables.
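# A minimal sketch (not part of the original script): the manual steps above can be
# automated by repeatedly dropping the column with the highest p-value until every
# remaining p-value is at or below 0.05 (in practice the constant column is usually kept).
cols = list(range(x1.shape[1]))
while True:
    ols = sm.OLS(endog=y1, exog=x1[:, cols]).fit()
    pvals = nm.asarray(ols.pvalues)
    worst = int(pvals.argmax())
    if pvals[worst] <= 0.05:
        break
    del cols[worst]
print(cols)   # indices of the retained columns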
195 |
196 | #-------Building the Logistic Regression model using the variables: age, sibsp, sex, pclass & embarked--------
197 | # Splitting the dataset into training and test set.
198 | from sklearn.model_selection import train_test_split
199 | x_BE_train, x_BE_test, y_BE_train, y_BE_test= train_test_split(x_opt, y1, test_size= 0.25, random_state=0)
200 |
201 | # Fitting Logistic Regression to the Training set
202 | from sklearn.linear_model import LogisticRegression
203 | logmodel = LogisticRegression(solver='liblinear')
204 | logmodel.fit(x_BE_train, y_BE_train)
205 |
206 | predictions = logmodel.predict(x_BE_test)
207 |
208 | from sklearn.metrics import confusion_matrix
209 | confusion_matrix(y_BE_test,predictions)
210 |
211 | #Accuracy = (170+87)/(170+87+25+41) = 80%
212 |
213 | #Calculating the coefficients:
214 | print(logmodel.coef_)
215 |
216 | #Calculating the intercept:
217 | print(logmodel.intercept_)
218 |
219 | #So, your final predictive modelling equation becomes:
220 | #P(Survived = 1) = exp(z) / (exp(z) + 1), where
221 | #z = 3.74 -0.03*age -0.27*sibsp -2.52*sex(male) -1.03*pclass(2) -2.1*pclass(3) -0.33*embd(S)
222 | #
223 | #(i.e., the logistic/sigmoid function applied to the linear combination above)
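# A minimal sketch (not part of the original script): rather than plugging numbers into the
# equation by hand, the fitted model can return the same probabilities directly.
probs = logmodel.predict_proba(x_BE_test)   # column 1 holds P(survived = 1)
print(probs[:5, 1])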
224 |
--------------------------------------------------------------------------------
/31.Reading Files into Python.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"2. Reading Files into Python.ipynb","provenance":[],"authorship_tag":"ABX9TyM/mIl9ka9uA5pMkM+Ll0AK"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"1FzYccB24irQ","colab_type":"text"},"source":["## Reading Files into Python"]},{"cell_type":"code","metadata":{"id":"zDWO5w4jIiWL","colab_type":"code","colab":{}},"source":["# importing libraries\n","import pandas as pd"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"LFxGyLx3L5IS","colab_type":"code","colab":{}},"source":["#importing data\n","data = pd.read_csv('churn_prediction.csv')"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"qPcIwT47NQX6","colab_type":"code","outputId":"dd64ac4c-6cd5-4f12-c536-2336c75475b9","executionInfo":{"status":"ok","timestamp":1580377658648,"user_tz":-330,"elapsed":1247,"user":{"displayName":"Sharoon Saxena","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64","userId":"01167841530696023488"}},"colab":{"base_uri":"https://localhost:8080/","height":224}},"source":["#first 5 instances using \"head()\" function\n","data.head()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","
\n"," \n"," \n"," | \n"," customer_id | \n"," vintage | \n"," age | \n"," gender | \n"," dependents | \n"," occupation | \n"," city | \n"," customer_nw_category | \n"," branch_code | \n"," current_balance | \n"," previous_month_end_balance | \n"," average_monthly_balance_prevQ | \n"," average_monthly_balance_prevQ2 | \n"," current_month_credit | \n"," previous_month_credit | \n"," current_month_debit | \n"," previous_month_debit | \n"," current_month_balance | \n"," previous_month_balance | \n"," churn | \n"," last_transaction | \n","
\n"," \n"," \n"," \n"," | 0 | \n"," 1 | \n"," 3135 | \n"," 66 | \n"," Male | \n"," 0.0 | \n"," self_employed | \n"," 187.0 | \n"," 2 | \n"," 755 | \n"," 1458.71 | \n"," 1458.71 | \n"," 1458.71 | \n"," 1449.07 | \n"," 0.20 | \n"," 0.20 | \n"," 0.20 | \n"," 0.20 | \n"," 1458.71 | \n"," 1458.71 | \n"," 0 | \n"," 2019-05-21 | \n","
\n"," \n"," | 1 | \n"," 2 | \n"," 310 | \n"," 35 | \n"," Male | \n"," 0.0 | \n"," self_employed | \n"," NaN | \n"," 2 | \n"," 3214 | \n"," 5390.37 | \n"," 8704.66 | \n"," 7799.26 | \n"," 12419.41 | \n"," 0.56 | \n"," 0.56 | \n"," 5486.27 | \n"," 100.56 | \n"," 6496.78 | \n"," 8787.61 | \n"," 0 | \n"," 2019-11-01 | \n","
\n"," \n"," | 2 | \n"," 4 | \n"," 2356 | \n"," 31 | \n"," Male | \n"," 0.0 | \n"," salaried | \n"," 146.0 | \n"," 2 | \n"," 41 | \n"," 3913.16 | \n"," 5815.29 | \n"," 4910.17 | \n"," 2815.94 | \n"," 0.61 | \n"," 0.61 | \n"," 6046.73 | \n"," 259.23 | \n"," 5006.28 | \n"," 5070.14 | \n"," 0 | \n"," NaT | \n","
\n"," \n"," | 3 | \n"," 5 | \n"," 478 | \n"," 90 | \n"," NaN | \n"," NaN | \n"," self_employed | \n"," 1020.0 | \n"," 2 | \n"," 582 | \n"," 2291.91 | \n"," 2291.91 | \n"," 2084.54 | \n"," 1006.54 | \n"," 0.47 | \n"," 0.47 | \n"," 0.47 | \n"," 2143.33 | \n"," 2291.91 | \n"," 1669.79 | \n"," 1 | \n"," 2019-08-06 | \n","
\n"," \n"," | 4 | \n"," 6 | \n"," 2531 | \n"," 42 | \n"," Male | \n"," 2.0 | \n"," self_employed | \n"," 1494.0 | \n"," 3 | \n"," 388 | \n"," 927.72 | \n"," 1401.72 | \n"," 1643.31 | \n"," 1871.12 | \n"," 0.33 | \n"," 714.61 | \n"," 588.62 | \n"," 1538.06 | \n"," 1157.15 | \n"," 1677.16 | \n"," 1 | \n"," 2019-11-03 | \n","
\n"," \n","
\n","
"],"text/plain":[" customer_id vintage age ... previous_month_balance churn last_transaction\n","0 1 3135 66 ... 1458.71 0 2019-05-21\n","1 2 310 35 ... 8787.61 0 2019-11-01\n","2 4 2356 31 ... 5070.14 0 NaT\n","3 5 478 90 ... 1669.79 1 2019-08-06\n","4 6 2531 42 ... 1677.16 1 2019-11-03\n","\n","[5 rows x 21 columns]"]},"metadata":{"tags":[]},"execution_count":12}]},{"cell_type":"code","metadata":{"id":"_G5CwMDl2B_8","colab_type":"code","outputId":"14298801-9bf5-4aeb-a582-cfb9b03192d5","executionInfo":{"status":"ok","timestamp":1580377658649,"user_tz":-330,"elapsed":1035,"user":{"displayName":"Sharoon Saxena","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64","userId":"01167841530696023488"}},"colab":{"base_uri":"https://localhost:8080/","height":224}},"source":["#last 5 instances using \"tail()\" function\n","data.tail()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["\n","\n","
\n"," \n"," \n"," | \n"," customer_id | \n"," vintage | \n"," age | \n"," gender | \n"," dependents | \n"," occupation | \n"," city | \n"," customer_nw_category | \n"," branch_code | \n"," current_balance | \n"," previous_month_end_balance | \n"," average_monthly_balance_prevQ | \n"," average_monthly_balance_prevQ2 | \n"," current_month_credit | \n"," previous_month_credit | \n"," current_month_debit | \n"," previous_month_debit | \n"," current_month_balance | \n"," previous_month_balance | \n"," churn | \n"," last_transaction | \n","
\n"," \n"," \n"," \n"," | 28377 | \n"," 30297 | \n"," 1845 | \n"," 10 | \n"," Female | \n"," 0.0 | \n"," student | \n"," 1020.0 | \n"," 2 | \n"," 1207 | \n"," 1076.43 | \n"," 1076.43 | \n"," 2282.19 | \n"," 2787.70 | \n"," 0.30 | \n"," 0.30 | \n"," 0.30 | \n"," 0.30 | \n"," 1076.43 | \n"," 1076.43 | \n"," 0 | \n"," 2019-10-22 | \n","
\n"," \n"," | 28378 | \n"," 30298 | \n"," 4919 | \n"," 34 | \n"," Female | \n"," 0.0 | \n"," self_employed | \n"," 1046.0 | \n"," 2 | \n"," 223 | \n"," 3844.10 | \n"," 4069.21 | \n"," 3668.83 | \n"," 3865.55 | \n"," 1.71 | \n"," 2.29 | \n"," 901.00 | \n"," 1014.07 | \n"," 3738.54 | \n"," 3690.32 | \n"," 0 | \n"," 2019-12-17 | \n","
\n"," \n"," | 28379 | \n"," 30299 | \n"," 297 | \n"," 47 | \n"," Male | \n"," 0.0 | \n"," salaried | \n"," 1096.0 | \n"," 2 | \n"," 588 | \n"," 65511.97 | \n"," 61017.55 | \n"," 53444.81 | \n"," 21925.81 | \n"," 4666.84 | \n"," 3883.06 | \n"," 168.23 | \n"," 71.80 | \n"," 61078.50 | \n"," 57564.24 | \n"," 1 | \n"," 2019-12-31 | \n","
\n"," \n"," | 28380 | \n"," 30300 | \n"," 2585 | \n"," 50 | \n"," Male | \n"," 3.0 | \n"," self_employed | \n"," 1219.0 | \n"," 3 | \n"," 274 | \n"," 1625.55 | \n"," 1625.55 | \n"," 1683.20 | \n"," 1857.42 | \n"," 0.20 | \n"," 0.20 | \n"," 0.20 | \n"," 0.20 | \n"," 1625.55 | \n"," 1625.55 | \n"," 0 | \n"," NaT | \n","
\n"," \n"," | 28381 | \n"," 30301 | \n"," 2349 | \n"," 18 | \n"," Male | \n"," 0.0 | \n"," student | \n"," 1232.0 | \n"," 2 | \n"," 474 | \n"," 2107.05 | \n"," 2821.34 | \n"," 3213.44 | \n"," 4447.45 | \n"," 0.11 | \n"," 7.44 | \n"," 714.40 | \n"," 1094.09 | \n"," 2402.62 | \n"," 3260.58 | \n"," 1 | \n"," 2019-11-02 | \n","
\n"," \n","
\n","
"],"text/plain":[" customer_id vintage ... churn last_transaction\n","28377 30297 1845 ... 0 2019-10-22\n","28378 30298 4919 ... 0 2019-12-17\n","28379 30299 297 ... 1 2019-12-31\n","28380 30300 2585 ... 0 NaT\n","28381 30301 2349 ... 1 2019-11-02\n","\n","[5 rows x 21 columns]"]},"metadata":{"tags":[]},"execution_count":13}]},{"cell_type":"code","metadata":{"id":"5Iy0lfDNNZ8U","colab_type":"code","outputId":"5e2a976c-90ad-4dc4-af21-e2d551a8516a","executionInfo":{"status":"ok","timestamp":1580377659085,"user_tz":-330,"elapsed":1033,"user":{"displayName":"Sharoon Saxena","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64","userId":"01167841530696023488"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"source":["#finding out the shape of the data using \"shape\" variable: Output (rows, columns)\n","data.shape"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(28382, 21)"]},"metadata":{"tags":[]},"execution_count":14}]},{"cell_type":"code","metadata":{"id":"b75gSeumN50y","colab_type":"code","outputId":"0276bf06-6ea1-4415-edd5-264c0c8d96a4","executionInfo":{"status":"ok","timestamp":1580377659609,"user_tz":-330,"elapsed":1118,"user":{"displayName":"Sharoon Saxena","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64","userId":"01167841530696023488"}},"colab":{"base_uri":"https://localhost:8080/","height":153}},"source":["#Printing all the columns present in data\n","data.columns"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Index(['customer_id', 'vintage', 'age', 'gender', 'dependents', 'occupation',\n"," 'city', 'customer_nw_category', 'branch_code', 'current_balance',\n"," 'previous_month_end_balance', 'average_monthly_balance_prevQ',\n"," 'average_monthly_balance_prevQ2', 'current_month_credit',\n"," 'previous_month_credit', 'current_month_debit', 'previous_month_debit',\n"," 'current_month_balance', 'previous_month_balance', 'churn',\n"," 'last_transaction'],\n"," dtype='object')"]},"metadata":{"tags":[]},"execution_count":15}]},{"cell_type":"code","metadata":{"id":"kKIUtLmZX5tg","colab_type":"code","outputId":"71c127fd-08af-4241-a936-e43dbf7ee0eb","executionInfo":{"status":"ok","timestamp":1580377660322,"user_tz":-330,"elapsed":820,"user":{"displayName":"Sharoon Saxena","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64","userId":"01167841530696023488"}},"colab":{"base_uri":"https://localhost:8080/","height":391}},"source":["data.dtypes"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["customer_id int64\n","vintage int64\n","age int64\n","gender object\n","dependents float64\n","occupation object\n","city float64\n","customer_nw_category int64\n","branch_code int64\n","current_balance float64\n","previous_month_end_balance float64\n","average_monthly_balance_prevQ float64\n","average_monthly_balance_prevQ2 float64\n","current_month_credit float64\n","previous_month_credit float64\n","current_month_debit float64\n","previous_month_debit float64\n","current_month_balance float64\n","previous_month_balance float64\n","churn int64\n","last_transaction object\n","dtype: object"]},"metadata":{"tags":[]},"execution_count":16}]}]}
2 |
--------------------------------------------------------------------------------
/32.Min_Max_Range_Updated.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "colab_type": "text",
7 | "id": "LIOaq8QL8CDm"
8 | },
9 | "source": [
10 | "## Min, Max and Range of Data"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 1,
16 | "metadata": {
17 | "colab": {},
18 | "colab_type": "code",
19 | "id": "Jn7K0qBlR_Kn"
20 | },
21 | "outputs": [],
22 | "source": [
23 | "#import libraries\n",
24 | "import pandas as pd\n",
25 | "import numpy as np"
26 | ]
27 | },
28 | {
29 | "cell_type": "markdown",
30 | "metadata": {
31 | "colab_type": "text",
32 | "id": "NxC7Fy8fOuRz"
33 | },
34 | "source": [
35 | "This is the dataset for __Customer Churn Problem__. \n"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": 2,
41 | "metadata": {
42 | "colab": {},
43 | "colab_type": "code",
44 | "id": "jtGI5XRpR_Ku"
45 | },
46 | "outputs": [],
47 | "source": [
48 | "# importing dataset\n",
49 | "data = pd.read_csv('churn_prediction.csv')"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": 3,
55 | "metadata": {},
56 | "outputs": [
57 | {
58 | "data": {
59 | "text/html": [
60 | "\n",
61 | "\n",
74 | "
\n",
75 | " \n",
76 | " \n",
77 | " | \n",
78 | " customer_id | \n",
79 | " vintage | \n",
80 | " age | \n",
81 | " gender | \n",
82 | " dependents | \n",
83 | " occupation | \n",
84 | " city | \n",
85 | " customer_nw_category | \n",
86 | " branch_code | \n",
87 | " current_balance | \n",
88 | " ... | \n",
89 | " average_monthly_balance_prevQ | \n",
90 | " average_monthly_balance_prevQ2 | \n",
91 | " current_month_credit | \n",
92 | " previous_month_credit | \n",
93 | " current_month_debit | \n",
94 | " previous_month_debit | \n",
95 | " current_month_balance | \n",
96 | " previous_month_balance | \n",
97 | " churn | \n",
98 | " last_transaction | \n",
99 | "
\n",
100 | " \n",
101 | " \n",
102 | " \n",
103 | " | 0 | \n",
104 | " 1 | \n",
105 | " 2101 | \n",
106 | " 66 | \n",
107 | " Male | \n",
108 | " 0.0 | \n",
109 | " self_employed | \n",
110 | " 187.0 | \n",
111 | " 2 | \n",
112 | " 755 | \n",
113 | " 1458.71 | \n",
114 | " ... | \n",
115 | " 1458.71 | \n",
116 | " 1449.07 | \n",
117 | " 0.20 | \n",
118 | " 0.20 | \n",
119 | " 0.20 | \n",
120 | " 0.20 | \n",
121 | " 1458.71 | \n",
122 | " 1458.71 | \n",
123 | " 0 | \n",
124 | " 2019-05-21 | \n",
125 | "
\n",
126 | " \n",
127 | " | 1 | \n",
128 | " 2 | \n",
129 | " 2348 | \n",
130 | " 35 | \n",
131 | " Male | \n",
132 | " 0.0 | \n",
133 | " self_employed | \n",
134 | " NaN | \n",
135 | " 2 | \n",
136 | " 3214 | \n",
137 | " 5390.37 | \n",
138 | " ... | \n",
139 | " 7799.26 | \n",
140 | " 12419.41 | \n",
141 | " 0.56 | \n",
142 | " 0.56 | \n",
143 | " 5486.27 | \n",
144 | " 100.56 | \n",
145 | " 6496.78 | \n",
146 | " 8787.61 | \n",
147 | " 0 | \n",
148 | " 2019-11-01 | \n",
149 | "
\n",
150 | " \n",
151 | " | 2 | \n",
152 | " 4 | \n",
153 | " 2194 | \n",
154 | " 31 | \n",
155 | " Male | \n",
156 | " 0.0 | \n",
157 | " salaried | \n",
158 | " 146.0 | \n",
159 | " 2 | \n",
160 | " 41 | \n",
161 | " 3913.16 | \n",
162 | " ... | \n",
163 | " 4910.17 | \n",
164 | " 2815.94 | \n",
165 | " 0.61 | \n",
166 | " 0.61 | \n",
167 | " 6046.73 | \n",
168 | " 259.23 | \n",
169 | " 5006.28 | \n",
170 | " 5070.14 | \n",
171 | " 0 | \n",
172 | " NaT | \n",
173 | "
\n",
174 | " \n",
175 | " | 3 | \n",
176 | " 5 | \n",
177 | " 2329 | \n",
178 | " 90 | \n",
179 | " NaN | \n",
180 | " NaN | \n",
181 | " self_employed | \n",
182 | " 1020.0 | \n",
183 | " 2 | \n",
184 | " 582 | \n",
185 | " 2291.91 | \n",
186 | " ... | \n",
187 | " 2084.54 | \n",
188 | " 1006.54 | \n",
189 | " 0.47 | \n",
190 | " 0.47 | \n",
191 | " 0.47 | \n",
192 | " 2143.33 | \n",
193 | " 2291.91 | \n",
194 | " 1669.79 | \n",
195 | " 1 | \n",
196 | " 2019-08-06 | \n",
197 | "
\n",
198 | " \n",
199 | " | 4 | \n",
200 | " 6 | \n",
201 | " 1579 | \n",
202 | " 42 | \n",
203 | " Male | \n",
204 | " 2.0 | \n",
205 | " self_employed | \n",
206 | " 1494.0 | \n",
207 | " 3 | \n",
208 | " 388 | \n",
209 | " 927.72 | \n",
210 | " ... | \n",
211 | " 1643.31 | \n",
212 | " 1871.12 | \n",
213 | " 0.33 | \n",
214 | " 714.61 | \n",
215 | " 588.62 | \n",
216 | " 1538.06 | \n",
217 | " 1157.15 | \n",
218 | " 1677.16 | \n",
219 | " 1 | \n",
220 | " 2019-11-03 | \n",
221 | "
\n",
222 | " \n",
223 | "
\n",
224 | "
5 rows × 21 columns
\n",
225 | "
"
226 | ],
227 | "text/plain": [
228 | " customer_id vintage age gender dependents occupation city \\\n",
229 | "0 1 2101 66 Male 0.0 self_employed 187.0 \n",
230 | "1 2 2348 35 Male 0.0 self_employed NaN \n",
231 | "2 4 2194 31 Male 0.0 salaried 146.0 \n",
232 | "3 5 2329 90 NaN NaN self_employed 1020.0 \n",
233 | "4 6 1579 42 Male 2.0 self_employed 1494.0 \n",
234 | "\n",
235 | " customer_nw_category branch_code current_balance ... \\\n",
236 | "0 2 755 1458.71 ... \n",
237 | "1 2 3214 5390.37 ... \n",
238 | "2 2 41 3913.16 ... \n",
239 | "3 2 582 2291.91 ... \n",
240 | "4 3 388 927.72 ... \n",
241 | "\n",
242 | " average_monthly_balance_prevQ average_monthly_balance_prevQ2 \\\n",
243 | "0 1458.71 1449.07 \n",
244 | "1 7799.26 12419.41 \n",
245 | "2 4910.17 2815.94 \n",
246 | "3 2084.54 1006.54 \n",
247 | "4 1643.31 1871.12 \n",
248 | "\n",
249 | " current_month_credit previous_month_credit current_month_debit \\\n",
250 | "0 0.20 0.20 0.20 \n",
251 | "1 0.56 0.56 5486.27 \n",
252 | "2 0.61 0.61 6046.73 \n",
253 | "3 0.47 0.47 0.47 \n",
254 | "4 0.33 714.61 588.62 \n",
255 | "\n",
256 | " previous_month_debit current_month_balance previous_month_balance churn \\\n",
257 | "0 0.20 1458.71 1458.71 0 \n",
258 | "1 100.56 6496.78 8787.61 0 \n",
259 | "2 259.23 5006.28 5070.14 0 \n",
260 | "3 2143.33 2291.91 1669.79 1 \n",
261 | "4 1538.06 1157.15 1677.16 1 \n",
262 | "\n",
263 | " last_transaction \n",
264 | "0 2019-05-21 \n",
265 | "1 2019-11-01 \n",
266 | "2 NaT \n",
267 | "3 2019-08-06 \n",
268 | "4 2019-11-03 \n",
269 | "\n",
270 | "[5 rows x 21 columns]"
271 | ]
272 | },
273 | "execution_count": 3,
274 | "metadata": {},
275 | "output_type": "execute_result"
276 | }
277 | ],
278 | "source": [
279 | "data.head()"
280 | ]
281 | },
282 | {
283 | "cell_type": "code",
284 | "execution_count": 4,
285 | "metadata": {},
286 | "outputs": [
287 | {
288 | "data": {
289 | "text/plain": [
290 | "(28382, 21)"
291 | ]
292 | },
293 | "execution_count": 4,
294 | "metadata": {},
295 | "output_type": "execute_result"
296 | }
297 | ],
298 | "source": [
299 | "data.shape"
300 | ]
301 | },
302 | {
303 | "cell_type": "markdown",
304 | "metadata": {
305 | "colab_type": "text",
306 | "id": "TkE9qtAvOuSD"
307 | },
308 | "source": [
309 | "Identification of __Datatypes__"
310 | ]
311 | },
312 | {
313 | "cell_type": "code",
314 | "execution_count": 0,
315 | "metadata": {
316 | "colab": {},
317 | "colab_type": "code",
318 | "id": "D1eopfppOuSG",
319 | "outputId": "7b2b8784-bce5-4424-e2a1-7e649fc2ce62"
320 | },
321 | "outputs": [
322 | {
323 | "data": {
324 | "text/plain": [
325 | "customer_id int64\n",
326 | "vintage int64\n",
327 | "age int64\n",
328 | "gender object\n",
329 | "dependents float64\n",
330 | "occupation object\n",
331 | "city float64\n",
332 | "customer_nw_category int64\n",
333 | "branch_code int64\n",
334 | "days_since_last_transaction float64\n",
335 | "current_balance float64\n",
336 | "previous_month_end_balance float64\n",
337 | "average_monthly_balance_prevQ float64\n",
338 | "average_monthly_balance_prevQ2 float64\n",
339 | "current_month_credit float64\n",
340 | "previous_month_credit float64\n",
341 | "current_month_debit float64\n",
342 | "previous_month_debit float64\n",
343 | "current_month_balance float64\n",
344 | "previous_month_balance float64\n",
345 | "churn int64\n",
346 | "dtype: object"
347 | ]
348 | },
349 | "execution_count": 4,
350 | "metadata": {
351 | "tags": []
352 | },
353 | "output_type": "execute_result"
354 | }
355 | ],
356 | "source": [
357 | "data.dtypes"
358 | ]
359 | },
360 | {
361 | "cell_type": "markdown",
362 | "metadata": {
363 | "colab_type": "text",
364 | "id": "s3iJ7r43Ac0W"
365 | },
366 | "source": [
367 | "## Isolating numerical columns"
368 | ]
369 | },
370 | {
371 | "cell_type": "markdown",
372 | "metadata": {
373 | "colab_type": "text",
374 | "id": "-l0diipkOuSb"
375 | },
376 | "source": [
377 | "Storing indices of __Integer and Float__ in numercial_cols because we are dealing with __numerical variables__"
378 | ]
379 | },
380 | {
381 | "cell_type": "code",
382 | "execution_count": 5,
383 | "metadata": {
384 | "colab": {
385 | "base_uri": "https://localhost:8080/",
386 | "height": 153
387 | },
388 | "colab_type": "code",
389 | "executionInfo": {
390 | "elapsed": 1441,
391 | "status": "ok",
392 | "timestamp": 1581504105970,
393 | "user": {
394 | "displayName": "Sharoon Saxena",
395 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64",
396 | "userId": "01167841530696023488"
397 | },
398 | "user_tz": -330
399 | },
400 | "id": "0PrPd53w-rp6",
401 | "outputId": "6d46988f-e384-43d7-9622-353cd5cfc487"
402 | },
403 | "outputs": [
404 | {
405 | "data": {
406 | "text/plain": [
407 | "Index(['customer_id', 'vintage', 'age', 'dependents', 'city',\n",
408 | " 'customer_nw_category', 'branch_code', 'current_balance',\n",
409 | " 'previous_month_end_balance', 'average_monthly_balance_prevQ',\n",
410 | " 'average_monthly_balance_prevQ2', 'current_month_credit',\n",
411 | " 'previous_month_credit', 'current_month_debit', 'previous_month_debit',\n",
412 | " 'current_month_balance', 'previous_month_balance', 'churn'],\n",
413 | " dtype='object')"
414 | ]
415 | },
416 | "execution_count": 5,
417 | "metadata": {},
418 | "output_type": "execute_result"
419 | }
420 | ],
421 | "source": [
422 | "# storing indices of all numerical data types in numerical_cols\n",
423 | "numerical_cols = data.select_dtypes(include=['int64', 'float64']).columns\n",
424 | "\n",
425 | "# checking\n",
426 | "numerical_cols"
427 | ]
428 | },
429 | {
430 | "cell_type": "markdown",
431 | "metadata": {
432 | "colab_type": "text",
433 | "id": "Vnm7rEvEAbel"
434 | },
435 | "source": [
436 | "### Min obseravtion"
437 | ]
438 | },
439 | {
440 | "cell_type": "code",
441 | "execution_count": 0,
442 | "metadata": {
443 | "colab": {},
444 | "colab_type": "code",
445 | "id": "6oGzqFxqR_K9",
446 | "outputId": "f19a1072-9be5-403c-9337-6dc22cce9394"
447 | },
448 | "outputs": [
449 | {
450 | "data": {
451 | "text/html": [
452 | "\n",
453 | "\n",
466 | "
\n",
467 | " \n",
468 | " \n",
469 | " | \n",
470 | " customer_id | \n",
471 | " vintage | \n",
472 | " age | \n",
473 | " gender | \n",
474 | " dependents | \n",
475 | " occupation | \n",
476 | " city | \n",
477 | " customer_nw_category | \n",
478 | " branch_code | \n",
479 | " days_since_last_transaction | \n",
480 | " ... | \n",
481 | " previous_month_end_balance | \n",
482 | " average_monthly_balance_prevQ | \n",
483 | " average_monthly_balance_prevQ2 | \n",
484 | " current_month_credit | \n",
485 | " previous_month_credit | \n",
486 | " current_month_debit | \n",
487 | " previous_month_debit | \n",
488 | " current_month_balance | \n",
489 | " previous_month_balance | \n",
490 | " churn | \n",
491 | "
\n",
492 | " \n",
493 | " \n",
494 | " \n",
495 | " | 12608 | \n",
496 | " 13467 | \n",
497 | " 3082 | \n",
498 | " 80 | \n",
499 | " Male | \n",
500 | " 0.0 | \n",
501 | " retired | \n",
502 | " 1096.0 | \n",
503 | " 1 | \n",
504 | " 27 | \n",
505 | " 5.0 | \n",
506 | " ... | \n",
507 | " 423.06 | \n",
508 | " 1694.57 | \n",
509 | " 868.26 | \n",
510 | " 9471.01 | \n",
511 | " 2680.04 | \n",
512 | " 15229.44 | \n",
513 | " 7859.37 | \n",
514 | " 1050.17 | \n",
515 | " 2002.97 | \n",
516 | " 1 | \n",
517 | "
\n",
518 | " \n",
519 | "
\n",
520 | "
1 rows × 21 columns
\n",
521 | "
"
522 | ],
523 | "text/plain": [
524 | " customer_id vintage age gender dependents occupation city \\\n",
525 | "12608 13467 3082 80 Male 0.0 retired 1096.0 \n",
526 | "\n",
527 | " customer_nw_category branch_code days_since_last_transaction ... \\\n",
528 | "12608 1 27 5.0 ... \n",
529 | "\n",
530 | " previous_month_end_balance average_monthly_balance_prevQ \\\n",
531 | "12608 423.06 1694.57 \n",
532 | "\n",
533 | " average_monthly_balance_prevQ2 current_month_credit \\\n",
534 | "12608 868.26 9471.01 \n",
535 | "\n",
536 | " previous_month_credit current_month_debit previous_month_debit \\\n",
537 | "12608 2680.04 15229.44 7859.37 \n",
538 | "\n",
539 | " current_month_balance previous_month_balance churn \n",
540 | "12608 1050.17 2002.97 1 \n",
541 | "\n",
542 | "[1 rows x 21 columns]"
543 | ]
544 | },
545 | "execution_count": 6,
546 | "metadata": {
547 | "tags": []
548 | },
549 | "output_type": "execute_result"
550 | }
551 | ],
552 | "source": [
553 | "# observation with minimum current balance\n",
554 | "data[data['current_balance'] == data['current_balance'].min()]"
555 | ]
556 | },
557 | {
558 | "cell_type": "markdown",
559 | "metadata": {
560 | "colab_type": "text",
561 | "id": "yDJjIU9POuS9"
562 | },
563 | "source": [
564 | "* Customer's id is 13467\n",
565 | "* Customer has __minimum current balance__ is -5503.96 \n"
566 | ]
567 | },
568 | {
569 | "cell_type": "markdown",
570 | "metadata": {
571 | "colab_type": "text",
572 | "id": "SCHZvc0XOuTC"
573 | },
574 | "source": [
575 | "### Max observation"
576 | ]
577 | },
578 | {
579 | "cell_type": "code",
580 | "execution_count": 0,
581 | "metadata": {
582 | "colab": {},
583 | "colab_type": "code",
584 | "id": "YqXn9lcyR_LA",
585 | "outputId": "c48e8ca4-f9d6-471c-dd0d-9f8c14c8af28"
586 | },
587 | "outputs": [
588 | {
589 | "data": {
590 | "text/html": [
591 | "\n",
592 | "\n",
605 | "
\n",
606 | " \n",
607 | " \n",
608 | " | \n",
609 | " customer_id | \n",
610 | " vintage | \n",
611 | " age | \n",
612 | " gender | \n",
613 | " dependents | \n",
614 | " occupation | \n",
615 | " city | \n",
616 | " customer_nw_category | \n",
617 | " branch_code | \n",
618 | " days_since_last_transaction | \n",
619 | " ... | \n",
620 | " previous_month_end_balance | \n",
621 | " average_monthly_balance_prevQ | \n",
622 | " average_monthly_balance_prevQ2 | \n",
623 | " current_month_credit | \n",
624 | " previous_month_credit | \n",
625 | " current_month_debit | \n",
626 | " previous_month_debit | \n",
627 | " current_month_balance | \n",
628 | " previous_month_balance | \n",
629 | " churn | \n",
630 | "
\n",
631 | " \n",
632 | " \n",
633 | " \n",
634 | " | 24095 | \n",
635 | " 25712 | \n",
636 | " 1192 | \n",
637 | " 90 | \n",
638 | " Male | \n",
639 | " 0.0 | \n",
640 | " retired | \n",
641 | " 1020.0 | \n",
642 | " 2 | \n",
643 | " 5 | \n",
644 | " 18.0 | \n",
645 | " ... | \n",
646 | " 24270.54 | \n",
647 | " 11728.39 | \n",
648 | " 111617.41 | \n",
649 | " 12269845.39 | \n",
650 | " 0.21 | \n",
651 | " 7637857.36 | \n",
652 | " 0.21 | \n",
653 | " 8399.62 | \n",
654 | " 24270.54 | \n",
655 | " 1 | \n",
656 | "
\n",
657 | " \n",
658 | "
\n",
659 | "
1 rows × 21 columns
\n",
660 | "
"
661 | ],
662 | "text/plain": [
663 | " customer_id vintage age gender dependents occupation city \\\n",
664 | "24095 25712 1192 90 Male 0.0 retired 1020.0 \n",
665 | "\n",
666 | " customer_nw_category branch_code days_since_last_transaction ... \\\n",
667 | "24095 2 5 18.0 ... \n",
668 | "\n",
669 | " previous_month_end_balance average_monthly_balance_prevQ \\\n",
670 | "24095 24270.54 11728.39 \n",
671 | "\n",
672 | " average_monthly_balance_prevQ2 current_month_credit \\\n",
673 | "24095 111617.41 12269845.39 \n",
674 | "\n",
675 | " previous_month_credit current_month_debit previous_month_debit \\\n",
676 | "24095 0.21 7637857.36 0.21 \n",
677 | "\n",
678 | " current_month_balance previous_month_balance churn \n",
679 | "24095 8399.62 24270.54 1 \n",
680 | "\n",
681 | "[1 rows x 21 columns]"
682 | ]
683 | },
684 | "execution_count": 7,
685 | "metadata": {
686 | "tags": []
687 | },
688 | "output_type": "execute_result"
689 | }
690 | ],
691 | "source": [
692 | "# obseravtion with maxximum current month debit\n",
693 | "data[data['current_month_debit'] == data['current_month_debit'].max()]"
694 | ]
695 | },
696 | {
697 | "cell_type": "markdown",
698 | "metadata": {
699 | "colab_type": "text",
700 | "id": "qKH3ErymOuTU"
701 | },
702 | "source": [
703 | "* Customer's id is 25712\n",
704 | "* Customer has __maximum current month debit__ is 7637857.36 \n"
705 | ]
706 | },
707 | {
708 | "cell_type": "markdown",
709 | "metadata": {
710 | "colab_type": "text",
711 | "id": "X0UJT_w3OuTW"
712 | },
713 | "source": [
714 | "### Range "
715 | ]
716 | },
717 | {
718 | "cell_type": "markdown",
719 | "metadata": {
720 | "colab_type": "text",
721 | "id": "-RO9lwQeOuTa"
722 | },
723 | "source": [
724 | " __Range of Age__ in our datase indicating the difference of Age between the oldest and youngest customers"
725 | ]
726 | },
727 | {
728 | "cell_type": "code",
729 | "execution_count": 0,
730 | "metadata": {
731 | "colab": {
732 | "base_uri": "https://localhost:8080/",
733 | "height": 34
734 | },
735 | "colab_type": "code",
736 | "executionInfo": {
737 | "elapsed": 1044,
738 | "status": "ok",
739 | "timestamp": 1581505193463,
740 | "user": {
741 | "displayName": "Sharoon Saxena",
742 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64",
743 | "userId": "01167841530696023488"
744 | },
745 | "user_tz": -330
746 | },
747 | "id": "kkHzHmLrR_LF",
748 | "outputId": "223f34f6-1355-49b8-86a5-33a50d4b7d24"
749 | },
750 | "outputs": [
751 | {
752 | "name": "stdout",
753 | "output_type": "stream",
754 | "text": [
755 | "1 90\n"
756 | ]
757 | }
758 | ],
759 | "source": [
760 | "# Range of Age \n",
761 | "\n",
762 | "print(data['age'].min(), data['age'].max())"
763 | ]
764 | },
765 | {
766 | "cell_type": "markdown",
767 | "metadata": {
768 | "colab_type": "text",
769 | "id": "QRiwOGUTOuTp"
770 | },
771 | "source": [
772 | "* Oldest Customer Age is 90\n",
773 | "* Youngest Customer Age is 1\n",
774 | "* Range is [1,90]"
775 | ]
776 | },
777 | {
778 | "cell_type": "markdown",
779 | "metadata": {
780 | "colab_type": "text",
781 | "id": "ObUYzPs5OuTt"
782 | },
783 | "source": [
784 | "### Max, Min, Range for each column"
785 | ]
786 | },
787 | {
788 | "cell_type": "code",
789 | "execution_count": 0,
790 | "metadata": {
791 | "colab": {
792 | "base_uri": "https://localhost:8080/",
793 | "height": 408
794 | },
795 | "colab_type": "code",
796 | "executionInfo": {
797 | "elapsed": 1055,
798 | "status": "ok",
799 | "timestamp": 1581505222868,
800 | "user": {
801 | "displayName": "Sharoon Saxena",
802 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64",
803 | "userId": "01167841530696023488"
804 | },
805 | "user_tz": -330
806 | },
807 | "id": "VusaDbjqR_LH",
808 | "outputId": "b045a934-0606-4298-d0c6-edb1df53eae1"
809 | },
810 | "outputs": [
811 | {
812 | "data": {
813 | "text/plain": [
814 | "customer_id 30301.00\n",
815 | "vintage 12899.00\n",
816 | "age 90.00\n",
817 | "dependents 52.00\n",
818 | "city 1649.00\n",
819 | "customer_nw_category 3.00\n",
820 | "branch_code 4782.00\n",
821 | "days_since_last_transaction 365.00\n",
822 | "current_balance 5905904.03\n",
823 | "previous_month_end_balance 5740438.63\n",
824 | "average_monthly_balance_prevQ 5700289.57\n",
825 | "average_monthly_balance_prevQ2 5010170.10\n",
826 | "current_month_credit 12269845.39\n",
827 | "previous_month_credit 2361808.29\n",
828 | "current_month_debit 7637857.36\n",
829 | "previous_month_debit 1414168.06\n",
830 | "current_month_balance 5778184.77\n",
831 | "previous_month_balance 5720144.50\n",
832 | "churn 1.00\n",
833 | "dtype: float64"
834 | ]
835 | },
836 | "execution_count": 9,
837 | "metadata": {
838 | "tags": []
839 | },
840 | "output_type": "execute_result"
841 | }
842 | ],
843 | "source": [
844 | "# Printing Max of evey numerical column\n",
845 | "data[numerical_cols].max()"
846 | ]
847 | },
848 | {
849 | "cell_type": "markdown",
850 | "metadata": {
851 | "colab_type": "text",
852 | "id": "MM_lBwUHYXa2"
853 | },
854 | "source": [
855 | "* Maximum value of vintage for a customer is 12899.\n",
856 | "* Maximum age of a customer in our dataset is 90\n",
857 | "* Maximum number of dependents in our dataset is 52\n",
858 | "* Maximum day since last transaction is 365\n",
859 | "* Maximum values for __current_balance, previous_month_end_balance,average_monthly_balance_prevQ, current_month_balance, previous_month_balance__ are close to 57 lakhs.\n",
860 | "* Maximum value for current_month_credit is 12269845.39\n",
861 | "* Maximum value for previous_month_credit is 2361808.29\n",
862 | "* maximum value for current_month_debit and previous_month debit is respectively 7637857.36 and 1414168.06.\n",
863 | "* The features like __customer_id, city, customer_nw_category, branch_code, churn__ are required to be treated as categorcial variable so their maximum value don't represent numerical significance.\n"
864 | ]
865 | },
866 | {
867 | "cell_type": "code",
868 | "execution_count": 0,
869 | "metadata": {
870 | "colab": {
871 | "base_uri": "https://localhost:8080/",
872 | "height": 408
873 | },
874 | "colab_type": "code",
875 | "executionInfo": {
876 | "elapsed": 1084,
877 | "status": "ok",
878 | "timestamp": 1581505242279,
879 | "user": {
880 | "displayName": "Sharoon Saxena",
881 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64",
882 | "userId": "01167841530696023488"
883 | },
884 | "user_tz": -330
885 | },
886 | "id": "8OSHRZvfR_LK",
887 | "outputId": "a6e584fc-e627-4ca4-bdf6-e02d6348760b"
888 | },
889 | "outputs": [
890 | {
891 | "data": {
892 | "text/plain": [
893 | "customer_id 1.00\n",
894 | "vintage 180.00\n",
895 | "age 1.00\n",
896 | "dependents 0.00\n",
897 | "city 0.00\n",
898 | "customer_nw_category 1.00\n",
899 | "branch_code 1.00\n",
900 | "days_since_last_transaction 0.00\n",
901 | "current_balance -5503.96\n",
902 | "previous_month_end_balance -3149.57\n",
903 | "average_monthly_balance_prevQ 1428.69\n",
904 | "average_monthly_balance_prevQ2 -16506.10\n",
905 | "current_month_credit 0.01\n",
906 | "previous_month_credit 0.01\n",
907 | "current_month_debit 0.01\n",
908 | "previous_month_debit 0.01\n",
909 | "current_month_balance -3374.18\n",
910 | "previous_month_balance -5171.92\n",
911 | "churn 0.00\n",
912 | "dtype: float64"
913 | ]
914 | },
915 | "execution_count": 10,
916 | "metadata": {
917 | "tags": []
918 | },
919 | "output_type": "execute_result"
920 | }
921 | ],
922 | "source": [
923 | "# printing min of every numercial column\n",
924 | "data[numerical_cols].min()"
925 | ]
926 | },
927 | {
928 | "cell_type": "code",
929 | "execution_count": 0,
930 | "metadata": {
931 | "colab": {},
932 | "colab_type": "code",
933 | "id": "ZMnoIdSmYXa5",
934 | "outputId": "08e05724-a51a-4a81-9815-c2f869813b45"
935 | },
936 | "outputs": [
937 | {
938 | "name": "stdout",
939 | "output_type": "stream",
940 | "text": [
941 | "range of customer_id: [1, 30301]\n",
942 | "range of vintage: [180, 12899]\n",
943 | "range of age: [1, 90]\n",
944 | "range of dependents: [0.0, 52.0]\n",
945 | "range of city: [0.0, 1649.0]\n",
946 | "range of customer_nw_category: [1, 3]\n",
947 | "range of branch_code: [1, 4782]\n",
948 | "range of days_since_last_transaction: [0.0, 365.0]\n",
949 | "range of current_balance: [-5503.96, 5905904.03]\n",
950 | "range of previous_month_end_balance: [-3149.57, 5740438.63]\n",
951 | "range of average_monthly_balance_prevQ: [1428.69, 5700289.57]\n",
952 | "range of average_monthly_balance_prevQ2: [-16506.1, 5010170.1]\n",
953 | "range of current_month_credit: [0.01, 12269845.39]\n",
954 | "range of previous_month_credit: [0.01, 2361808.29]\n",
955 | "range of current_month_debit: [0.01, 7637857.36]\n",
956 | "range of previous_month_debit: [0.01, 1414168.06]\n",
957 | "range of current_month_balance: [-3374.18, 5778184.77]\n",
958 | "range of previous_month_balance: [-5171.92, 5720144.5]\n",
959 | "range of churn: [0, 1]\n"
960 | ]
961 | }
962 | ],
963 | "source": [
964 | "for col in numerical_cols:\n",
965 | " print(\"range of {}{}{}{}{}{}{}{}\".format(col,\":\",\" \",\"[\",data[col].min(), \", \",data[col].max(),\"]\"))"
966 | ]
967 | },
968 | {
969 | "cell_type": "markdown",
970 | "metadata": {
971 | "colab_type": "text",
972 | "id": "TzIxUenlYXa8"
973 | },
974 | "source": [
975 | "* Range of current_month_credit is highest among all features.\n",
976 | "* Range of days_since_last_transaction is 1 year."
977 | ]
978 | },
979 | {
980 | "cell_type": "code",
981 | "execution_count": 0,
982 | "metadata": {
983 | "colab": {},
984 | "colab_type": "code",
985 | "id": "rZ-87c8xYXa8"
986 | },
987 | "outputs": [],
988 | "source": []
989 | }
990 | ],
991 | "metadata": {
992 | "colab": {
993 | "collapsed_sections": [],
994 | "name": "Min_Max_Range_Updated.ipynb",
995 | "provenance": []
996 | },
997 | "kernelspec": {
998 | "display_name": "Python 3",
999 | "language": "python",
1000 | "name": "python3"
1001 | },
1002 | "language_info": {
1003 | "codemirror_mode": {
1004 | "name": "ipython",
1005 | "version": 3
1006 | },
1007 | "file_extension": ".py",
1008 | "mimetype": "text/x-python",
1009 | "name": "python",
1010 | "nbconvert_exporter": "python",
1011 | "pygments_lexer": "ipython3",
1012 | "version": "3.8.5"
1013 | }
1014 | },
1015 | "nbformat": 4,
1016 | "nbformat_minor": 1
1017 | }
1018 |
--------------------------------------------------------------------------------
/33.Mean_Variance.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 14,
6 | "metadata": {
7 | "colab": {},
8 | "colab_type": "code",
9 | "id": "UtrBkZutQ_nz"
10 | },
11 | "outputs": [],
12 | "source": [
13 | "# importing libraries\n",
14 | "import pandas as pd\n",
15 | "import numpy as np\n",
16 | "import matplotlib.pyplot as plt"
17 | ]
18 | },
19 | {
20 | "cell_type": "markdown",
21 | "metadata": {},
22 | "source": [
23 | "### This is our dataset of Customer Churn Prediction."
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": 15,
29 | "metadata": {
30 | "colab": {},
31 | "colab_type": "code",
32 | "id": "BSs0Y24MQ_n6"
33 | },
34 | "outputs": [],
35 | "source": [
36 | "# importing data\n",
37 | "data = pd.read_csv('churn_prediction.csv')"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": 16,
43 | "metadata": {
44 | "colab": {
45 | "base_uri": "https://localhost:8080/",
46 | "height": 211
47 | },
48 | "colab_type": "code",
49 | "executionInfo": {
50 | "elapsed": 1240,
51 | "status": "ok",
52 | "timestamp": 1581056281581,
53 | "user": {
54 | "displayName": "Sharoon Saxena",
55 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64",
56 | "userId": "01167841530696023488"
57 | },
58 | "user_tz": -330
59 | },
60 | "id": "36nlgrybQ_n9",
61 | "outputId": "3903402a-332e-44c7-9d45-7511ca730b3c"
62 | },
63 | "outputs": [
64 | {
65 | "data": {
66 | "text/html": [
67 | "\n",
68 | "\n",
81 | "
\n",
82 | " \n",
83 | " \n",
84 | " | \n",
85 | " customer_id | \n",
86 | " vintage | \n",
87 | " age | \n",
88 | " gender | \n",
89 | " dependents | \n",
90 | " occupation | \n",
91 | " city | \n",
92 | " customer_nw_category | \n",
93 | " branch_code | \n",
94 | " current_balance | \n",
95 | " ... | \n",
96 | " previous_month_credit | \n",
97 | " current_month_debit | \n",
98 | " previous_month_debit | \n",
99 | " current_month_balance | \n",
100 | " previous_month_balance | \n",
101 | " churn | \n",
102 | " doy_ls_tran | \n",
103 | " woy_ls_tran | \n",
104 | " moy_ls_tran | \n",
105 | " dow_ls_tran | \n",
106 | "
\n",
107 | " \n",
108 | " \n",
109 | " \n",
110 | " | 0 | \n",
111 | " 1 | \n",
112 | " 2101 | \n",
113 | " 66 | \n",
114 | " Male | \n",
115 | " 0.0 | \n",
116 | " self_employed | \n",
117 | " 187.0 | \n",
118 | " 2 | \n",
119 | " 755 | \n",
120 | " 1458.71 | \n",
121 | " ... | \n",
122 | " 0.20 | \n",
123 | " 0.20 | \n",
124 | " 0.20 | \n",
125 | " 1458.71 | \n",
126 | " 1458.71 | \n",
127 | " 0 | \n",
128 | " 141.0 | \n",
129 | " 21.0 | \n",
130 | " 5.0 | \n",
131 | " 1.0 | \n",
132 | "
\n",
133 | " \n",
134 | " | 1 | \n",
135 | " 2 | \n",
136 | " 2348 | \n",
137 | " 35 | \n",
138 | " Male | \n",
139 | " 0.0 | \n",
140 | " self_employed | \n",
141 | " NaN | \n",
142 | " 2 | \n",
143 | " 3214 | \n",
144 | " 5390.37 | \n",
145 | " ... | \n",
146 | " 0.56 | \n",
147 | " 5486.27 | \n",
148 | " 100.56 | \n",
149 | " 6496.78 | \n",
150 | " 8787.61 | \n",
151 | " 0 | \n",
152 | " 305.0 | \n",
153 | " 44.0 | \n",
154 | " 11.0 | \n",
155 | " 4.0 | \n",
156 | "
\n",
157 | " \n",
158 | " | 2 | \n",
159 | " 4 | \n",
160 | " 2194 | \n",
161 | " 31 | \n",
162 | " Male | \n",
163 | " 0.0 | \n",
164 | " salaried | \n",
165 | " 146.0 | \n",
166 | " 2 | \n",
167 | " 41 | \n",
168 | " 3913.16 | \n",
169 | " ... | \n",
170 | " 0.61 | \n",
171 | " 6046.73 | \n",
172 | " 259.23 | \n",
173 | " 5006.28 | \n",
174 | " 5070.14 | \n",
175 | " 0 | \n",
176 | " NaN | \n",
177 | " NaN | \n",
178 | " NaN | \n",
179 | " NaN | \n",
180 | "
\n",
181 | " \n",
182 | " | 3 | \n",
183 | " 5 | \n",
184 | " 2329 | \n",
185 | " 90 | \n",
186 | " NaN | \n",
187 | " NaN | \n",
188 | " self_employed | \n",
189 | " 1020.0 | \n",
190 | " 2 | \n",
191 | " 582 | \n",
192 | " 2291.91 | \n",
193 | " ... | \n",
194 | " 0.47 | \n",
195 | " 0.47 | \n",
196 | " 2143.33 | \n",
197 | " 2291.91 | \n",
198 | " 1669.79 | \n",
199 | " 1 | \n",
200 | " 218.0 | \n",
201 | " 32.0 | \n",
202 | " 8.0 | \n",
203 | " 1.0 | \n",
204 | "
\n",
205 | " \n",
206 | " | 4 | \n",
207 | " 6 | \n",
208 | " 1579 | \n",
209 | " 42 | \n",
210 | " Male | \n",
211 | " 2.0 | \n",
212 | " self_employed | \n",
213 | " 1494.0 | \n",
214 | " 3 | \n",
215 | " 388 | \n",
216 | " 927.72 | \n",
217 | " ... | \n",
218 | " 714.61 | \n",
219 | " 588.62 | \n",
220 | " 1538.06 | \n",
221 | " 1157.15 | \n",
222 | " 1677.16 | \n",
223 | " 1 | \n",
224 | " 307.0 | \n",
225 | " 44.0 | \n",
226 | " 11.0 | \n",
227 | " 6.0 | \n",
228 | "
\n",
229 | " \n",
230 | "
\n",
231 | "
5 rows × 24 columns
\n",
232 | "
"
233 | ],
234 | "text/plain": [
235 | " customer_id vintage age gender dependents occupation city \\\n",
236 | "0 1 2101 66 Male 0.0 self_employed 187.0 \n",
237 | "1 2 2348 35 Male 0.0 self_employed NaN \n",
238 | "2 4 2194 31 Male 0.0 salaried 146.0 \n",
239 | "3 5 2329 90 NaN NaN self_employed 1020.0 \n",
240 | "4 6 1579 42 Male 2.0 self_employed 1494.0 \n",
241 | "\n",
242 | " customer_nw_category branch_code current_balance ... \\\n",
243 | "0 2 755 1458.71 ... \n",
244 | "1 2 3214 5390.37 ... \n",
245 | "2 2 41 3913.16 ... \n",
246 | "3 2 582 2291.91 ... \n",
247 | "4 3 388 927.72 ... \n",
248 | "\n",
249 | " previous_month_credit current_month_debit previous_month_debit \\\n",
250 | "0 0.20 0.20 0.20 \n",
251 | "1 0.56 5486.27 100.56 \n",
252 | "2 0.61 6046.73 259.23 \n",
253 | "3 0.47 0.47 2143.33 \n",
254 | "4 714.61 588.62 1538.06 \n",
255 | "\n",
256 | " current_month_balance previous_month_balance churn doy_ls_tran \\\n",
257 | "0 1458.71 1458.71 0 141.0 \n",
258 | "1 6496.78 8787.61 0 305.0 \n",
259 | "2 5006.28 5070.14 0 NaN \n",
260 | "3 2291.91 1669.79 1 218.0 \n",
261 | "4 1157.15 1677.16 1 307.0 \n",
262 | "\n",
263 | " woy_ls_tran moy_ls_tran dow_ls_tran \n",
264 | "0 21.0 5.0 1.0 \n",
265 | "1 44.0 11.0 4.0 \n",
266 | "2 NaN NaN NaN \n",
267 | "3 32.0 8.0 1.0 \n",
268 | "4 44.0 11.0 6.0 \n",
269 | "\n",
270 | "[5 rows x 24 columns]"
271 | ]
272 | },
273 | "execution_count": 16,
274 | "metadata": {},
275 | "output_type": "execute_result"
276 | }
277 | ],
278 | "source": [
279 | "# First look\n",
280 | "data.head()"
281 | ]
282 | },
283 | {
284 | "cell_type": "code",
285 | "execution_count": 17,
286 | "metadata": {
287 | "colab": {
288 | "base_uri": "https://localhost:8080/",
289 | "height": 33
290 | },
291 | "colab_type": "code",
292 | "executionInfo": {
293 | "elapsed": 1196,
294 | "status": "ok",
295 | "timestamp": 1581056282914,
296 | "user": {
297 | "displayName": "Sharoon Saxena",
298 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64",
299 | "userId": "01167841530696023488"
300 | },
301 | "user_tz": -330
302 | },
303 | "id": "VRCymO1kQ_oA",
304 | "outputId": "1fbba1c9-53da-4e3d-c6fd-edb0a8eed4bf"
305 | },
306 | "outputs": [
307 | {
308 | "data": {
309 | "text/plain": [
310 | "(28382, 24)"
311 | ]
312 | },
313 | "execution_count": 17,
314 | "metadata": {},
315 | "output_type": "execute_result"
316 | }
317 | ],
318 | "source": [
319 | "#shape of the data\n",
320 | "data.shape"
321 | ]
322 | },
323 | {
324 | "cell_type": "code",
325 | "execution_count": 18,
326 | "metadata": {
327 | "colab": {
328 | "base_uri": "https://localhost:8080/",
329 | "height": 163
330 | },
331 | "colab_type": "code",
332 | "executionInfo": {
333 | "elapsed": 1135,
334 | "status": "ok",
335 | "timestamp": 1581056285543,
336 | "user": {
337 | "displayName": "Sharoon Saxena",
338 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64",
339 | "userId": "01167841530696023488"
340 | },
341 | "user_tz": -330
342 | },
343 | "id": "0zQJF63XQ_oD",
344 | "outputId": "de4bdf95-994e-4964-d223-b01626604ea8"
345 | },
346 | "outputs": [
347 | {
348 | "data": {
349 | "text/plain": [
350 | "Index(['customer_id', 'vintage', 'age', 'gender', 'dependents', 'occupation',\n",
351 | " 'city', 'customer_nw_category', 'branch_code', 'current_balance',\n",
352 | " 'previous_month_end_balance', 'average_monthly_balance_prevQ',\n",
353 | " 'average_monthly_balance_prevQ2', 'current_month_credit',\n",
354 | " 'previous_month_credit', 'current_month_debit', 'previous_month_debit',\n",
355 | " 'current_month_balance', 'previous_month_balance', 'churn',\n",
356 | " 'doy_ls_tran', 'woy_ls_tran', 'moy_ls_tran', 'dow_ls_tran'],\n",
357 | " dtype='object')"
358 | ]
359 | },
360 | "execution_count": 18,
361 | "metadata": {},
362 | "output_type": "execute_result"
363 | }
364 | ],
365 | "source": [
366 | "#columns of data\n",
367 | "data.columns"
368 | ]
369 | },
370 | {
371 | "cell_type": "markdown",
372 | "metadata": {},
373 | "source": [
374 | "### Identificaiton of Data types "
375 | ]
376 | },
377 | {
378 | "cell_type": "code",
379 | "execution_count": 19,
380 | "metadata": {
381 | "colab": {
382 | "base_uri": "https://localhost:8080/",
383 | "height": 440
384 | },
385 | "colab_type": "code",
386 | "executionInfo": {
387 | "elapsed": 964,
388 | "status": "ok",
389 | "timestamp": 1581056286919,
390 | "user": {
391 | "displayName": "Sharoon Saxena",
392 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64",
393 | "userId": "01167841530696023488"
394 | },
395 | "user_tz": -330
396 | },
397 | "id": "IoHu82GiQ_oG",
398 | "outputId": "6fed1d1f-4fc8-42a2-ab9e-88dcc4f67557"
399 | },
400 | "outputs": [
401 | {
402 | "data": {
403 | "text/plain": [
404 | "customer_id int64\n",
405 | "vintage int64\n",
406 | "age int64\n",
407 | "gender object\n",
408 | "dependents float64\n",
409 | "occupation object\n",
410 | "city float64\n",
411 | "customer_nw_category int64\n",
412 | "branch_code int64\n",
413 | "current_balance float64\n",
414 | "previous_month_end_balance float64\n",
415 | "average_monthly_balance_prevQ float64\n",
416 | "average_monthly_balance_prevQ2 float64\n",
417 | "current_month_credit float64\n",
418 | "previous_month_credit float64\n",
419 | "current_month_debit float64\n",
420 | "previous_month_debit float64\n",
421 | "current_month_balance float64\n",
422 | "previous_month_balance float64\n",
423 | "churn int64\n",
424 | "doy_ls_tran float64\n",
425 | "woy_ls_tran float64\n",
426 | "moy_ls_tran float64\n",
427 | "dow_ls_tran float64\n",
428 | "dtype: object"
429 | ]
430 | },
431 | "execution_count": 19,
432 | "metadata": {},
433 | "output_type": "execute_result"
434 | }
435 | ],
436 | "source": [
437 | "#different data types\n",
438 | "data.dtypes"
439 | ]
440 | },
441 | {
442 | "cell_type": "markdown",
443 | "metadata": {},
444 | "source": [
445 | "### Mean Value"
446 | ]
447 | },
448 | {
449 | "cell_type": "code",
450 | "execution_count": 20,
451 | "metadata": {
452 | "colab": {
453 | "base_uri": "https://localhost:8080/",
454 | "height": 33
455 | },
456 | "colab_type": "code",
457 | "executionInfo": {
458 | "elapsed": 1018,
459 | "status": "ok",
460 | "timestamp": 1581056294995,
461 | "user": {
462 | "displayName": "Sharoon Saxena",
463 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64",
464 | "userId": "01167841530696023488"
465 | },
466 | "user_tz": -330
467 | },
468 | "id": "5VbQGrVYQ_oJ",
469 | "outputId": "8ef087dd-dc40-4932-fb78-fa71905f2b85"
470 | },
471 | "outputs": [
472 | {
473 | "name": "stdout",
474 | "output_type": "stream",
475 | "text": [
476 | "48.208336269466564\n",
477 | "47.461216730038025\n"
478 | ]
479 | }
480 | ],
481 | "source": [
482 | "#mean of age\n",
483 | "print(data['age'].mean())\n",
484 | "\n",
485 | "#mean of age who are likely to churn\n",
486 | "print(data[data['churn'] == 1]['age'].mean())"
487 | ]
488 | },
489 | {
490 | "cell_type": "markdown",
491 | "metadata": {},
492 | "source": [
493 | "The __mean value of Age of Customers__ and the __mean value of Age of Customers who are likely to churn__ is around 48"
494 | ]
495 | },
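An equivalent one-liner for the same comparison — an illustrative sketch that is not part of the original notebook, assuming `data` is the churn DataFrame loaded above:

# mean age for non-churners (churn == 0) and churners (churn == 1), in a single call
print(data.groupby('churn')['age'].mean())

Grouping on the target column avoids writing one boolean filter per group.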
496 | {
497 | "cell_type": "code",
498 | "execution_count": 21,
499 | "metadata": {
500 | "colab": {
501 | "base_uri": "https://localhost:8080/",
502 | "height": 33
503 | },
504 | "colab_type": "code",
505 | "executionInfo": {
506 | "elapsed": 781,
507 | "status": "ok",
508 | "timestamp": 1581056297085,
509 | "user": {
510 | "displayName": "Sharoon Saxena",
511 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64",
512 | "userId": "01167841530696023488"
513 | },
514 | "user_tz": -330
515 | },
516 | "id": "WvLj2Fo9Q_oR",
517 | "outputId": "05b175ec-f135-471c-f91f-d348d2ecd3a7"
518 | },
519 | "outputs": [
520 | {
521 | "name": "stdout",
522 | "output_type": "stream",
523 | "text": [
524 | "7380.55180360792\n",
525 | "5220.884321292776\n"
526 | ]
527 | }
528 | ],
529 | "source": [
530 | "#current balance mean\n",
531 | "print(data['current_balance'].mean())\n",
532 | "\n",
533 | "#current balance of customers who are likely to churn\n",
534 | "print(data[data['churn']==1]['current_balance'].mean())"
535 | ]
536 | },
537 | {
538 | "cell_type": "markdown",
539 | "metadata": {},
540 | "source": [
541 | "* The __mean value of current balance of Customers___ is __7380.55180360792__ \n",
542 | "* The __mean value of Current Balance of Customers__ who are likely to churn is __5220.884321292776__."
543 | ]
544 | },
545 | {
546 | "cell_type": "markdown",
547 | "metadata": {},
548 | "source": [
549 | "### Median"
550 | ]
551 | },
552 | {
553 | "cell_type": "code",
554 | "execution_count": 22,
555 | "metadata": {
556 | "colab": {
557 | "base_uri": "https://localhost:8080/",
558 | "height": 33
559 | },
560 | "colab_type": "code",
561 | "executionInfo": {
562 | "elapsed": 659,
563 | "status": "ok",
564 | "timestamp": 1581056300650,
565 | "user": {
566 | "displayName": "Sharoon Saxena",
567 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64",
568 | "userId": "01167841530696023488"
569 | },
570 | "user_tz": -330
571 | },
572 | "id": "aHIkif6QQ_oW",
573 | "outputId": "cbf009f7-8780-425e-b2ec-3c703be9759b"
574 | },
575 | "outputs": [
576 | {
577 | "data": {
578 | "text/plain": [
579 | "46.0"
580 | ]
581 | },
582 | "execution_count": 22,
583 | "metadata": {},
584 | "output_type": "execute_result"
585 | }
586 | ],
587 | "source": [
588 | "#median of age\n",
589 | "data['age'].median()"
590 | ]
591 | },
592 | {
593 | "cell_type": "markdown",
594 | "metadata": {},
595 | "source": [
596 | "* __Median__ of age is 46. \n",
597 | "* That means 46 is the __50th percentile__ of the age of customers."
598 | ]
599 | },
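A quick check, added here purely as an illustration (it assumes `data` is the DataFrame above), that the median is the same as the 0.5 quantile:

# the median is by definition the 50th percentile, so the two calls agree
print(data['age'].median())        # 46.0
print(data['age'].quantile(0.5))   # 46.0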
600 | {
601 | "cell_type": "markdown",
602 | "metadata": {},
603 | "source": [
604 | "### Standard Deviation and Variance"
605 | ]
606 | },
607 | {
608 | "cell_type": "code",
609 | "execution_count": 23,
610 | "metadata": {
611 | "colab": {
612 | "base_uri": "https://localhost:8080/",
613 | "height": 33
614 | },
615 | "colab_type": "code",
616 | "executionInfo": {
617 | "elapsed": 1079,
618 | "status": "ok",
619 | "timestamp": 1581056392277,
620 | "user": {
621 | "displayName": "Sharoon Saxena",
622 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64",
623 | "userId": "01167841530696023488"
624 | },
625 | "user_tz": -330
626 | },
627 | "id": "_pRqupeJQ_ok",
628 | "outputId": "309e9107-c905-44ff-8e68-c0705e2907d4"
629 | },
630 | "outputs": [
631 | {
632 | "name": "stdout",
633 | "output_type": "stream",
634 | "text": [
635 | "42598.711923233204\n",
636 | "1814650257.5186107\n"
637 | ]
638 | }
639 | ],
640 | "source": [
641 | "print(data['current_balance'].std())\n",
642 | "print(data['current_balance'].var())"
643 | ]
644 | },
645 | {
646 | "cell_type": "markdown",
647 | "metadata": {},
648 | "source": [
649 | "* __standard deviation__ for current balance of the customers is 42598.711923233204\n",
650 | "* __variance__ for current balance of the customers is 1814650257.5186107"
651 | ]
652 | },
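As a sanity check — an illustrative sketch, not in the original notebook, assuming `data` is the DataFrame above — the variance is just the square of the standard deviation (pandas uses ddof=1 for both by default):

# variance should equal std squared, up to floating-point rounding
std = data['current_balance'].std()
var = data['current_balance'].var()
print(std ** 2)
print(var)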
653 | {
654 | "cell_type": "markdown",
655 | "metadata": {},
656 | "source": [
657 | "### Describe Function"
658 | ]
659 | },
660 | {
661 | "cell_type": "markdown",
662 | "metadata": {
663 | "colab": {
664 | "base_uri": "https://localhost:8080/",
665 | "height": 33
666 | },
667 | "colab_type": "code",
668 | "executionInfo": {
669 | "elapsed": 659,
670 | "status": "ok",
671 | "timestamp": 1581056393920,
672 | "user": {
673 | "displayName": "Sharoon Saxena",
674 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64",
675 | "userId": "01167841530696023488"
676 | },
677 | "user_tz": -330
678 | },
679 | "id": "TZ6WONNhQ_oo",
680 | "outputId": "c73c8bda-6079-4f12-85c1-068f1632537a"
681 | },
682 | "source": [
683 | "describe() function is used to view some basic __descriptive statistical details__ like __percentile, mean, std etc.__ of a data frame."
684 | ]
685 | },
686 | {
687 | "cell_type": "code",
688 | "execution_count": 24,
689 | "metadata": {
690 | "colab": {
691 | "base_uri": "https://localhost:8080/",
692 | "height": 297
693 | },
694 | "colab_type": "code",
695 | "executionInfo": {
696 | "elapsed": 1279,
697 | "status": "ok",
698 | "timestamp": 1581056402110,
699 | "user": {
700 | "displayName": "Sharoon Saxena",
701 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64",
702 | "userId": "01167841530696023488"
703 | },
704 | "user_tz": -330
705 | },
706 | "id": "G2aklW5KQ_ow",
707 | "outputId": "3a64f669-809f-41b6-a077-06cf350af978"
708 | },
709 | "outputs": [
710 | {
711 | "data": {
712 | "text/html": [
713 | "\n",
714 | "\n",
727 | "
\n",
728 | " \n",
729 | " \n",
730 | " | \n",
731 | " customer_id | \n",
732 | " vintage | \n",
733 | " age | \n",
734 | " dependents | \n",
735 | " city | \n",
736 | " customer_nw_category | \n",
737 | " branch_code | \n",
738 | " current_balance | \n",
739 | " previous_month_end_balance | \n",
740 | " average_monthly_balance_prevQ | \n",
741 | " ... | \n",
742 | " previous_month_credit | \n",
743 | " current_month_debit | \n",
744 | " previous_month_debit | \n",
745 | " current_month_balance | \n",
746 | " previous_month_balance | \n",
747 | " churn | \n",
748 | " doy_ls_tran | \n",
749 | " woy_ls_tran | \n",
750 | " moy_ls_tran | \n",
751 | " dow_ls_tran | \n",
752 | "
\n",
753 | " \n",
754 | " \n",
755 | " \n",
756 | " | count | \n",
757 | " 28382.000000 | \n",
758 | " 28382.000000 | \n",
759 | " 28382.000000 | \n",
760 | " 25919.000000 | \n",
761 | " 27579.000000 | \n",
762 | " 28382.000000 | \n",
763 | " 28382.000000 | \n",
764 | " 2.838200e+04 | \n",
765 | " 2.838200e+04 | \n",
766 | " 2.838200e+04 | \n",
767 | " ... | \n",
768 | " 2.838200e+04 | \n",
769 | " 2.838200e+04 | \n",
770 | " 2.838200e+04 | \n",
771 | " 2.838200e+04 | \n",
772 | " 2.838200e+04 | \n",
773 | " 28382.000000 | \n",
774 | " 25159.000000 | \n",
775 | " 25159.000000 | \n",
776 | " 25159.000000 | \n",
777 | " 25159.000000 | \n",
778 | "
\n",
779 | " \n",
780 | " | mean | \n",
781 | " 15143.508667 | \n",
782 | " 2091.144105 | \n",
783 | " 48.208336 | \n",
784 | " 0.347236 | \n",
785 | " 796.109576 | \n",
786 | " 2.225530 | \n",
787 | " 925.975019 | \n",
788 | " 7.380552e+03 | \n",
789 | " 7.495771e+03 | \n",
790 | " 7.496780e+03 | \n",
791 | " ... | \n",
792 | " 3.261694e+03 | \n",
793 | " 3.658745e+03 | \n",
794 | " 3.339761e+03 | \n",
795 | " 7.451133e+03 | \n",
796 | " 7.495177e+03 | \n",
797 | " 0.185329 | \n",
798 | " 295.045709 | \n",
799 | " 39.116300 | \n",
800 | " 10.142255 | \n",
801 | " 3.042728 | \n",
802 | "
\n",
803 | " \n",
804 | " | std | \n",
805 | " 8746.454456 | \n",
806 | " 272.676775 | \n",
807 | " 17.807163 | \n",
808 | " 0.997661 | \n",
809 | " 432.872102 | \n",
810 | " 0.660443 | \n",
811 | " 937.799129 | \n",
812 | " 4.259871e+04 | \n",
813 | " 4.252935e+04 | \n",
814 | " 4.172622e+04 | \n",
815 | " ... | \n",
816 | " 2.968889e+04 | \n",
817 | " 5.198542e+04 | \n",
818 | " 2.430111e+04 | \n",
819 | " 4.203394e+04 | \n",
820 | " 4.243198e+04 | \n",
821 | " 0.388571 | \n",
822 | " 86.284356 | \n",
823 | " 15.889797 | \n",
824 | " 2.788671 | \n",
825 | " 1.712724 | \n",
826 | "
\n",
827 | " \n",
828 | " | min | \n",
829 | " 1.000000 | \n",
830 | " 73.000000 | \n",
831 | " 1.000000 | \n",
832 | " 0.000000 | \n",
833 | " 0.000000 | \n",
834 | " 1.000000 | \n",
835 | " 1.000000 | \n",
836 | " -5.503960e+03 | \n",
837 | " -3.149570e+03 | \n",
838 | " 1.428690e+03 | \n",
839 | " ... | \n",
840 | " 1.000000e-02 | \n",
841 | " 1.000000e-02 | \n",
842 | " 1.000000e-02 | \n",
843 | " -3.374180e+03 | \n",
844 | " -5.171920e+03 | \n",
845 | " 0.000000 | \n",
846 | " 1.000000 | \n",
847 | " 1.000000 | \n",
848 | " 1.000000 | \n",
849 | " 0.000000 | \n",
850 | "
\n",
851 | " \n",
852 | " | 25% | \n",
853 | " 7557.250000 | \n",
854 | " 1958.000000 | \n",
855 | " 36.000000 | \n",
856 | " 0.000000 | \n",
857 | " 409.000000 | \n",
858 | " 2.000000 | \n",
859 | " 176.000000 | \n",
860 | " 1.784470e+03 | \n",
861 | " 1.906000e+03 | \n",
862 | " 2.180945e+03 | \n",
863 | " ... | \n",
864 | " 3.300000e-01 | \n",
865 | " 4.100000e-01 | \n",
866 | " 4.100000e-01 | \n",
867 | " 1.996765e+03 | \n",
868 | " 2.074408e+03 | \n",
869 | " 0.000000 | \n",
870 | " 270.000000 | \n",
871 | " 33.000000 | \n",
872 | " 9.000000 | \n",
873 | " 1.000000 | \n",
874 | "
\n",
875 | " \n",
876 | " | 50% | \n",
877 | " 15150.500000 | \n",
878 | " 2154.000000 | \n",
879 | " 46.000000 | \n",
880 | " 0.000000 | \n",
881 | " 834.000000 | \n",
882 | " 2.000000 | \n",
883 | " 572.000000 | \n",
884 | " 3.281255e+03 | \n",
885 | " 3.379915e+03 | \n",
886 | " 3.542865e+03 | \n",
887 | " ... | \n",
888 | " 6.300000e-01 | \n",
889 | " 9.193000e+01 | \n",
890 | " 1.099600e+02 | \n",
891 | " 3.447995e+03 | \n",
892 | " 3.465235e+03 | \n",
893 | " 0.000000 | \n",
894 | " 335.000000 | \n",
895 | " 47.000000 | \n",
896 | " 12.000000 | \n",
897 | " 3.000000 | \n",
898 | "
\n",
899 | " \n",
900 | " | 75% | \n",
901 | " 22706.750000 | \n",
902 | " 2292.000000 | \n",
903 | " 60.000000 | \n",
904 | " 0.000000 | \n",
905 | " 1096.000000 | \n",
906 | " 3.000000 | \n",
907 | " 1440.000000 | \n",
908 | " 6.635820e+03 | \n",
909 | " 6.656535e+03 | \n",
910 | " 6.666887e+03 | \n",
911 | " ... | \n",
912 | " 7.492350e+02 | \n",
913 | " 1.360435e+03 | \n",
914 | " 1.357553e+03 | \n",
915 | " 6.667958e+03 | \n",
916 | " 6.654693e+03 | \n",
917 | " 0.000000 | \n",
918 | " 354.000000 | \n",
919 | " 50.000000 | \n",
920 | " 12.000000 | \n",
921 | " 5.000000 | \n",
922 | "
\n",
923 | " \n",
924 | " | max | \n",
925 | " 30301.000000 | \n",
926 | " 2476.000000 | \n",
927 | " 90.000000 | \n",
928 | " 52.000000 | \n",
929 | " 1649.000000 | \n",
930 | " 3.000000 | \n",
931 | " 4782.000000 | \n",
932 | " 5.905904e+06 | \n",
933 | " 5.740439e+06 | \n",
934 | " 5.700290e+06 | \n",
935 | " ... | \n",
936 | " 2.361808e+06 | \n",
937 | " 7.637857e+06 | \n",
938 | " 1.414168e+06 | \n",
939 | " 5.778185e+06 | \n",
940 | " 5.720144e+06 | \n",
941 | " 1.000000 | \n",
942 | " 365.000000 | \n",
943 | " 52.000000 | \n",
944 | " 12.000000 | \n",
945 | " 6.000000 | \n",
946 | "
\n",
947 | " \n",
948 | "
\n",
949 | "
8 rows × 22 columns
\n",
950 | "
"
951 | ],
952 | "text/plain": [
953 | " customer_id vintage age dependents city \\\n",
954 | "count 28382.000000 28382.000000 28382.000000 25919.000000 27579.000000 \n",
955 | "mean 15143.508667 2091.144105 48.208336 0.347236 796.109576 \n",
956 | "std 8746.454456 272.676775 17.807163 0.997661 432.872102 \n",
957 | "min 1.000000 73.000000 1.000000 0.000000 0.000000 \n",
958 | "25% 7557.250000 1958.000000 36.000000 0.000000 409.000000 \n",
959 | "50% 15150.500000 2154.000000 46.000000 0.000000 834.000000 \n",
960 | "75% 22706.750000 2292.000000 60.000000 0.000000 1096.000000 \n",
961 | "max 30301.000000 2476.000000 90.000000 52.000000 1649.000000 \n",
962 | "\n",
963 | " customer_nw_category branch_code current_balance \\\n",
964 | "count 28382.000000 28382.000000 2.838200e+04 \n",
965 | "mean 2.225530 925.975019 7.380552e+03 \n",
966 | "std 0.660443 937.799129 4.259871e+04 \n",
967 | "min 1.000000 1.000000 -5.503960e+03 \n",
968 | "25% 2.000000 176.000000 1.784470e+03 \n",
969 | "50% 2.000000 572.000000 3.281255e+03 \n",
970 | "75% 3.000000 1440.000000 6.635820e+03 \n",
971 | "max 3.000000 4782.000000 5.905904e+06 \n",
972 | "\n",
973 | " previous_month_end_balance average_monthly_balance_prevQ ... \\\n",
974 | "count 2.838200e+04 2.838200e+04 ... \n",
975 | "mean 7.495771e+03 7.496780e+03 ... \n",
976 | "std 4.252935e+04 4.172622e+04 ... \n",
977 | "min -3.149570e+03 1.428690e+03 ... \n",
978 | "25% 1.906000e+03 2.180945e+03 ... \n",
979 | "50% 3.379915e+03 3.542865e+03 ... \n",
980 | "75% 6.656535e+03 6.666887e+03 ... \n",
981 | "max 5.740439e+06 5.700290e+06 ... \n",
982 | "\n",
983 | " previous_month_credit current_month_debit previous_month_debit \\\n",
984 | "count 2.838200e+04 2.838200e+04 2.838200e+04 \n",
985 | "mean 3.261694e+03 3.658745e+03 3.339761e+03 \n",
986 | "std 2.968889e+04 5.198542e+04 2.430111e+04 \n",
987 | "min 1.000000e-02 1.000000e-02 1.000000e-02 \n",
988 | "25% 3.300000e-01 4.100000e-01 4.100000e-01 \n",
989 | "50% 6.300000e-01 9.193000e+01 1.099600e+02 \n",
990 | "75% 7.492350e+02 1.360435e+03 1.357553e+03 \n",
991 | "max 2.361808e+06 7.637857e+06 1.414168e+06 \n",
992 | "\n",
993 | " current_month_balance previous_month_balance churn \\\n",
994 | "count 2.838200e+04 2.838200e+04 28382.000000 \n",
995 | "mean 7.451133e+03 7.495177e+03 0.185329 \n",
996 | "std 4.203394e+04 4.243198e+04 0.388571 \n",
997 | "min -3.374180e+03 -5.171920e+03 0.000000 \n",
998 | "25% 1.996765e+03 2.074408e+03 0.000000 \n",
999 | "50% 3.447995e+03 3.465235e+03 0.000000 \n",
1000 | "75% 6.667958e+03 6.654693e+03 0.000000 \n",
1001 | "max 5.778185e+06 5.720144e+06 1.000000 \n",
1002 | "\n",
1003 | " doy_ls_tran woy_ls_tran moy_ls_tran dow_ls_tran \n",
1004 | "count 25159.000000 25159.000000 25159.000000 25159.000000 \n",
1005 | "mean 295.045709 39.116300 10.142255 3.042728 \n",
1006 | "std 86.284356 15.889797 2.788671 1.712724 \n",
1007 | "min 1.000000 1.000000 1.000000 0.000000 \n",
1008 | "25% 270.000000 33.000000 9.000000 1.000000 \n",
1009 | "50% 335.000000 47.000000 12.000000 3.000000 \n",
1010 | "75% 354.000000 50.000000 12.000000 5.000000 \n",
1011 | "max 365.000000 52.000000 12.000000 6.000000 \n",
1012 | "\n",
1013 | "[8 rows x 22 columns]"
1014 | ]
1015 | },
1016 | "execution_count": 24,
1017 | "metadata": {},
1018 | "output_type": "execute_result"
1019 | }
1020 | ],
1021 | "source": [
1022 | "data.describe(include=[int, float])"
1023 | ]
1024 | },
1025 | {
1026 | "cell_type": "markdown",
1027 | "metadata": {},
1028 | "source": [
1029 | "* count represents total number of data points.\n",
1030 | "* mean represents average value\n",
1031 | "* std represents standard deviation\n",
1032 | "* min represents the minimum value of the column\n",
1033 | "* 25% represents 25th percentile that means 25% data fall below this value.\n",
1034 | "* 50% represents 50th percentile that means 50% data fall below this value. This is __Median__\n",
1035 | "* 75% represents the 75th percentile here\n",
1036 | "* max represents the maximum value of the column"
1037 | ]
1038 | },
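A small sketch (not part of the original notebook; it assumes `data` is the DataFrame above) showing that the 25%/50%/75% rows of describe() are just quantiles:

# the same percentiles as in describe(), computed directly for age
print(data['age'].quantile([0.25, 0.5, 0.75]))
# 0.25    36.0
# 0.50    46.0
# 0.75    60.0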
1039 | {
1040 | "cell_type": "code",
1041 | "execution_count": 25,
1042 | "metadata": {
1043 | "colab": {},
1044 | "colab_type": "code",
1045 | "id": "RFier7inQ_o1"
1046 | },
1047 | "outputs": [],
1048 | "source": [
1049 | "churn_age = data[data['churn']==1]['age']"
1050 | ]
1051 | },
1052 | {
1053 | "cell_type": "markdown",
1054 | "metadata": {},
1055 | "source": [
1056 | "A __Histogram__ visualises the distribution of data over a continuous interval. \n",
1057 | "Each bar in a histogram represents the tabulated __frequency__ at each __interval/bin__. \n",
1058 | "In simple words height represents the frequency for respective bin (interval)"
1059 | ]
1060 | },
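A brief sketch of the bin counts behind such a histogram — illustrative only; it assumes `churn_age` from the cell above and uses NumPy, which plt.hist relies on internally:

import numpy as np

# counts per bin and the bin edges used by plt.hist(churn_age, bins=10)
counts, edges = np.histogram(churn_age, bins=10)
print(counts)   # frequency (number of customers) in each of the 10 bins
print(edges)    # the 11 bin edges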
1061 | {
1062 | "cell_type": "code",
1063 | "execution_count": 26,
1064 | "metadata": {
1065 | "colab": {
1066 | "base_uri": "https://localhost:8080/",
1067 | "height": 573
1068 | },
1069 | "colab_type": "code",
1070 | "executionInfo": {
1071 | "elapsed": 1734,
1072 | "status": "ok",
1073 | "timestamp": 1581058880316,
1074 | "user": {
1075 | "displayName": "Sharoon Saxena",
1076 | "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mA06b2KEfgpAIC1tQCip2ka2KqXrCY2GfWW9Fww=s64",
1077 | "userId": "01167841530696023488"
1078 | },
1079 | "user_tz": -330
1080 | },
1081 | "id": "gi_dN-BNQ_o7",
1082 | "outputId": "97e6261e-6b33-4050-a41b-74a0ec8852c0"
1083 | },
1084 | "outputs": [
1085 | {
1086 | "data": {
1087 | "text/plain": [
1088 | ""
1089 | ]
1090 | },
1091 | "execution_count": 26,
1092 | "metadata": {},
1093 | "output_type": "execute_result"
1094 | },
1095 | {
1096 | "data": {
1097 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAA9kAAAIcCAYAAADi5m1ZAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAVhwAAFYcBshnuugAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAgAElEQVR4nOzde7xWVZ348c8XQThxp0QhVDQtnXC8oCajjpImdtXMUnMqYzCLRglLxelHgWZhhKg5eY0ca7SmHLPRwlsiqeAFxtRSNBVTERIQEEIUWL8/9j7w8PA85wL7nOecw+f9eu3XPmevtfZaez+bw/metddakVJCkiRJkiRtvU61boAkSZIkSR2FQbYkSZIkSQUxyJYkSZIkqSAG2ZIkSZIkFcQgW5IkSZKkghhkS5IkSZJUEINsSZIkSZIKYpAtSZIkSVJBDLIlSZIkSSqIQbYkSZIkSQUxyJYkSZIkqSAG2ZIkSZIkFcQgW5IkSZKkghhkS1IbExEp3wbXui2SWk5EzM//rR9Zdvy0/PiM2rRMkrQ1DLIlSZIkSSqIQbYkSVLbshyYB/y11g2RJDVf51o3QJIkSRullG4Bbql1OyRJW8aebEmSJEmSCmKQLUktLDLHR8StEbEgItZExKKImB0R4yNiQANl94mIX+T510TEvIj4VkRsXyHvkflkSfMbON/1eZ4JDZWNiFMi4r6IWFo6MVNETMi/vz4itouIsRHxeET8PSJej4jbImLolt2pzdq6f0RcEBEPRMTLEfFWRCyJiN9HxOcjIhoo2ysiLo6I5yLizYh4JSJ+HBGDGptUKr+uL0bEPRGxOK/3lYj4r4jYt4hrK6tvQ3vyZ+XfIuKxiFgVEQsj4qcRsXNJ/qMi4o78XqyKiD9ExBGN1HFARPxnPtHWmxGxLCJm5nVv9rtA3o4PR8QVEfF/EfFa/vy9EhG/iojDG6hrw8R9zXl+t0ZbvIcl5Y6MiDvz/G9ExCMRMbKp11Mh7b0RMS4i7o2IF0va8mBEnFXt3kbJv/2IqIuIifnn8Wb++f4iIvZsqF1boux5ODh/fhZGxLrIfw7V8nmLiKGR/WxeEtnPsccj+7nWKX+eUkScVqXsgIj4fkQ8GREr82fp8Yj4dkT03Np7J6kdSym5ubm5ubXQBtQBvwZSvi0GHgH+AryVHzutrEx93i8Bq4E3gEeBV0rSflWhriPztPkNtOf6PM+EamWBqfnXC4GH83qPzPNNyNN+BtyRf/0s8BjwZv7934GDCrh3j+bnWwY8ld+30ntwY5Vy7wL+VJLvz8Dc/H6/BkzMj8+oULYvMLOk7Ct52RX5928BJ1ept77Mac28ztPq2wPcWHJPHy95Rl4A3gmMBtbnn82c/NlIwBrg0CrnPycvk/LreAx4qaS9twDblZXpkaetB/4G/DEvt7Tk+JcbuQ/Nen638llpc/cwLzeqpNyy/BlekH8/lezfWyL/91Xpeiqc81d52hv5NT5ccp76e7B9A//2L8nbv57s38aTwNts/Pm0a8GfTX27vp7XU/88zAO+XcvnDTiu5PlYmX8+z+ff/zK/lxX/TQNH5Z9p/bPzdL6tzY89DQws8l66ubm1n63mDXBzc3PryFvJL7avAycCnUrS6oDPAYeVlan/xfAt4GKgW0naZ0t+aR9eVu5Itj7IXksWLJ8KRJ4WQNf86wklbXsBGFpyjncBD+Tp9xVw7z4LDKlw/CDgmbyezQJe4L/ztBeBfUuODwD+UPJL9YwKZX+bp/2htG6yN7++BqzLf5F/b4WyWxtkvwW8CgwrSds9v88JuJXsDxijSj6b7mz8Y8f9Fc59Usnz9/my5+8gsiAtAePLym1PFrQMLDu+HfAZYFXe3p0buA/Nen638llpi/dwb7LgKwGXsfHfUOT3di0bg9sjq1xPpWf0OODg+vaXHN8LmJWXG9fAv/23yP64sEdJ2m5kQWEC/rOoz6XseVgL/KDseair1fNG9vOgPkj+GdCjJO1YsmC92h9C92DjH96+U1Z2ABt/jtxT5L10c3NrP1vNG+Dm5ubWUTfgH0t+AfxgM8rVl7m7Svpv8vRLyo4fydYH2Qn49wbKTyjJd3iF9APY2PPUuwXv7dF5Pb8rO757yS/Vh1Uo17/kl+MZVc75ItCnSr2X53l+VCHt5Xz7dDOv5bSSe7pZWbKe1/r0/6iQ/v6S9D4lxzuzsYfzk1XqHprfr9ep0PvZQJu/k5/3vKKe3618HtrcPQR+nJebW6XcNSV1HlnlemY0dN0VzrlHXu6pCmnX52lvAu+pkH5Cnr60qM+l7Hm4YyvOUfjzxsY3Wp4GOlco9+WSc59Wlvaz/PhlVersmf8sSMDBRd5PNze39rE5u7gktZwT8v2slNLvt6D8f1Q5Pgv4ONkv1C3huibkeTyl9IcKx/+PrPeuK/Aeslett1hE7AqcTBa8vys/LyX7/cuKjCDrKZyXUrq//Hwppb9FxK/J3iAod1K+vymltKxKk24GziR7VbT83IMauJSmeD2l9MsKx+eUfH1NhXr/FBFvAt3I7nl9/g8AuwILUzZb9WZSSnMi4kVgMFmwOKs0PSIOBj5J1ivbh42rkvTP9+X3v1Qtnt+2dA8/ku9/WKWtlwKnV7+U6iKiP9m/i4OBHfN2l85R8L6IqEspra5Q/I6U0nMVjte3u29E9EspLd2StjXgx41laOXn7dh8f31KaW2Fcv9J9hl1LT0YEV3yNgJcWanClNIbEXEX2R9LjiJ7pV/SNsQgW5Jazj75/sEtLP9MleOL8n1LTKyzOKX0tybkq9i2lFKKiL8BO7OV7YuIs4DJZK+SVvPOsu/fl+8fa6DM/1E5yK6f1OyEiDisStlu+X7nKulbo1LgA9kY1Xp/aSDPLmRjW+vVX09dRGz2B4cS9fdwZ/JAKyI6A9OofJ8qla2kFs9vm7iHEdEb2Ck/9qcqZeaRvULdrN/FIuJE4Cdl7dwsG9CPbFxyucY+F8g+m6KD7Gr3oVbPW4M/K1JKqyNiHtkbSaX2BN6Rf31dVJ9/cdd83xI/KyS1cQbZktRyeuX7ar2ijVlV5fj6fF/1t7utUK3O5uTb6vZFxDCycayQ9VD9J9nY1zdSSusiYneygKr8/7H6wOONBk5fLa1vvt8z3xpS10j6lqh2T9OGL1JqLE/pPa+/nt7AoU2o/x0lX3+DLOB5EzifbMzyX4G/539IGUnWM9mlgfO1pee3te9haUC3qFLG/DleQtYT3SQRMZjsVeWuZHMPXE72uvPylNLafJbzdXn2ap9NxetPKa0vCRhb+2dLLZ63Lf1Z0bfk6+b+u5K0jTDIlqSWsyLf92ml+ioFCeW6t0ZDCvCFfP+rlNK/VUiv1qO1Mt831EtaLa2+7MiU0k8aaV97UH89M1NKRzSz7Gn5/hsppUqv4TbUo9iRbOk9LA3OdiQb57+JiNiO5t/Hk8kC7IeBU1JK68vS2+vnclq+b83nbSXZH0+a+7Oi/plIQJeU0roKeSRt41wnW5JazuP5/p9aqb76npz+De
R5b2s0pAC75fuZVdIPqXJ8Xr5vaD3r/aocfyLfl78e2l7VX8/7G1rHuYotvf8dzRbdw5TScrIlwgD+oUq299H8zo76z+X+CgE2tN/PpRbPW4M/KyKijso/L58hm3cigCEt0C5JHYBBtiS1nJvJejuGRcSRrVDfX/L6ukXEZhMERcShtJ8A8u/5fkB5QkR0I5t8rJL6pZj2iojN/rgREe8Cjq9S9r/z/ecjosmv8LZh95OtyfxO4F+bWbah+78X2URS24KtuYe/y/fVntUxW9Cehj6XIHvtuj2qxfM2Pd9/IR8TXu5zbJyDYYN8Mrnb8m/PaYF2SeoADLIlqYWklJ4kWzYH4OaI+GSUDnqM6BYR/9LAJFvNrW8Z2frOAJdFxIZXLPOg+waydXnbg/vy/eiIOKj+YD6r8q+oMplQSul5oH526f+KiH1Kyu5E9oePiuOpU0q3AXeSTRh1b6XPJSJ2j4hzI2JUhbT5+XZiUy6wpaWU3mJjEPDDiPha3ju3QUT0iIhPRUT5jPL19/+7ETGgJP++wP+ycdxvoSJiQkSkiJjfEudvrq28h1PI/r0dEBGXRMT2ef6IiH8lC9orzWrdkPrP5dMR8dGSNvQkWxXg4Gaer0kiYnD+uaQW+oNhLZ63q4DlZDOZ/zgiNgyliYhjyNb0rvbz8ptkQwJOjYhr8p8tG0RE54g4IiKmRcS7W6Dtkto4g2xJallfJVuntR/wP8BrEfFwRDxLNmb7pxS7lNE5ZJMHHQ68HBGPRcQzZEtpPcvG3tq27lrgKbIxkw9FxLyImEu29uzRQKVx2vW+mpcdDPwxIv6Ul/0r2XrI38vzVfrF/STgbrJfvP8QEYvyz2tOPmv6c8DFQKXlunbNt4ZmfW5VKaUbga8B2wFTgaX5MzE7Iv5CFmT8iuyelhpPNvxgKPBCRPwxIp4mm4m5K3BBa11DrW3pPUwp/YmsFzsBY4FFEfEw2TN8HdnSXpVm/27Ib4AZZK+Z3xYRz0fEo2Svpp8GfGlLrrENaPXnLaX0KtncD2uBzwMLI+KRiHiO7I2YO9m4rNm6srLzgE8Ai8mWYXslIp6OiFkR8STZuO0ZwBdpeLI2SR2UQbYktaD81cLjgc+QvT66jmxMcE+ytXj/HxtfWyyivoeBw4DbyYLt9wFvAecCH6P5PWc1kc8AfTjZOrSvko3ZHADcQtZbd08DZReTjeGcDMwn+yPGTsCNZOttv5ZnXVGh7DKytbY/DdzKxs9r7zz/TcApwCVbd4WtJ6V0Gdlycv8BvEB2P4aS/TFgJnAe8KGyMk8Aw8juwWqy56gL2WzW+5N9Ji2hvhdzq9ZXL9qW3MO83NVkwffdZEH6P5C9fj4qpTR2C9qxnmz97Ul5OwaRLTt2L/DBlNJPm3vOJqr/XN4E/lz0yWv1vKWUbiX7WfG/ZL3WQ/L6zyH7g1t973alnxUzgL2AbwGPkP2MOQh4N/Ak2c+fQ6kw6Z2kji9SSo3nkiSpg4iI/wBGA1NTSmfXuj3aKCKeIgtcDk4pPVLr9igTEeeRBfY/Sil9tdbtaQ357O9LyZZi3Del9HgjRSRpA3uyJUnbjIjoTdZLDdVnMlYNRMQOZAH2PQbYbc7hZG/BTK51Q1rRZ8gC7CW0QO+9pI7NIFuS1KFERF1EXBARg8qO7072OuoOZK/a3l6L9qmq+onmJtW0FdpEvnTZocAvUkrza9ycQkXEsRFxUkR0LTkWEfFJ4Ef5oStTSu1imI2ktsPXxSVJLSoi7m9mkRNTSgsbz1a1vh5kM/8CPE82Brsf2TjaAF4HPpxSemhL65DU/kXEv5FNQPcm2frXbwK7A+/Ks9xL9rNiTW1aKKm9MsiWJLWoiGjufzS7bU2PWT6W8hyyCczeS7bG8XqyCYjuAKaklF7a0vNL6hjyNbi/ChxJNrlbb7I/0D1BNsnhj1NK7WXZQ0ltiEG2JEmSJEkFcUy2JEmSJEkFMciWJEmSJKkgBtmSJEmSJBXEIFuSJEmSpIJ0rnUDVF1EPA30JVuCRpIkSZLUenYHXk8p7dWcQgbZbVvfHj169B8yZEj/WjdEkiRJkrYlTz75JCtXrmx2OYPstu35IUOG9J81a1at2yFJkiRJ25Rhw4Yxe/bsZr9V7JhsSZIkSZIKYpAtSZIkSVJBDLIlSZIkSSqIQbYkSZIkSQUxyJYkSZIkqSAG2ZIkSZIkFcQgW5IkSZKkgrhOtiRJkqR2af369SxZsoQ33niDtWvXsn79+lo3SW1Up06d6Ny5Mz179uSd73wnnTq1XH+zQbYkSZKkdmfNmjX89a9/Ze3atRuORUQNW6S2bN26daxbt441a9awbNkydtllF7p27doidRlkS5IkSWp3Fi9ezNq1a6mrq2PHHXeka9euLdo7qfZt/fr1rFmzhkWLFrF69WoWL17Mu9/97hapy6dQkiRJUruzcuVKAAYNGkRdXZ0BthrUqVMn6urqGDRoELDx+WmRulrszJIkSZLUQlJKRASdO/tyrpquc+fORAQppRarwyBbkiRJkqSCGGRLkiRJklQQg2xJkiRJkgpikC1JkiRJUkEMsiVJkiRJKohT8UmS1IjB426vdRParfmTPlrrJkiS1KoMsiVJkiSpmtWr4dFHYcUK6NULDjwQ6upq3Sq1YQbZkiRJklTulVdg6lSYNg1ef33j8b59YeRIOPtsGDiwdu1Tm+WYbEmSJEkq9dhjMHQoTJmyaYAN2fdTpsABB2T5amjGjBlEBBMmTODBBx9k+PDh9OzZkx122IHRo0ezevVqAG6//XaGDRtG9+7d2XHHHTn33HNZu3btZue79dZbOeqoo+jbty/dunVjyJAh/OAHP2DdunWb5Fu+fDkXX3wxRxxxBAMHDmT77bdn4MCBfP7zn+e5557b7LwTJkwgIpgxYwY33ngj++23H3V1dQwYMIAxY8ZsaGdHYZAtSZIkSfVeeQWOPRYWLWo436JFWb4FC1qnXQ146KGHOOqoo+jduzdnnHEGu+yyC1deeSWnn346v/jFLzjxxBPZddddOeOMM+jTpw+TJ0/mu9/97ibnOP/88zn++OOZN28eJ5xwAqNHj6auro5zzjmHk08+eZO8Tz31FN/61reoq6vjk5/8JF/72tc48MADufHGGzn44IN58cUXK7bziiuu4Etf+hLvf//7+cpXvkLfvn25/PLLGTVqVIvdm1rwdXFJkiRJqjd1auMBdr1Fi7L8kye3bJsaMX36dH79619z3HHHAfD2229vCHrvuOMOZs6cyUEHHQTAxIkT2WOPPbjssss4//zz6dKlC3fddReTJk1ixIgR3HzzzXTv3h2AlBKjR4/mqquu4uabb+ZTn/oUAHvvvTevvvoq/fr126Qd9957L0cffTTf+c53uPbaazdr5913382cOXN43/veB8BFF13Efvvtx89//nMmT57MwA7y+r092ZIkSZIE2SRn06Y1r8y0aVm5Gho+fPiGABugS5cunHjiiaSU+PjHP74hwAbo2bMnH/vYx1i6dCkvv/wykPUwA1xzzTUbAmyAiGDSpElEBDfddNOG4717994sw
K5vx/vf/37uvvvuiu0cM2bMhgAboK6ujlNOOYX169czZ86cLbz6tseebEmSJEmCbBbx8jHYjVm6FObMgcMOa5k2NcF+++232bEBAwY0mrZgwQJ22203Zs+eTffu3ZlW5Q8MdXV1PP3005scmzFjBpdeeikPPfQQixcv3mSM9/bbb1/xPEOHDt3s2KBBgwBYtmxZxTLtkUG2JEmSJEG2TNeWWL682HY0U69evTY71rlz50bT3n77bQCWLl3K2rVrmThxYtU6Vq1ateHrX/7yl5x00kn06NGDESNGMHjwYN7xjncQEVx//fVVx2Q31JbyydXaM4NsSZIkSYJsHewt0bt3se1oZb169SIiWLx4cZPyT5gwgW7dujFnzhz23HPPTdJ+/vOft0QT2xXHZEuSJEkSwIEHZutgN0e/ftlyX+3YBz7wAZYsWcKzzz7bpPzPPfcce++992YB9quvvsrzzz/fEk1sVwyyJUmSJAmgrg5GjmxemZEjs3Lt2FlnnQXAyJEjWbJkyWbpCxcu5Kmnntrw/a677spf/vIXFpXMwv7mm2/yla98ZcMr6Nsyg2xJkiRJqnf22bDjjk3Lu9NOMHZsy7anFRx77LGMHz+e+++/nz322INTTjmFcePGcfrppzN8+HAGDRrErbfeuiH/mWeeyYoVK9h///0566yzGD16NPvssw9PPvkk++67bw2vpG0wyJYkSZKkegMHwvTpjQfaO+2U5esgaztfcMEF3HXXXRx++OHcc889XHLJJdx2222sWbOGCRMmcOqpp27I+9WvfpWrrrqKfv36ce2113LLLbdwxBFHMGvWLPr06VPDq2gbIqVU6zaoioiYdcghhxwya9asWjdFkrZpg8fdXusmtFvzJ3201k2Q1EHVLym11157tUwFCxbA1KnZOthLl2483q9f9or42LEdJsDe1jT12Rk2bBizZ8+enVIa1pzzO7u4JEmSJJUbOBAmT4YLLsjWwV6+PJtFfOjQdj8GWy3LIFuSJEmSqqmrg8MOq3Ur1I44JluSJEmSpIIYZEuSJEmSVBCDbEmSJEmSCmKQLUmSJElSQQyyJUmSJEkqiEG2JEmSJEkFMciWJEmSJKkgBtmSJEmSJBXEIFuSJEmSpIIYZEuSJEmSVBCDbEmSJEmSCmKQLUmSJEkqxIwZM4gIJkyYsMnxwYMHM3jw4Jq0qbUZZEuSJEmSVJDOtW6AJEmSJLVVq99ezaMLHmXFmhX06tqLAwceSF2Xulo3q9255557at2EVmOQLUmSJEllXlnxClNnT2Xa/03j9Tdf33C8b7e+jNx/JGcPO5uBPQfWsIXty3ve855aN6HV+Lq4JEmSJJV4bOFjDL1mKFNmTdkkwAZ4/c3XmTJrCgdcfQCPLXysRi3MlI5/fvDBBxk+fDg9e/Zkhx12YPTo0axevRqA22+/nWHDhtG9e3d23HFHzj33XNauXbvZ+W699VaOOuoo+vbtS7du3RgyZAg/+MEPWLdu3WZ5V69ezbhx49h555035L322murtrXSmOwFCxbw7W9/m0MOOYT+/fvTtWtXBg8ezOjRo/nb3/622TlOO+00IoIXXniByy+/nL322ouuXbuy6667MnHiRNavX9/MO9gyDLIlSZIkKffKilc49mfHsmjVogbzLVq1iGN/diwL3ljQSi2r7qGHHuKoo46id+/enHHGGeyyyy5ceeWVnH766fziF7/gxBNPZNddd+WMM86gT58+TJ48me9+97ubnOP888/n+OOPZ968eZxwwgmMHj2auro6zjnnHE4++eRN8q5fv55PfOITXHzxxfTt25cxY8ZwyCGHMHbsWKZMmdLkds+cOZMpU6aw4447csopp3DmmWfynve8hyuvvJJhw4axfPnyiuXOOeccLrzwQoYNG8aXv/xlACZMmMD48eObeedahq+LS5IkSVJu6uypjQbY9RatWsTUWVOZfMzkFm5Vw6ZPn86vf/1rjjvuOADefvttDjzwQG688UbuuOMOZs6cyUEHHQTAxIkT2WOPPbjssss4//zz6dKlC3fddReTJk1ixIgR3HzzzXTv3h2AlBKjR4/mqquu4uabb+ZTn/oUADfccAN33303xx57LLfddhvbbbcdAGPGjOHAAw9scrs/+MEPsnDhQnr06LHJ8RtuuIEvfOELXHHFFXzzm9/crNzcuXN5/PHHGTBgAADjx49nzz335Ic//CHf/va32X777Zt5B4tlT7YkSZIkkU1yNu3/pjWrzLTHprH67dUt1KKmGT58+IYAG6BLly6ceOKJpJT4+Mc/viHABujZsycf+9jHWLp0KS+//DIAV1xxBQDXXHPNhgAbICKYNGkSEcFNN9204fgNN9wAwEUXXbQhwAbYZ599+NznPtfkdvfv33+zABvgc5/7HL169eLuu++uWG78+PEbAmyAd73rXRx33HG88cYbzJs3r8n1txR7siVpGzF43O21boIkSW3aowse3WwMdmOWrl7KnFfncNguh7VQqxq33377bXasPghtKG3BggXstttuzJ49m+7duzNtWuU/MNTV1fH0009v+P6Pf/wj3bt354ADDtgs7+GHH86Pf/zjJrf9f/7nf7j66quZO3cur7/++ibjvxcsqPwq/tChQzc7NmjQIACWLVvW5LpbSrsKsiPiX4DDgaHAPsD2wBdTStdXyd8LmAB8CtgJeBX4JTAxpbSyQv5OwFeBLwF7ACuBu4FvppSer1LHCODfgQOABMwBvpNS2nbmqJckSZI6gBVrVmxRueVvVh473Fp69eq12bHOnTs3mvb2228DsHTpUtauXcvEiROr1rFq1aoNXy9fvpydd965Yr4dd9yxye2eMmUK3/jGN9hhhx045phjGDRoEHV12fJol156KWvWrKlYrqFrqjRJW2trV0E28B1gV2AxWcC8a7WMEdEduA/YD7gTuAnYH/gGcERE/HNK6c2yYlcDo4A/AZcDA4HPAMdExCEppWfL6vgX4KfAa8D1+eGTgLsi4jMppV9t+aVKkiRJak29um4evDVF7269C25J6+rVqxcRweLFi5uUv3fv3rz22msV0xYtatp49rVr13LhhRcyYMAAHnvsMfr3778hLaXE97///Sadpy1qb2OyRwGDU0o7AFc1kvdcsgD74pTSiJTSuJTSCOBi4CBgbGnmiBien38mcEBK6byU0ueA44F+wBVl+fsCPyQL+A9IKZ2ZUjqTrEd7CXBlRPTcusuVJEmS1FoOHHggfbv1bVaZfnX9GDpg89eX25MPfOADLFmyhGeffbbxzMC+++7LqlWrmDt37mZpf/jDH5p0jsWLF7N8+XKGDRu2SYAN8Oijj25Yfqw9aldBdkrp7pTSi43li4ggC5hXAheWJV+YHx9Vdvz0fD8+pfRWSZ2/A2aQ9WbvUpL/00Af4IcppZdL8r9MFpC/C/hkEy5LkiRJUhtQ16WOkfuPbFaZkfuNpK5LXQu1qHWcddZZAIwcOZIlS5Zslr5w4UKeeuqpDd/XT272zW9+c5PXs5944gl++tOfNqnO/v37U1dXx9y5c/n73/++4fjrr7/O
mWeeuUXX0Va0qyC7GfYke9X7gZTSqtKE/PsHgN0jonQgwZFAfVq5O/L9EWX5IXsVvSn5JUmSJLVxZw87mx27N21c8U49dmLssLGNZ2zjjj32WMaPH8/999/PHnvswSmnnMK4ceM4/fTTGT58OIMGDeLWW2/dkP8LX/gCRx99NNOnT2f//ffnvPPO4/TTT2fYsGEcc8wxTaqzU6dOjB49mvnz57Pvvvty9tlnM2rUKIYMGUKnTp0YOHBgS11ui+vIQTZAtfcdni3Nl4/fHgC8kFKqNFJ+k/xNqKNS/qoiYlalDRjSlPKSJEmSijGw50Cm/8v0RgPtnXrsxPRTpzOwZ/sNBktdcMEF3HXXXRx++OHcc889XHLJJdx2222sWbOGCRMmcOqpp27I26lTJ2699VbOPfdcli5dymWXXcaDDz7I1KlT+frXv97kOr/3ve9x0UUXERH86Ec/4q677uKUU07hzons33wAACAASURBVDvvpEuXLi1xma0iUkq1bsMWiYhxwPeoMLt4RHwW+C/gopTS/6tQ9iKyGcFPSCndEhEDgVfIer43m3s/Ij5E1mN9eUppTH7sGbIguktKaW1Z/i7AW8DjKaV9m3Ats6okDTnkkEN6zJpVLVmSms4lvFQL8yd9tNZNkNRB1S8ptddee7XI+Re8sYCps6Yy7bFpLF29dMPxfnX9GLnfSMYOG9thAuxtTVOfnWHDhjF79uzZKaVhzTl/e5tdvEOq9qHlwfchrdwcSZIkaZs3sOdAJh8zmQuGX8CcV+ew/M3l9O7Wm6EDhrb7MdhqWR01yK5fqK7aXPq9yvI1N395mfLZASrllyRJktTO1HWp47BdNnvZVaqqo47JbmxM9CbjqfPJ0F4FdouI7RrL34Q6GhsTLkmSJEnqgDpykL0AODSf1GyD/PtDySY5e6kk6T6gPq3ciHw/syw/QKXp80aU5ZEkSZIkbQM6ZJCdstncrgN6AOPLksfnx68tO35Nvr8wIravPxgRHyZbruvOsjW6/5vsdfAzI2JQSf5BwL8Bi4FbtvpiJEmSJEntRrsakx0Ro4D6ARH75PtREXFk/vX9KaXr8q+/DxwHnBcR+wNzgQPIep4fAS4tPXdK6d6IuA4YBcyNiNvJlvU6CVgKnFmW//WI+Dfgp3n+X+RJJwHvBE5KKb2x9VctSZIkSWov2lWQTRZgf6Hs2KFs+or3dZCNs46II4AJwKeA4WTjrqcAE1NKqyuc/wzgCeBLwBhgJVlv9DdTSs+VZ04p/SwiFpMtB/ZFIAFzgO+klO7ewmuUJEmS1IiIYP369aSUiIhaN0ftREqJlBKdOrXcS93tKshOKZ0GnNaM/MuBsfnWlPzrgcvzral1TAemNzW/JEmSpK3XtWtXVq9ezapVq+jRo0etm6N2YtWqVUD2/LSUDjkmW5IkSVLH1rNnTwAWLlzIypUryaZlkipLKbFy5UoWLlwIQK9evRopseXaVU+2JEmSJAH07duXVatWsWrVKl56KVs0yNfGVU3pH2G6d+9Onz59Wqwug2xJkiRJ7U6nTp0YNGgQy5YtY8WKFaxZs8bebFXVqVMnunbtSq9evejTp49jsiVJkiSpXKdOnejXrx/9+vWrdVOkDRyTLUmSJElSQezJliRJLWbwuNtr3YR2af6kj9a6CZKkLWRPtiRJkiRJBTHIliRJkiSpIAbZkiRJkiQVxCBbkiRJkqSCGGRLkiRJklQQg2xJkiRJkgpikC1JkiRJUkEMsiVJkiRJKohBtiRJkiRJBTHIliRJkiSpIAbZkiRJkiQVxCBbkiRJkqSCGGRLkiRJklQQg2xJkiRJkgpikC1JkiRJUkEMsiVJkiRJKohBtiRJkiRJBTHIliRJkiSpIAbZkiRJkiQVxCBbkiRJkqSCGGRLkiRJklQQg2xJkiRJkgpikC1JkiRJUkEMsiVJkiRJKohBtiRJkiRJBTHIliRJkiSpIAbZkiRJkiQVxCBbkiRJkqSCGGRLkiRJklQQg2xJkiRJkgpikC1JkiRJUkEMsiVJkiRJKohBtiRJkiRJBTHIliRJkiSpIAbZkiRJkiQVxCBbkiRJkqSCGGRLkiRJklQQg2xJkiRJkgpikC1JkiRJUkEMsiVJkiRJKohBtiRJkiRJBTHIliRJkiSpIAbZkiRJkiQVxCBbkiRJkqSCGGRLkiRJklQQg2xJkiRJkgpikC1JkiRJUkE6dJAdmRMi4t6IeDUi/h4R8yLi6ojYvUL+XhFxSUS8GBFrImJ+REyOiB5Vzt8pIs6MiCciYnVEvBYRN1U6tyRJkiSp4+vQQTbwA+Bm4H3Ar4EfAi8ApwOPRcSQ+owR0R24DxgLPA1MBeYB3wB+HxHdKpz/auByIPL9dOAE4JGI2LOFrkmSJEmS1EZ1rnUDWkpE7AR8DXgR2DeltLwkbSxwCXA2MDI/fC6wH3BxSmlcSd5JwHlkwff3So4PB0YBM4EPpZTeyo/fCPwWuAIY0VLXJ0mSJElqezpyT/Zgsut7oDTAzt2W73eA7LVysoB5JXBhWd4L8+Ojyo6fnu/H1wfYACml3wEzgGMiYpetuwRJkiRJUnvSkYPsZ4G3gEMjoldZ2sfy/T35fk9gIFlAvqo0Y/79A8DuEbFzSdKRQH1auTvy/RFb3HpJkiRJUrvTYV8XTyktiYhxwBTg6Yi4FVgB7At8EPgR2SvdkAXZkAXmlTxL9ur3nsBL+fjtAcCTKaV1VfKXnrdBETGrStKQKsclSZIkSW1Qhw2yAVJKUyPiFeA64MslSfcDN6aU1ubf98735a+V11tRlq+5+SVJkiRJ24CO/Lo4EfEt4GfAd4GdgZ7A4UA3YEZEfKKGzdsgpTSs0gY8Weu2SZIkSZKarsMG2RFxNDARuCKlNCml9HJKaWVK6X7g48DbZK+Sw8Ye6Wo9z73K8jU3vyRJkiRpG9Bhg2zgw/n+3vKElNJCsrWw94iIHjQ+hnqTMdv5ZGivArtFxHaN5ZckSZIkbRs6cpC9fb7foUr6DsB6sh7tZ4EFZDORdy/NlH9/KPBCSumlkqT7gPq0cvXrY8/csqZLkiRJktqjjhxk1y+tdXZEbPJad0R8GRgEzEoprUkpJbLJ0XoA48vOMz4/fm3Z8Wvy/YURUR/QExEfJlve686U0otFXIgkSZIkqX3oyLOL/xL4CvDPwDMR8RtgGXAA2RJeq4GzS/J/HzgOOC8i9gfm5nmPAR4BLi09eUrp3oi4DhgFzI2I28mW9ToJWAqc2XKXJkmSJElqizpsT3a+fvUxwPnAK8Bnga8B7yObcXxoSunhkvyrgCPIgum9ga8De5FNjnZUSml1hWrOAMbkX48BPgLcAhycUnqmBS5LkiRJktSGdeSebFJKa4BJ+daU/MuBsfnWlPzrgcvzTZIkSZK0jeuwPdmSJEmSJLU2g2xJkiRJkgpikC1JkiRJUkEMsiVJkiRJKohBtiRJkiRJBTHIliRJkiSpIAbZkiRJkiQVxCBbkiRJkqSCGGRLkiRJklQQg2xJkiRJkgpikC1JkiRJUkEMsiVJkiRJKohBtiRJkiRJBTHIliR
JkiSpIAbZkiRJkiQVxCBbkiRJkqSCGGRLkiRJklQQg2xJkiRJkgrSudYNkCRJ0qYGj7u91k1ot+ZP+mitmyBpG2dPtiRJkiRJBTHIliRJkiSpIAbZkiRJkiQVxCBbkiRJkqSCGGRLkiRJklQQg2xJkiRJkgpikC1JkiRJUkEKCbIj4ryI6F/EuSRJkiRJaq+K6sn+HvBSRPwqIkYUdE5JkiRJktqVooLsUcBc4ATgtxExPyLGR8Sggs4vSZIkSVKbV0iQnVKallIaBgwBLge6AxOBFyLifyPiExHh+G9JkiRJUodWaOCbUvpzSmksMBA4BZgBfAS4hex18u9ExO5F1ilJkiRJUlvRIr3LKaW3U0q/SCl9CDgMeBUYAPw78ExE/DYiPtASdUuSJEmSVCstEmRH5iMRcQtZb/ZA4EXgO8B0YATwQER8viXqlyRJkiSpFjoXebKI2AX4V+CLwLuB9cDtwNXA9JRSyvP9A3Ab8C3ghiLbIEmSJElSrRQSZEfEiWQzjB9N1jv+CnABcF1K6ZXy/CmlP0fET8leH5ckSZIkqUMoqif7v8l6re8ArgJuTymtb6TM08D9BdUvSZIkSVLNFRVkXwRcm1L6a1MLpJRuAm4qqH5JkiRJkmqukCA7pTS+iPNIkiRJktSeFTK7eET8U0RcEhE7VUkfkKcfUkR9kiRJkiS1RUUt4fV14OMppYWVElNKrwIfA8YWVJ8kSZIkSW1OUUH2QTQ+idlMwJ5sSZIkSVKHVVSQ3Z9s2a6GLMzzSZIkSZLUIRUVZC8Ddmkkz67AyoLqkyRJkiSpzSkqyJ4NfDIidq6UGBG7AMcDDxZUnyRJkiRJbU5RQfYlwDuAByLi8xExADbMKv4F4AGgDphSUH2SJEmSJLU5Ra2TPTMiziYLon8CEBEJiDzLemBMSmlmEfVJkiRJktQWFRJkA6SULouIe4Evk8023ptsrPbDwFUppSeLqkuSJEmSpLaosCAbIKX0ODC6yHNKkiRJktReFDUmW5IkSZKkbV6hPdkRsRMwFOgDbFcpT0rphiLrlCRJkiSprSgkyI6IbsC1wMlU7x0PIAEG2ZIkSZKkDqmonuxJwKnAM8BNwMvA2oLOLUmSJElSu1BUkP0Z4M/A0JTSmoLOKUmSJElSu1LUxGd9gOkG2JIkSZKkbVlRQfY8YMeCzlW4iPhkRNwVEUsi4s2IeCEiboqIncvy9YqISyLixYhYExHzI2JyRPSoct5OEXFmRDwREasj4rX8vLu3zpVJkiRJktqSooLsycBxEbFHQecrRGSuBv4H2A34OXAp8Afgn4BdS/J2B+4DxgJPA1PJ/njwDeD3+eRu5a4GLieb1O1yYDpwAvBIROzZQpclSZIkSWqjihqT/TJwB/BwRFwKzAVWVMqYUppZUJ1NcRbwJeBHwFkppXWliRFRev3nAvsBF6eUxpXkmQScRxZ8f6/k+HBgFDAT+FBK6a38+I3Ab4ErgBEtcE2SJEmSpDaqqCB7BtnyXAFMyL+upuL62UWLiDrg28DzwJjyABsgpbQ2zxtkAfNK4MKybBcCX83Tv1dy/PR8P74+wM7P+buImAEcExG7pJT+WswVSZIkSZLauqKC7AtoOLCuhWOAvsBPgO0i4hPAe4FlwN0ppb+U5N0TGAjckVJaVXqSlNKqiHgAGBERO6eUXsqTjgRWAQ9UqPuOPP0I4KeNNTQiZlVJGtJYWUmSJElS21FIkJ1SmlDEeQo2NN+vAx4nC7DrrY+IqSmlb+Tf14+ffrbKuZ4le/V7T+ClfPz2AODJSj3kJedxXLYkSZIkbUOK6slui/rn+7PJxogfDDwF7A9cA3w9Ip5LKV0J9M7zLq9yrvrx5b3L9k3N36CU0rBKx/Me7kOacg5JkiRJUu0VNbs4ABGxf0R8PyJ+ExF3lxzfNSI+ExH9iqyvEfXX9hZwfErpkZTSypTSH4BPA+uBr7dieyRJkiRJHVxhPdkR8X2yoDXyQ6VjtAO4MU+/rKg6G1Hfy/xoSmlBaUJK6cmIeB7YIyL6lOSt1vPcq+yczc0vSZIkSdoGFNKTHRFfJFtP+jbgH9l0Fm5SSvOBh4FPFFFfE83L98uqpNcfr6PxMdSbjNnOJ0d7FdgtIirNlt7YGG9JkiRJUgdU1Ovio8nGO38qpfQk2Sva5Z6mdScCuzff712eEBFdgD3IZgd/jSwYXgAcmk9qVpq3O3Ao8ELJzOIA9wH1aeXq18duzTXBJUmSJEk1VlSQ/Q/AXfXrTlexiI2TkbW4lNJzwJ1kr4SPKkseB/QBbkkprU0pJeA6oAcwvizv+Pz4tWXHr8n3F0bE9vUHI+LDZMt33ZlSerGIa5EkSZIktQ9FjcleC2zfSJ6BwMqC6muq0cCDwLURcTxZb/r+wAeBF4FzSvJ+HzgOOC8i9iebkfwAsvW2HwEuLT1xSuneiLgOGAXMjYjbyZb1OglYCpzZgtclSZIkSWqDiurJfgL4YJXxyUTEO4CjgTkF1dckeW/2gcD1ZOtmn0X2yvp/AAenlBaW5F0FHEEWTO9NNknbXsAU4KiU0uoKVZwBjMm/HgN8BLglP/czLXBJkiRJkqQ2rKie7Glkr1tfFRH/VpoQEb3ytJ3YGJC2mnwc9RebmHc5MDbfmpJ/PXB5vkmSJEmStnGFBNkppWkRcTTwr2SvSy8DiIiHyXqFuwPXp5R+VUR9kiRJkiS1RUW9Lk5K6bNkr0+/ALybbG3sA4G/Al9JKY0sqi5JkiRJktqiol4XByCldC3ZJGN1QF9gRUqptSc7kyRJkiSpJgoNsuvlk4RVmihMkiRJkqQOq7DXxSVJkiRJ2tYV0pMdEeuB1ISsKaXUIr3nkiRJkiTVWlEB70wqB9m9ydal7g78kXzWcUmSJEmSOqKilvA6slpaRLwDmAQcC3yoiPokSZIkSWqLWnxMdkrp7ymls4DlwOSWrk+SJEmSpFppzYnP/gB8tBXrkyRJkiSpVbVmkL0D0KMV65MkSZIkqVW1+EzfEdEJOBU4CXi0peuTJEmSJKlWilrC6/kGzt8f6AK8DZxfRH2SJEmSJLVFRfVkd6LyEl5vA08CjwBXpJT+VFB9kiRJkiS1OUUt4TW4iPNIkiRJktSetebEZ5IkSZIkdWgG2ZIkSZIkFaSoic9+v4VFU0rpqCLaIEmSJElSrRU18dmR+T4BUSG9oeOSJEmSJHUIRb0uXgfcBjwDfA4YnB8bDHw+P/6/QF1KqVPJtl1B9UuSJEmSVHNFBdkTgX2Ag1JK/5VS+mtKaU2+/xnwAWDfPJ8kSZIkSR1SUUH2Z4GbU0orKyWmlFYANwOnFFSfJEmSJEltTlFB9g5Al0bydAb6F1SfJEmSJEltTlFB9nPApyPinZUSI2IH4DPAXwqqT5IkSZKkNqeoIPtSYCdgbkSMiYihEbFzvv8aMIesF3tqQfVJkiRJktTmFLKEV0rpuogYAIwHLilLDmAdMCGlNK2I+iRJkiRJaouKWieblNKFEXEjcCrwj0BvYDnwR+DGlNJzRdUlSZIkSVJbVFiQDZAH0hcUeU5JkiRJktqLosZkby
Ii+kXEzi1xbkmSJEmS2qrCguyI6B0Rl0XEIuA14IWStA9ExG8jYmhR9UmSJEmS1NYUEmRHRD/gIeBM4CXgKbIJz+o9DhxKNl5bkiRJkqQOqaie7AnAe4GTU0oHAr8sTUwprQbuAz5YUH2SJEmSJLU5RQXZnwBuSyn9dwN55gODCqpPkiRJkqQ2p6ggewDw50byrAG6F1SfJEmSJEltTlFB9hKgsdnE9wJeLag+SZIkSZLanKLWyZ4JHBcRg1JKL5cnRsQ/AMcCPymoPkmSJGkzg8fdXusmtFvzJ3201k2QOoSierIvArYDHoiIU4F3AUTE3hHxr8DvyV4Xn1xQfZIkSZIktTmF9GSnlJ6IiJOAnwI35IcDeDLfvwF8JqX0bBH1SZIkSZLUFhX1ujgppd9ExG7AF4APAP2AFWTrZ/8kpbS4qLokSZIkSWqLCguyAVJKS4GpRZ5TkiRJkqT2opAx2RGxLiL+q4hzSZIkSZLUXhU18dkK4KWCziVJkiRJUrtUVJD9MLBvQeeSJEmSJKldKirIngB8MCI+X9D5JEmSJElqd4qa+OxDwAzgJxFxJvAIsAhIZflSSunCguqUJEmSJKlNKSrInlDy9dB8qyQBBtmSJEmSpA5pi4LsiPgE8HRK6Zn80PDimiRJkiRJUvu0pT3ZtwATgQvy738CXJpSuryQVkmSJEmS1A5t6cRnbwNdSr4fDPTZ6tZIkiRJktSObWmQ/VfgsIjYruRY+SRnkiRJkiRtU7b0dfEbgW8BSyNiSX5sbER8sZFyKaX0ni2sU5IkSZKkNm1Lg+zvAG8CHwUGkvViR741pLF0SZIkSZLarS0KslNKa4FJ+UZErAemppQuaLCgJEmSJEkd2JaOyS43EZhR0LlaVEScFxEp3w6pkN4rIi6JiBcjYk1EzI+IyRHRo8r5OkXEmRHxRESsjojXIuKmiNi95a9GkiRJktSWFBJkp5QmppRmFnGulhQRQ8j+ILCqSnp34D5gLPA0MBWYB3wD+H1EdKtQ7GrgcrJX4S8HpgMnAI9ExJ5FX4MkSZIkqe0qqie7zYuILsB/Ao+RrfNdybnAfsDFKaURKaVxKaURwMXAQWTBd+k5hwOjgJnAASml81JKnwOOB/oBV7TIxUiSJEmS2qRtJsgGvgm8HxgJrCtPjIggC5hXAheWJV+YHx9Vdvz0fD8+pfRW/cGU0u/IXp8/JiJ2KaLxkiRJkqS2b5sIsiPiALIge2JK6c9Vsu1JNlP6AymlTV4nz79/ANg9InYuSTqS7NXzByqc7458f8RWNF2SJEmS1I5s6RJe7UZEdAVuIHtN/PsNZK0fP/1slfRngRF5vpfy8dsDgCdTSpv1jJecp9Fx2RExq0rSkMbKSpIkSZLajg4fZAMXkAW6Q6sEw/V65/vlVdJXlOVrbn5JkiRJUgfXoYPsiBhGNjP4hJTSk7VuTzUppWGVjuc93JstMyZJkiRJaps6bJAdEZ3JZhN/HJjUhCL1PdLVep57leVrbn5JBRg87vZaN0GSJEmqqsMG2UAPNo6HfiubPHwzs/LjnwTqJ0SrNoZ6kzHbKaVVEfEqsFtEbFfhVfTGxnhLkiRJkjqYjhxkrwF+XCXtn8mC4N8ArwHzyYLhBcChEdG9dIbxfJKzQ4EXUkovlZznPuDkPG1mWR0j8n35cUmSJElSB9Vhg+yU0mo2X9cagIi4nizI/l5KaXbJ8euAbwHjgXElRcaT9Yx/t+xU15AF2RdGxIfq18qOiA+TLe91Z0rpxSKuR5IkSZLU9nXYIHsLfR84DjgvIvYH5gIHAMcAjwCXlmZOKd2bB+ajgLkRcTvZsl4nAUuBM1ux7ZIkSZKkGutU6wa0Jfkr4keQBdN7A18H9gKmAEflvePlzgDG5F+PAT4C3AIcnFJ6psUbLUmSJElqM7bJnuyU0mnAaVXSlgNj860p51oPXJ5vkiRJkqRtmD3ZkiRJkiQVxCBbkiRJkqSCGGRLkiRJklQQg2xJkiRJkgpikC1JkiT9//buPdiyqr4T+PcnIHRoaSGjNG0URZmYDBnQ9kEH5GEZ1BKTKI4PNMJQqFRFR8EewagRNaOog46WGaMYnxmRGCNxBIHxHQgM2AwJZBQZFVABMSrPNO81f+x99XC4l+57e/c9p29/PlW79u291l5nna5V99zvWXvtDTAQIRsAAAAGImQDAADAQIRsAAAAGIiQDQAAAAMRsgEAAGAgQjYAAAAMRMgGAACAgQjZAAAAMBAhGwAAAAYiZAMAAMBAhGwAAAAYiJANAAAAAxGyAQAAYCBCNgAAAAxEyAYAAICBCNkAAAAwECEbAAAABiJkAwAAwECEbAAAABiIkA0AAAADEbIBAABgIEI2AAAADETIBgAAgIEI2QAAADAQIRsAAAAGImQDAADAQIRsAAAAGIiQDQAAAAMRsgEAAGAgQjYAAAAMRMgGAACAgQjZAAAAMBAhGwAAAAYiZAMAAMBAhGwAAAAYiJANAAAAAxGyAQAAYCBCNgAAAAxEyAYAAICBCNkAAAAwECEbAAAABiJkAwAAwEC2nXQHAACAyXvkCWdMugtbrCtPetaku8AUMZMNAAAAAxGyAQAAYCAuFwcAANgELrVfuKV4qb2ZbAAAABiIkA0AAAADWbIhu6oeVlWvqapzqurqqrqjqq6rqs9V1ZPnOGenqnpPVV1VVbdX1ZVV9e6qWj5H/QdU1auq6tKqWl9VP62qU6tqj8377gAAAJhGSzZkJ3lVkvcm2SPJOUlOTnJukj9I8g9V9YLRylW1Y5JvJDk2yXf6cy9PsjbJV6tqh1le40NJ3p+k+v1ZSZ6b5KKq2nMzvCcAAACm2FK+8dmFSQ5qrX1j9GBVPSXJV5J8sKpOb63d3he9Lsk+Sd7ZWjthpP5JSY5PF77fMXL84CRHJ/lmkt9rrd3RH/90kjOTfCDJ0zfTewMAAGAKLdmZ7Nba344H7P743yf5WpKdk/xOklRVpQvMtyR529gpb+uPHz12/GX9/k0zAbtv/0tJvp7kkKp6xKa/EwAAALYUSzZkb8Cd/f6ufr9nklVJzmut3Tpasf/3eUn2qKqHjxQdlGSmbNzZ/f7AoToMAADA9FvKl4vPqp9dflqSa5Nc2h+eWT99xRynXZHu0u89k/ywX7+9W5LLWmt3z1F/tN0N9en8OYr22pjzAQAAmA5b1Ux2VW2X5FNJtk9y/EhAXtHvb5zj1JvG6s23PgAAAFuBrWYmu6oekOTjSQ5Ickpr7VOT7dGvtNbWzHa8n+Hed5G7AwAAwAJtFTPZfcD+aJLDk/xVkmPGqszMSM8187zTWL351gcAAGArsORDdh+wP5bkiCSnJjmytXbPWLUNraG+15rt/mZo1yZ5VFVts6H6AAAAbB2WdMgeCdgvTXJakj+6nxuVXZNkv/6mZqNt7JhkvyQ/aK39cKToG0lmysbNPB/7m5v2DgAAANiSLNmQPXKJ+EuTfDbJS+YI2GmttSQfSbI8yZvGit/UHz9l7PiH+/3bquqBI6/7zHSP9zqntXbVJr4NAAAAtiBL+cZnf5ruEvFbknw3yRura
rzO6a21S/qf35XkD5IcX1WPS3JxkscnOSTJRUn+2+iJrbWvVdVHkhyd5OKqOiPdY71ekOTnSV61Od4UAAAA02sph+xH9vvlSd4wR50rk1ySdOusq+rAJCcmOSzJwenWXZ+c5C2ttfWznP+KdM/afnmSV6cL9J9P8obW2veGeBMAAABsOZZsyG6tHZnkyHmec2OSY/ttY+rfk+T9/QYAAMBWbsmuyQYAAIDFJmQDAADAQIRsAAAAGIiQDQAAAAMRsgEAAGAgQjYAAAAMRMgGAACAgQjZAAAAMBAhGwAAAAYiZAMAAMBAhGwAAAAYiJANAAAAAxGyAQAAYCBCNgAAAAxEyAYAAICBCNkAAAAwECEbAAAABiJkAwAAwECEbAAAABiIkA0AAAADEbIBAABgIEI2AAAADETIBgAAgIEI2QAAADAQIRsAAAAGImQDAADAQIRsAAAAGIiQDQAAAAMRsgEAAGAgQjYAAAAMRMgGAACAgQjZAAAAMBAhGwAAAAYiZAMAAMBAhGwAAAAYiJANAAAAAxGyAQAAYCDbTroDsDV65AlnTLoLAADAZmAmGwAAAAYiZAMAAMBAhGwAAAAYiJANAAAAAxGyAQAAYCBCNgAAAAzEI7xYMI+hAgAAuDcz2QAAADAQIRsAAAAGImQDAADAQIRsAAAAGIiQDQAAAAMRsgEAAGAgQjYAAAAMRMgGAACAgQjZAAAAMBAhewBV9cSqOrOqbqiqW6vqgqp6/qT7Gqs6HAAAERtJREFUBQAAwOLadtId2NJV1cFJzk5yW5LPJLk5yWFJTquqh7fWTp5k/wAAAFg8ZrI3QVVtm+SUJPckOaC19vLW2muT7J3ku0neXlW7T7KPAAAALB4he9M8Ncmjk3y6tXbJzMHW2o1J3p7kgUmOmFDfAAAAWGQuF980B/X7c2YpO7vfH7ihRqrq/DmK9lpAnwAAAJgQIXvT7NnvrxgvaK1dV1W3jNRZcq486VmT7gIAi+yM756RQ089dN7nffFFX8yz/u39fG6ccUZy6PzbzRe/mDzL5xEA00PI3jQr+v2Nc5TfNFJnTq21NbMd72e4911Y1wBgeDttv9OCzluxwwY+DndaWLtZscGPWQBYVNZkAwAb7QmrnpCdd9h5XufssmyXrN5t9QYafkKy8/zazS67JKs30C4ALDIhe9PMzGDP9TX6Tpl7lhsAtjjLtluWox531LzOOWqfo7Jsu2UbaHhZctT82s1RR3XnAcAUEbI3zcxa7Pusu66qlUmWZ5b12gCwJTtuzXHZdcddN6ruyuUrc+yaYzey4eOSXTeu3axcmRy7ke0CwCISsjfNN/r9IbOUPX2sDgAsCasetCpnveSsDQbtlctX5qwXn5VVD1q1kQ2vSs46a8NBe+XKrt6qjWwXABaRkL1pvpLk+0kOr6p9Zg5W1Yokf5LkjiSfnFDfAGCz2WflPrn4FRdn7Zq12WXZLvcq22XZLlm7Zm3WvXxd9l659zwb3ie5+OJk7dpuzfW9Gt6lO75uXbL3PNsFgEVSrbVJ92GLVlUHp3sm9m1JPpPk5iSHJdk9ydrW2smb0Pb5++67777nnz/XY7QBYPLW37k+665dlxtvuzErdliR1but3vAa7I1qeH0XqG+8sbuL+OrV1mADsGjWrFmTCy644IK5ngY1F4/w2kStta9V1f5J3pLkBUm2S3JpkuNba6dNtHMAsAiWbbcs+z9i/83Q8LJk/83QLgBsRkL2AFprFyZ55qT7AQAAwGRZkw0AAAADEbIBAABgIEI2AAAADETIBgAAgIEI2QAAADAQIRsAAAAGImQDAADAQIRsAAAAGIiQDQAAAAOp1tqk+8Acquony5cvf+hee+016a4AAABsVS677LLccsst17fWdp3PeUL2FKuq7yTZOcn3F/mlZ1L9ZYv8ujAUY5ilwDhmS2cMs6UzhtkjyS9aa4+dz0lCNvdRVecnSWttzaT7AgthDLMUGMds6YxhtnTGMAtlTTYAAAAMRMgGAACAgQjZAAAAMBAhGwAAAAYiZAMAAMBA3F0cAAAABmImGwAAAAYiZAMAAMBAhGwAAAAYiJANAAAAAxGyAQAAYCBCNgAAAAxEyAYAAICBCNn8UlU9sarOrKobqurWqrqgqp4/6X7BjKp6WFW9pqrOqaqrq+qOqrquqj5XVU+e45ydquo9VXVVVd1eVVdW1buravli9x/mUlXHV1Xrt31nKTeOmUpV9Zyq+l9V9bOquq2qflBVp1bVw8fqGcNMleo8t6q+VlXXVtW/VtXlVfWhqtpjlvrGMButWmuT7gNToKoOTnJ2ktuSfCbJzUkOS7J7krWttZMn2D1IklTVSUmOT/K9JF9P8tMkeyb5wySV5PDW2mkj9XdMcm6SfZKck+T/JHlckkOSXJTkgNbabYv4FuA+qmqvJN9KcleSHZOsaa1dMFJuHDN1qqqS/EWSl6f7nXx2ur8dViU5MMmLW2vn9nWNYaZOVZ2c5Lgk1yb5uyQ3Jdk73bi8JcnvttYu6+saw8xPa822lW9Jtk3y/9IF7H1Gjq9IcnmS25PsPul+2mxJnpvkwFmOPyXJHUl+nmT7keNvSdKSnDRW/6T++Osn/Z5sW/eWZLsk65JckORT/bjcd6yOcWybui3Jq/vx9+dJtpmlfNuRn41h21RtSVYmuTvJlUlWjJUd24/Lj44cM4Zt89rMZJOqOiTdN9Afa60dNVZ2RJKPJ3lza+2tE+gebJSqOjvdN8pPbK19q59l+VGSnZKsbK3dOlJ3xyTXJbm+tfboiXQYklTViUlOSPL4JK9LckRGZrKNY6ZRVS1L8uMkv0jym621u+6nrjHM1OmX5Zyf5NOttRePle2Z5LtJvthae7YxzEJYk02SHNTvz5ml7Ox+f+DidAUW7M5+P/PH3p7pLls8b/QDMUn6f5+XZI/xdYOwWKrq8UnekOQtrbX/O0c145hpdEiSnZOcnmSbfl3rCVV1TFU9ZqyuMcw0uiLdFXD7VdVOY2WH9vuv9HtjmHkTskm6Xx5J9wvnXlpr16Vbl7LneBlMi6p6RJKnpVtXdWl/eM5xPXbc2GbRVdX2ST6Z5JIk77qfqsYx02h1v787yT8l+VySdyT5YJLLq+q/jtQ1hpk6rbWfpbuK6BFJvlNVH6yqd1bVWUnemeS/J/lAX90YZt62nXQHmAor+v2Nc5TfNFIHpkpVbZduLev2SY5vrd3dF23MuB6tB4vpren+IFs9MmZnYxwzjR7a749LcnGSJyX5drobQX04yWur6nuttQ/GGGZKtdbeW1U/TvKRJMeMFJ2b7jLymSvjjGHmzUw2sMWqqgeku2fAAUlOaa19arI9gg2rqjVJ1ib5s9bfuRa2MDN/P96R5A9baxe11m5prf19kv+Q5J4kr51Y72AjVNWfJvmrJG9P8vAkD0p3I9Udkny9qn5/gt1jCydkk/zqm7m5voHbKXN/ewcT0QfsjyY5PN2H5DFjVTZmXI/Wg82uqrZN8ol0l9ietBGnGMdMo5nx9q3W2jWjBf0XR99P8uiqenCMYaZQVT0t3R3DP9BaO6m19qP+
i6Jzkzw73X1eZh5fawwzby4XJ7n3WpJ1owVVtTLJ8iQXLnanYC59wP5YkpcmOTXJka21e8aqbWiN1IbWWMHmsDy/Gnt3dDetvY/z++PPSTJzQzTjmGlyeb+/YY7ymePL4ncx0+mZ/f5r4wWtteuq6jtJHldVy2MMswBCNknyjSSvT3e30M+MlT19pA5M3FjAPi3JH82xpvWKJNeku3PojrM8cmO/JD9orf1wEboNM25P8pdzlB2Q7o+1LyT5abrntxrHTKOZYPJb4wX9fTIek+TWdOP4uhjDTJ8H9vuHzFH+kHTLHu6M38MsgMvFSbpHFHw/yeFVtc/MwapakeRP0q25+uSE+ga/NHKJ+EuTfDbJS+a6aVRrraW7mcnyJG8aK35Tf/yUzddbuK/W2vrW2tGzbUn+oa/2jv7YJcYx06i19r10j/18TFUdPVZ8QpIHJ/l8a+0uY5gpdV6/P67/e/eXquqYJL+R5PzW2u3GMAtR3bhha1dVB6d7JvZt6Wazb05yWJLdk6xtrZ18P6fDoqiqE5O8Od1j5d6XXz0Te9TprbVL+vo7pvsg3TvdH4QXJ3l8uqs2LkpyYGtt/ebvOWxYVX08yRFJ1rTWLhg5bhwzdarq0em+GHpokjOSfCfd3cWfmuSqJPv2jwE1hpk6VbVNkq+mu4Lo+nRXEN2Qblw+Ncn6JAe11i7s6xvDzIuQzS9V1ZPS3QTid5Nsl+55w+9prZ020Y5BbySE3J//2Fr7+Mg5K5KcmO5Lo5XpnqX92SRvaa3dvFk6CgswV8juy4xjpk5VPTzd4+iekeTX010a/oUkb22tXT9W1xhmqlTV9kmOTfL8JL+Z7hLyn6RbDvH21tq3x+obw2w0IRsAAAAGYk02AAAADETIBgAAgIEI2QAAADAQIRsAAAAGImQDAADAQIRsAAAAGIiQDQAAAAMRsgEAAGAgQjYAAAAMRMgGAACAgQjZAAAAMBAhGwC2YFV1YlW1qjpo0n0BAIRsAAAAGIyQDQAAAAMRsgEAAGAgQjYATLGqOqCqTq+qn1TV7VX1w6r626raf5a6h1fVJVW1vqqurar3VdWysTpH9mu4j5zl/IP6shPHjreq+npVPayqPllV11XVPTPrwKvqyn5b3r/mNX1f/6mqnrcJ7311VX2gqi6rqhv793VpVZ1QVdvNcc6BVfXNqrq1qn5WVadV1cP7/rdZ6ldVHVVV51XVTVX1r1X1rao6aqH9BmDrtu2kOwAAzK6qXp3kvUnWJ/l8kquTPCzJ/kmel+TckeqvTPKMJH+X5Kv9z/8pyb9J8uIBuvPrSc5P8vMkn0myQ5KbRsq3S3JOkp2TfC7JryV5YZK/rqpntNbOWcBrvizJs5N8M8mZfZsHJXlHkicmOWy0clUdkuSMJHcnOS3JNUkOTvf/9IvxxquqkvyPJC9KckWSTye5I8nvJfnLqvrt1traBfQbgK2YkA0AU6iq9k7yniTXJtmvtXblSFkl2W3slKclWd1au7yv84YklyR5YVX959baNZvYpb2SfCzJy1prd89SvirJRUkOaq3d0ffh00m+nOS4dAF8vt6e5I9HX69/7x9JclRV7ddaO68/vk2SDyfZJsnBrbVzR875RJKXztL+0ekC9seSvKK1dmdf/4FJ/ibJa6vq1NbaugX0HYCtlMvFAWA6vSLd5/QbRwN2krTOeGh+30zA7uusT3Jq38bqAfpzR5LXzRGwZxw7E7D7PnwlyVXpZp3nrbV29fjrtdZakj/v//m0kaL9k+ye5H+OBuzeG9PNbo97ZZJb0wX5O0de444kb+j/+aKF9B2ArZeZbACYTk/q9xs7AzzbbOuP+v2DN707+UFr7V/up/yG1toP5ujDmoW8YD+j/Mp0l50/NsnyJDVSZdXIz3v3+/GAndbaD6vq6iSPGmn715L8TrpLyo/vJsjvZWbN92MX0ncAtl5CNgBMpxVJWrrLxTfGTbMcu6vfbzNAf36ygfIb5zh+VxZ+5dzfpFuT/d10a6yvT3Jnui8NXp1k+5G6O/X76+do6ycZCdnp1o5XujXub76fPuw4714DsFUTsgFgOt2QLgTuluTHA7Z7T7+f7W+AFfdz3n3uzL05VdUT0wXss5M8a2xd9r7pQvaomS8ZHjpHk7vOUX9da+0Jm9hdAPgla7IBYDpd2O8PGbjdmbtsP2yWsscN/Fqb4tH9/oxZ1oE/ZZb6/9jv9xsvqKrfSPKI0WOttZuTfDvJb1XVEJfTA0ASIRsAptVfpLtZ159V1e6jBf2znVfNftoGrUs3K/3CqtphpM09c9/Z4Um6qt/f63ngVfXvkrx+lvrnpnvE2bOranwN+Nsy+yXz70/3WLBTquo+l4VX1aOq6pHz6zYAWzuXiwPAFGqtXVpVr0kXBP+5qk5PFzxXJjkg3fOgX7OAdq+pqlOTHJ5kXVWdle4S6+ckOStjz56eoAv77flVtVuSC9LNRv9+uvf+vNHKrbW7q+qYJF9I8tWqOi3devYD083a/2OSfz/2Gh9Ksm+SI5LsV1VfTncjtF3T3fDsyen+n67cDO8PgCVKyAaAKdVa+0BVXZbktUmeme7u2tcn+d9J/noTmj46yb8keUGSP05yeZKXpwuYUxGy+9B8aJKTkjwj3WPArkiyNsmXMhay+3O+VFWHJHlrkucnWZ/kK+ne55kZuzlc/ziwI6vqzCQvS3JofvV/PPNaX94c7w+Apau6zxcAgKWpqh6U7u7il7bWnjzp/gCwtFmTDQAsCVW1Yx+oR49tk+TdSZYlOX0iHQNgq2ImGwBYEqpqn3Q3QDs7yfeTPCjdnch/O8k/J3lya+3WyfUQgK2BkA0ALIqqOijJQRtR9ZLW2rxnnavqIUnele5mZ7umu/fM1elmsP9La+2G+bYJAPMlZAMAi6KqTkzy5o2o+onW2pGbtzcAsHkI2QAAADAQNz4DAACAgQjZAAAAMBAhGwAAAAYiZAMAAMBAhGwAAAAYiJANAAAAAxGyAQAAYCBCNgAAAAxEyAYAAICBCNkAAAAwECEbAAAABiJkAwAAwED+P8eeSpzelo4tAAAAAElFTkSuQmCC\n",
1098 | "text/plain": [
1099 | ""
1100 | ]
1101 | },
1102 | "metadata": {
1103 | "needs_background": "light"
1104 | },
1105 | "output_type": "display_data"
1106 | }
1107 | ],
1108 | "source": [
1109 | "# setting image resolution\n",
1110 | "plt.figure(figsize = (8,4), dpi = 140)\n",
1111 | "\n",
1112 | "# Plotting histogram and descriptive summary\n",
1113 | "plt.scatter(churn_age.mean(), 0, label = 'mean', color = 'red')\n",
1114 | "plt.scatter(churn_age.median(), 0, label = 'median', color = 'green')\n",
1115 | "plt.hist(churn_age,bins=10)\n",
1116 | "\n",
1117 | "# axes labels\n",
1118 | "plt.xlabel('churn_age')\n",
1119 | "plt.ylabel('frequency')\n",
1120 | "plt.title('churn_age: mean, median, range')\n",
1121 | "plt.legend()"
1122 | ]
1123 | },
1124 | {
1125 | "cell_type": "code",
1126 | "execution_count": null,
1127 | "metadata": {},
1128 | "outputs": [],
1129 | "source": [
1130 | "* Mean Value is higher than the Median Value\n",
1131 | "* The peak occurs at the interval (36,45) that means highest number of customers age lie in this interval\n",
1132 | "* Most customer age between 30 and 65\n"
1133 | ]
1134 | }
1135 | ],
1136 | "metadata": {
1137 | "colab": {
1138 | "collapsed_sections": [],
1139 | "name": "Mean_Variance.ipynb",
1140 | "provenance": []
1141 | },
1142 | "kernelspec": {
1143 | "display_name": "Python 3",
1144 | "language": "python",
1145 | "name": "python3"
1146 | },
1147 | "language_info": {
1148 | "codemirror_mode": {
1149 | "name": "ipython",
1150 | "version": 3
1151 | },
1152 | "file_extension": ".py",
1153 | "mimetype": "text/x-python",
1154 | "name": "python",
1155 | "nbconvert_exporter": "python",
1156 | "pygments_lexer": "ipython3",
1157 | "version": "3.6.9"
1158 | }
1159 | },
1160 | "nbformat": 4,
1161 | "nbformat_minor": 1
1162 | }
1163 |
--------------------------------------------------------------------------------