├── Basic statistics.py ├── Bayes Theorem └── Bayes_Theorem.py ├── Data_and_DataTypes └── Data_types.py ├── Descriptive_Statstics_using_Python ├── Measure_of_Center.ipynb ├── Measure_of_Centre.py ├── Measure_of_Spread.ipynb └── Measure_of_Spread.py ├── Event_Intersaction_and_Union └── Intersaction_and_Union.ipynb ├── Probability_Distribution └── Probability_Distribution.ipynb ├── Probability_and_It's_Uses ├── Probability.ipynb └── Type_of_Probability.py ├── README.md ├── Statistical_Inference ├── Point_Estimates.ipynb └── confidence_interval_margin_of_error.ipynb └── Statistical_Testing ├── Non-Parametric_Tests └── Chi-Square_Test.py └── Parametric_Tests ├── Z_Test.ipynb └── t_Test.ipynb /Basic statistics.py: -------------------------------------------------------------------------------- 1 | #Basic Statistics, Graphs and Reports 2 | #Taking a random sample 3 | import pandas as pd 4 | #view all the names(functions) in a module on pd 5 | dir(pd) 6 | 7 | ####################Sampling in R############################# 8 | #Taking a random sample 9 | import pandas as pd 10 | 11 | Online_Retail=pd.read_csv("E:\\Larning\\hadoop\\Data Science\\001_Python\\Class Files Python\\Class Files Python\\1.Python Programming\\3.Basic Statistics and Reporting in Python\\datasets\\Online Retail Sales Data\\Online Retail.csv", encoding = "ISO-8859-1") 12 | Online_Retail.shape 13 | 14 | sample_data=Online_Retail.sample(n=1000) 15 | sample_data.shape 16 | print(sample_data.head()) 17 | 18 | #Regenerating same sample again 19 | 20 | sample_data1=Online_Retail.sample(n=1000 , random_state=12 ) 21 | sample_data1.shape 22 | print(sample_data1.head()) 23 | 24 | #####################LAB: Sampling in python############################# 25 | 26 | #Import “Census Income Data/Income_data.csv” 27 | Income=pd.read_csv("E:\\Larning\\hadoop\\Data Science\\001_Python\\Class Files Python\\Class Files Python\\1.Python Programming\\3.Basic Statistics and Reporting in Python\\datasets\\Census Income 
Data\\Income_data.csv") 28 | Income.shape 29 | Income.head() 30 | Income.tail(3) 31 | #Sample size 5000 32 | Sample_income=Income.sample(n=5000) 33 | Sample_income.shape 34 | 35 | #####################Descriptive statistics##################### 36 | #Import “Census Income Data/Income_data.csv” 37 | Income=pd.read_csv("E:\\Larning\\hadoop\\Data Science\\001_Python\\Class Files Python\\Class Files Python\\1.Python Programming\\3.Basic Statistics and Reporting in Python\\datasets\\Census Income Data\\Income_data.csv") 38 | 39 | Income.columns.values 40 | 41 | #Mean and Median on python 42 | gain_mean=Income["capital-gain"].mean() 43 | gain_mean 44 | 45 | gain_median=Income["capital-gain"].median() 46 | gain_median 47 | 48 | #####################LAB: Mean and Median on python##################### 49 | 50 | Online_Retail=pd.read_csv("E:\\Larning\\hadoop\\Data Science\\001_Python\\Class Files Python\\Class Files Python\\1.Python Programming\\3.Basic Statistics and Reporting in Python\\datasets\\Online_Retail_Sales_Data\\Online Retail.csv", encoding = "ISO-8859-1") 51 | Online_Retail.shape 52 | Online_Retail.columns.values 53 | 54 | #Mean and median of 'UnitPrice' in Online Retail data 55 | up_mean=Online_Retail['UnitPrice'].mean() 56 | up_mean 57 | 58 | up_median=Online_Retail['UnitPrice'].median() 59 | up_median 60 | 61 | #Mean of "Quantity" in Online Retail data 62 | Quantity_mean=Online_Retail['Quantity'].mean() 63 | Quantity_mean 64 | 65 | Quantity_median=Online_Retail['Quantity'].median() 66 | Quantity_median 67 | 68 | #####################Dispersion Measures##################### 69 | 70 | #####################Variance and Standard deviation##################### 71 | usa_income=Income[Income["native-country"]==' United-States'] 72 | usa_income.shape 73 | 74 | other_income=Income[Income["native-country"]!=' United-States'] 75 | other_income.shape 76 | 77 | #Var and SD for USA 78 | var_usa=usa_income["education-num"].var() 79 | var_usa 80 | 81 | 
# Standard deviation of education-num for US residents (variance is computed just above).
std_usa = usa_income["education-num"].std()
std_usa

# Variance and standard deviation of education-num for all other countries.
var_other = other_income["education-num"].var()
var_other

std_other = other_income["education-num"].std()
std_other

##################### LAB: Variance and Standard deviation #####################
# Variance and standard deviation of UnitPrice
var_UnitPrice = Online_Retail['UnitPrice'].var()
var_UnitPrice

std_UnitPrice = Online_Retail['UnitPrice'].std()
std_UnitPrice

# Variance and standard deviation of Quantity.
# Kept under their own names so the UnitPrice results above are not overwritten.
var_Quantity = Online_Retail['Quantity'].var()
var_Quantity

std_Quantity = Online_Retail['Quantity'].std()
std_Quantity

###################### Percentiles & Quartiles #####################

Income["capital-gain"].describe()

# Percentiles / quantiles via Series.quantile(): deciles from the minimum (0) to the maximum (1)
Income['capital-gain'].quantile([0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1])
Income['capital-loss'].quantile([0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1])
# Extra upper-tail points (95th, 98th) for hours-per-week
Income['hours-per-week'].quantile([0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.98, 1])

###################### LAB: Percentiles & quartiles in python ######################
bank = pd.read_csv(
    "E:\\Larning\\hadoop\\Data Science\\001_Python\\Class Files Python\\Class Files Python\\1.Python Programming\\3.Basic Statistics and Reporting in Python\\datasets\\Bank Tele Marketing\\bank_market.csv",
    encoding="ISO-8859-1",
)
bank.shape

# Summary of the balance variable via .describe()
summary_bala = bank["balance"].describe()
summary_bala

# Relevant percentiles of balance, to see its distribution
bank['balance'].quantile([0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1])

# Summary of the age variable
summary_age = bank['age'].describe()
summary_age

# Relevant percentiles of age, to see its distribution
bank['age'].quantile([0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1])

###################### LAB: Box plots and outlier detection ######################
# Do you suspect any outliers in balance?
# The file is read again here, so this lab section does not depend on the one above.
bank = pd.read_csv(
    "E:\\Larning\\hadoop\\Data Science\\001_Python\\Class Files Python\\Class Files Python\\1.Python Programming\\3.Basic Statistics and Reporting in Python\\datasets\\Bank Tele Marketing\\bank_market.csv",
    encoding="ISO-8859-1",
)
bank.shape

import matplotlib.pyplot as plt

# Basic box plot with matplotlib.pyplot (plt.boxplot())
plt.boxplot(bank.balance)

# Relevant percentiles of balance, including the 95th
bank['balance'].quantile([0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 1])
# Do you suspect any outliers in balance?
# Outliers are present in the balance variable.

# Do you suspect any outliers in age?
# Detect outliers in the age variable with plt.boxplot()
plt.boxplot(bank.age)
# No outliers are present
# Relevant percentiles of age, including the 95th
bank['age'].quantile([0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 1])
# Do you suspect any outliers in age?
# Outliers are not present in the age variable.


###################### Creating Graphs ################################

## Scatter Plot:

cars = pd.read_csv(
    "E:\\Larning\\hadoop\\Data Science\\001_Python\\Class Files Python\\Class Files Python\\1.Python Programming\\3.Basic Statistics and Reporting in Python\\datasets\\Cars Data\\Cars.csv",
    encoding="ISO-8859-1",
)
cars.shape
cars.columns.values

cars['Horsepower'].describe()
cars['MPG_City'].describe()

import matplotlib.pyplot as plt
plt.scatter(cars.Horsepower, cars.MPG_City)


###################### LAB: Creating Graphs ################################

import matplotlib.pyplot as plt


# Sports data
sports_data = pd.read_csv(
    "E:\\Larning\\hadoop\\Data Science\\001_Python\\Class Files Python\\Class Files Python\\1.Python Programming\\3.Basic Statistics and Reporting in Python\\datasets\\Sporting_goods_sales\\Sporting_goods_sales.csv"
)
sports_data.head(10)

# Scatter plot of Average_Income against Sales: is there any relation between the two variables?
plt.scatter(sports_data.Average_Income, sports_data.Sales)

import numpy as np
np.corrcoef(sports_data.Average_Income, sports_data.Sales)

# Scatter plot of Under35_Population_pect against Sales: is there any relation between the two?
plt.scatter(sports_data.Under35_Population_pect, sports_data.Sales, color="red")
np.corrcoef(sports_data.Under35_Population_pect, sports_data.Sales)

###################### Bar Chart ######################
# Bar charts are used to summarize categorical variables

import pandas as pd

cars = pd.read_csv(
    "E:\\Larning\\hadoop\\Data Science\\001_Python\\Class Files Python\\Class Files Python\\1.Python Programming\\3.Basic Statistics and Reporting in Python\\datasets\\Cars Data\\Cars.csv",
    encoding="ISO-8859-1",
)
cars.shape
cars.columns.values

# Frequency of each distinct Cylinders value: index = category, values = count
freq = cars.Cylinders.value_counts()
freq.values
freq.index

import matplotlib.pyplot as plt
plt.bar(freq.index, freq.values)
###################### LAB: Bar Chart ######################

freq = sports_data.Avg_family_size.value_counts()
freq.values
freq.index

import matplotlib.pyplot as plt
plt.bar(freq.index, freq.values)
plt.bar(freq.index, freq.values, align="center")
plt.bar(freq.index, freq.values, align="center", tick_label=freq.index)


###################### Trend Chart ######################

AirPassengers = pd.read_csv(
    "E:\\Larning\\hadoop\\Data Science\\001_Python\\Class Files Python\\Class Files Python\\1.Python Programming\\3.Basic Statistics and Reporting in Python\\datasets\\Air Travel Data\\Air_travel.csv",
    encoding="ISO-8859-1",
)
AirPassengers.head()
AirPassengers.dtypes
AirPassengers.columns.values

import matplotlib.pyplot as plt
plt.plot(AirPassengers.AIR)


# X axis label
# The DATE column must be formatted as DD-MM-YYYY before importing
AirPassengers['new_time'] = pd.to_datetime(AirPassengers['DATE'], format='%d-%m-%Y')
plt.plot(AirPassengers.new_time, AirPassengers.AIR)

# Any single array gives a plot against its index (time-series style)
plt.plot(sports_data.Avg_family_size)


################################
## User-defined functions

def mydistance(x1=1, y1=1, x2=1, y2=1):
    """Print and return the Euclidean distance between (x1, y1) and (x2, y2).

    The value is returned as well as printed so callers can reuse it.
    """
    import math
    dist = math.sqrt(pow((x1 - x2), 2) + pow((y1 - y2), 2))
    print(dist)
    return dist


mydistance(x1=0, y1=0, x2=2, y2=2)
mydistance(x1=1, y1=0, x2=0, y2=1)
mydistance(x1=4, y1=6, x2=1, y2=2)
mydistance(4, 6, 1, 2)

## The absolute percentage difference

x = 1
y = 1


def abspe(x=1, y=1):
    """Print and return the absolute difference of x from y, relative to y.

    The result is the fraction ``abs((x - y) / y)``; it is not multiplied
    by 100.

    Raises:
        ZeroDivisionError: if ``y`` is 0.
    """
    abpe = abs((x - y) / y)
    print(abpe)
    return abpe


abspe(x=5, y=9)
abspe(10, 100)

### Sum of squares function

def sumsquares(*inputnums):
    """Print and return the sum of the squares of all positional arguments.

    With no arguments the result is 0.
    """
    s = sum(pow(n, 2) for n in inputnums)
    print(s)
    return s


sumsquares(1, 1, 1, 1, 1)
sumsquares(1, 2, 5, 8, -1)

### Function for summary
import pandas as pd
# Columns of the summary table: variable name, central tendency, spread, then percentiles.
column_names = ["Name", "Mean", "Median", "Variance", "S.D", "p5",
                "p10", "p20", "p25", "p30", "p50", "p75", "p80", "p90", "p95", "p97", "p99"]
# Module-level table that allsummary() fills in: one row per summarized column.
summary_df = pd.DataFrame(columns=column_names)


def allsummary(df):
    """Fill ``summary_df`` with descriptive statistics for every column of *df*.

    For each column this records the name, mean, median, variance, standard
    deviation and a fixed set of percentiles (computed on the non-null
    values). Rows are labelled 1, 2, ... in column order, so calling the
    function again overwrites the rows written by the previous call. The
    resulting table is printed once at the end.

    Args:
        df: pandas DataFrame whose columns support mean/median/var/std and
            quantile (i.e. numeric columns).
    """
    # Output column -> quantile level. "p5" is the 5% quantile of the data.
    percentiles = (
        ("p5", 0.05), ("p10", 0.1), ("p20", 0.2), ("p25", 0.25),
        ("p30", 0.3), ("p50", 0.5), ("p75", 0.75), ("p80", 0.8),
        ("p90", 0.9), ("p95", 0.95), ("p97", 0.97), ("p99", 0.99),
    )
    for row_label, name in enumerate(df.columns.values, start=1):
        column = df[name]
        non_null = column.dropna(axis=0)
        row = {
            "Name": name,
            "Mean": column.mean(),
            "Median": column.median(),
            "Variance": column.var(),
            "S.D": column.std(),
        }
        for label, level in percentiles:
            row[label] = non_null.quantile(level)
        # DataFrame.set_value() was removed in pandas 1.0; label-based .loc
        # assignment creates the row on first write, like set_value did.
        for label, value in row.items():
            summary_df.loc[row_label, label] = value
    print(summary_df)


credit_risk = pd.read_csv(
    "E:\\Larning\\hadoop\\Data Science\\001_Python\\Class Files Python\\Class Files Python\\1.Python Programming\\3.Basic Statistics and Reporting in Python\\datasets\\Give me some Credit\\cs-training.csv",
    encoding="ISO-8859-1",
)

allsummary(credit_risk)

### How dropna(axis=0) works
### dropna expects a dataframe as input.
### axis=1 drops columns with NA values
### axis=0 drops rows with NA values

import numpy as np
df = pd.DataFrame(np.random.randn(5, 3), columns=['one', 'two', 'three'])
# Reindexing to labels 0..7 adds rows 5, 6 and 7 filled with NaN
df1 = df.reindex([0, 1, 2, 3, 4, 5, 6, 7])
df1["colfour"] = 4

print(df1)

df1[["one", "colfour"]]
df1[["one", "colfour"]].dropna(axis=0)

df1[["one", "colfour"]]
df1[["one", "colfour"]].dropna(axis=1)

# --------------------------------------------------------------------------------
# /Bayes Theorem/Bayes_Theorem.py:
# --------------------------------------------------------------------------------
# Bayes Theorem
def get_outcomes(sample_space, f_name='', e_name=''):
    """Count outcomes in a two-level sample space.

    Args:
        sample_space: dict mapping a group name to a dict of
            ``{event name: number of outcomes}``.
        f_name: group to restrict the count to; ``''`` means all groups.
        e_name: event to restrict the count to; ``''`` means all events.

    Returns:
        The total number of outcomes matching both filters.
    """
    return sum(
        count
        for group, events in sample_space.items()
        if f_name in ('', group)
        for event, count in events.items()
        if e_name in ('', event)
    )


def p(sample_space, f_name):
    """Return P(F): the share of all outcomes that belong to group *f_name*."""
    return get_outcomes(sample_space, f_name) / get_outcomes(sample_space, '', '')


def p_inters(sample_space, f_name, e_name):
    """Return P(F and E): the share of all outcomes in group *f_name* with event *e_name*."""
    return get_outcomes(sample_space, f_name, e_name) / get_outcomes(sample_space, '', '')


def p_conditional(sample_space, f_name, e_name):
    """Return P(E | F): probability of event *e_name* within group *f_name*.

    Raises:
        ZeroDivisionError: if group *f_name* has no outcomes.
    """
    return p_inters(sample_space, f_name, e_name) / p(sample_space, f_name)


def bayes(sample_space, f, given_e):
    """Return P(F | E) via Bayes' theorem.

    P(F | E) = P(F) * P(E | F) / sum over all groups G of P(G) * P(E | G)

    Raises:
        ZeroDivisionError: if group *f* has no outcomes, or if event
            *given_e* never occurs in the sample space.
    """
    evidence = 0
    for group in sample_space:
        prior = p(sample_space, group)
        # A group with no outcomes adds nothing to P(E); skipping it also
        # avoids the division by zero inside p_conditional().
        if prior:
            evidence += prior * p_conditional(sample_space, group, given_e)
    return p(sample_space, f) * p_conditional(sample_space, f, given_e) / evidence


sample_space = {'UK': {'Boy': 10, 'Girl': 20},
                'FR': {'Boy': 10, 'Girl': 10},
                'CA': {'Boy': 10, 'Girl': 30}}

print('Probability of being from FR:', p(sample_space, 'FR'))
print('Probability to be French Boy:', p_inters(sample_space, 'FR', 'Boy'))
print('Probability of being a Boy given a person is from FR:', p_conditional(sample_space, 'FR', 'Boy'))
print('Probability to be from France given person is Boy:', bayes(sample_space, 'FR', 'Boy'))


sample_space = {'Grow':  {'Up': 160, 'Down': 40},
                'Slows': {'Up': 30, 'Down': 70}}

print('Probability economy is growing when stock is Up:', bayes(sample_space, 'Grow', 'Up'))

# --------------------------------------------------------------------------------
# /Data_and_DataTypes/Data_types.py:
# --------------------------------------------------------------------------------
# Types of Data

# 1. Qualitative Data: “Data associated with the quality in different categories”

# (a). Ordinal Data

marks = int(input("Enter your percentage"))

# The branches are tested in ascending order, so each one only needs its upper bound.
if marks <= 40:
    print("Fail")
elif marks <= 50:
    print("Second Class")
elif marks <= 60:
    print("First Class")
else:
    print("Congratulations!! You got distinction")

# [“Grades are given to students according to their score on the exam”]


# (b). Nominal Data

ans1 = input("Would you like to go for shopping? if yes then type Y / y else N/n")
ans2 = input("Do you love talking on any topic? if yes then type Y / y else N/n")

if ans1 in ('Y', 'y') and ans2 in ('Y', 'y'):
    print("Welcome to girls club")
else:
    print("Sorry boys, you can not join the club")

# [“Girls and boys are differentiated based on the answers given by them.”]


#-------------------------------------------------------------------------------------------

# 2. Quantitative Data : “Data associated with quantity which can be measured”

# (a). Discrete Data : Product_List(in fixed quantity)

Product_List = {"Samsung": 500, "Apple": 30, "Nokia": 10, "LG": 450, "Sony": 200}
print(Product_List)


# (b).
Continuous Data = Weight of Patients(in Kg) 45 | Patients_Weight = {"P_ID001":86.5, "P_ID002":91.3, "P_ID003":45.8, "P_ID004":78.2, "P_ID005":80.3} 46 | print(Patients_Weight) 47 | -------------------------------------------------------------------------------- /Descriptive_Statstics_using_Python/Measure_of_Center.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Measure_of_Center.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | } 14 | }, 15 | "cells": [ 16 | { 17 | "cell_type": "markdown", 18 | "metadata": { 19 | "id": "gmHVVuCdiHnl", 20 | "colab_type": "text" 21 | }, 22 | "source": [ 23 | "# Measure of Center\n", 24 | "\n", 25 | "---\n", 26 | "\n" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "metadata": { 32 | "id": "Cs6hAcU2h5NT", 33 | "colab_type": "code", 34 | "colab": {} 35 | }, 36 | "source": [ 37 | "import numpy as np\n", 38 | "x=[2,4,6,7,20,10,22]\n", 39 | "y=np.array(x)" 40 | ], 41 | "execution_count": 0, 42 | "outputs": [] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": { 47 | "id": "v0B8j5IsiDL7", 48 | "colab_type": "text" 49 | }, 50 | "source": [ 51 | "# Mean:\n", 52 | "\n", 53 | "---\n", 54 | "\n" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "metadata": { 60 | "id": "oYOWjwMviCLm", 61 | "colab_type": "code", 62 | "colab": { 63 | "base_uri": "https://localhost:8080/", 64 | "height": 34 65 | }, 66 | "outputId": "1a7c27be-63e3-4423-cfad-da76e679bf10" 67 | }, 68 | "source": [ 69 | "print(\"Mean is : \",y.mean())" 70 | ], 71 | "execution_count": 3, 72 | "outputs": [ 73 | { 74 | "output_type": "stream", 75 | "text": [ 76 | "Mean is : 10.142857142857142\n" 77 | ], 78 | "name": "stdout" 79 | } 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": { 85 | "id": "0IpYCciHiQKX", 86 | 
"colab_type": "text" 87 | }, 88 | "source": [ 89 | "# Median:\n", 90 | "\n", 91 | "---\n", 92 | "\n" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "metadata": { 98 | "id": "ZRjbvMUoiM9U", 99 | "colab_type": "code", 100 | "colab": { 101 | "base_uri": "https://localhost:8080/", 102 | "height": 34 103 | }, 104 | "outputId": "4ee89fc5-f940-4595-afd3-d95b90ac2663" 105 | }, 106 | "source": [ 107 | "print(\"Median is : \",np.median(y))" 108 | ], 109 | "execution_count": 4, 110 | "outputs": [ 111 | { 112 | "output_type": "stream", 113 | "text": [ 114 | "Median is : 7.0\n" 115 | ], 116 | "name": "stdout" 117 | } 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": { 123 | "id": "Q3hBUZmEiYCh", 124 | "colab_type": "text" 125 | }, 126 | "source": [ 127 | "# Mode:\n", 128 | "\n", 129 | "---\n" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "metadata": { 135 | "id": "k-wIC7j_iPdR", 136 | "colab_type": "code", 137 | "colab": { 138 | "base_uri": "https://localhost:8080/", 139 | "height": 34 140 | }, 141 | "outputId": "1cb99ae2-a249-4b9a-ab13-f344cd7dc583" 142 | }, 143 | "source": [ 144 | "from statistics import mode\n", 145 | "print(\"Mode is:\",mode([1, 1, 2, 3, 3, 3, 3, 4]))" 146 | ], 147 | "execution_count": 5, 148 | "outputs": [ 149 | { 150 | "output_type": "stream", 151 | "text": [ 152 | "Mode is: 3\n" 153 | ], 154 | "name": "stdout" 155 | } 156 | ] 157 | } 158 | ] 159 | } -------------------------------------------------------------------------------- /Descriptive_Statstics_using_Python/Measure_of_Centre.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | x=[2,4,6,7,20,10,22] 4 | y=np.array(x) 5 | 6 | print("Mean is : ",y.mean()) 7 | 8 | print("Median is : ",np.median(y)) 9 | print("\n") 10 | print("Mean is : ",y.mean()) 11 | 12 | print("\n") 13 | 14 | from statistics import mode 15 | print("Mode is:",mode([1, 1, 2, 3, 3, 3, 3, 4])) 16 | 
-------------------------------------------------------------------------------- /Descriptive_Statstics_using_Python/Measure_of_Spread.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Measure_of_Spread.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | } 14 | }, 15 | "cells": [ 16 | { 17 | "cell_type": "markdown", 18 | "metadata": { 19 | "id": "wPQ-oKmUd4VC", 20 | "colab_type": "text" 21 | }, 22 | "source": [ 23 | "# Range : \n", 24 | "\n", 25 | "---\n", 26 | "\n", 27 | "Range = X(lagest) - X (lowest)" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "metadata": { 33 | "id": "zFat5FJ8d1-6", 34 | "colab_type": "code", 35 | "colab": { 36 | "base_uri": "https://localhost:8080/", 37 | "height": 51 38 | }, 39 | "outputId": "10536d99-7178-4d92-8fce-3f5f325c3c14" 40 | }, 41 | "source": [ 42 | "import numpy as np\n", 43 | "A=np.array([[10,14,11,7,9.5,15,19],[8,9,17,14.5,12,18,15.5], [15,7.5,11.5,10,10.5,7,11],[11.5,11,9,12,14,12,7.5]])\n", 44 | "A\n", 45 | "\n", 46 | "B=A.T\n", 47 | "B\n", 48 | "\n", 49 | "a=np.ptp(B, axis=0)\n", 50 | "b=np.ptp(B,axis=1)\n", 51 | "\n", 52 | "print(\"Range in Array A:\",a)\n", 53 | "print(\"Range in Array B:\",b)" 54 | ], 55 | "execution_count": 4, 56 | "outputs": [ 57 | { 58 | "output_type": "stream", 59 | "text": [ 60 | "Range in Array A: [12. 10. 8. 6.5]\n", 61 | "Range in Array B: [ 7. 6.5 8. 7.5 4.5 11. 
11.5]\n" 62 | ], 63 | "name": "stdout" 64 | } 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": { 70 | "id": "qmZJRD0xekaC", 71 | "colab_type": "text" 72 | }, 73 | "source": [ 74 | "# Quartile\n", 75 | "\n", 76 | "---\n", 77 | "\n" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "metadata": { 83 | "id": "onAG-ZzleNRO", 84 | "colab_type": "code", 85 | "colab": { 86 | "base_uri": "https://localhost:8080/", 87 | "height": 85 88 | }, 89 | "outputId": "79be6256-2bf0-4d90-e955-b9f77b17d9da" 90 | }, 91 | "source": [ 92 | "A=np.array([[10,14,11,7,9.5,15,19],[8,9,17,14.5,12,18,15.5], [15,7.5,11.5,10,10.5,7,11],[11.5,11,9,12,14,12,7.5]])\n", 93 | "\n", 94 | "B=A.T\n", 95 | "\n", 96 | "a=np.percentile(B,27,axis=0, interpolation='lower')\n", 97 | "b=np.percentile(B,25,axis=1, interpolation='lower')\n", 98 | "c=np.percentile(B,75,axis=0, interpolation='lower')\n", 99 | "d=np.percentile(B,50,axis=0, interpolation='lower')\n", 100 | "\n", 101 | "print(a)\n", 102 | "\n", 103 | "print(b)\n", 104 | "\n", 105 | "print(c)\n", 106 | "\n", 107 | "print(d)" 108 | ], 109 | "execution_count": 5, 110 | "outputs": [ 111 | { 112 | "output_type": "stream", 113 | "text": [ 114 | "[9.5 9. 7.5 9. ]\n", 115 | "[8. 7.5 9. 7. 9.5 7. 7.5]\n", 116 | "[14. 15.5 11. 12. ]\n", 117 | "[11. 
14.5 10.5 11.5]\n" 118 | ], 119 | "name": "stdout" 120 | } 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": { 126 | "id": "7Do6eEXLgj2i", 127 | "colab_type": "text" 128 | }, 129 | "source": [ 130 | "# inter-qurtile range\n", 131 | "\n", 132 | "---\n", 133 | "\n", 134 | "\n" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "metadata": { 140 | "id": "v0J_1_TVgqF_", 141 | "colab_type": "code", 142 | "colab": { 143 | "base_uri": "https://localhost:8080/", 144 | "height": 34 145 | }, 146 | "outputId": "0956b1cd-d0f9-4058-ccd3-d50a41a4caed" 147 | }, 148 | "source": [ 149 | "import numpy as np\n", 150 | "from scipy.stats import iqr\n", 151 | "A=np.array([[10,14,11,7,9.5,15,19],[8,9,17,14.5,12,18,15.5], [15,7.5,11.5,10,10.5,7,11],[11.5,11,9,12,14,12,7.5]])\n", 152 | "\n", 153 | "B=A.T\n", 154 | "\n", 155 | "a=iqr(B, axis=0 , rng=(25, 75), interpolation='lower')\n", 156 | "b=iqr(B, axis=1 , rng=(25, 75), interpolation='lower')\n", 157 | "\n", 158 | "print(a,b)" 159 | ], 160 | "execution_count": 7, 161 | "outputs": [ 162 | { 163 | "output_type": "stream", 164 | "text": [ 165 | "[4.5 6.5 3.5 3. ] [3.5 3.5 2.5 5. 2.5 8. 8. 
]\n" 166 | ], 167 | "name": "stdout" 168 | } 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": { 174 | "id": "TeDGcMkvg5je", 175 | "colab_type": "text" 176 | }, 177 | "source": [ 178 | "# Variance\n", 179 | "\n", 180 | "---\n", 181 | "\n" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "metadata": { 187 | "id": "RA52W0_HgrOz", 188 | "colab_type": "code", 189 | "colab": { 190 | "base_uri": "https://localhost:8080/", 191 | "height": 51 192 | }, 193 | "outputId": "4554289c-45b5-4a5c-d611-4092f80e7ee8" 194 | }, 195 | "source": [ 196 | "import numpy as np\n", 197 | "A=np.array([[10,14,11,7,9.5,15,19],[8,9,17,14.5,12,18,15.5],\n", 198 | " [15,7.5,11.5,10,10.5,7,11],[11.5,11,9,12,14,12,7.5]])\n", 199 | "\n", 200 | "B=A.T\n", 201 | "\n", 202 | "a = np.var(B,axis=0)\n", 203 | "b = np.var(B,axis=1)\n", 204 | "\n", 205 | "print(a)\n", 206 | "\n", 207 | "print(b)" 208 | ], 209 | "execution_count": 8, 210 | "outputs": [ 211 | { 212 | "output_type": "stream", 213 | "text": [ 214 | "[13.98979592 12.8877551 6.12244898 3.92857143]\n", 215 | "[ 6.546875 5.921875 8.796875 7.546875 2.875 16.5 19.0625 ]\n" 216 | ], 217 | "name": "stdout" 218 | } 219 | ] 220 | }, 221 | { 222 | "cell_type": "markdown", 223 | "metadata": { 224 | "id": "H40m2neihCJC", 225 | "colab_type": "text" 226 | }, 227 | "source": [ 228 | "# Standard deviation\n", 229 | "\n", 230 | "---\n", 231 | "\n" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "metadata": { 237 | "id": "9pETuLnmhAsc", 238 | "colab_type": "code", 239 | "colab": { 240 | "base_uri": "https://localhost:8080/", 241 | "height": 68 242 | }, 243 | "outputId": "1ff2c7d4-2723-4a54-cf94-5e5968bfcd17" 244 | }, 245 | "source": [ 246 | "import numpy as np\n", 247 | "A=np.array([[10,14,11,7,9.5,15,19],[8,9,17,14.5,12,18,15.5],\n", 248 | " [15,7.5,11.5,10,10.5,7,11],[11.5,11,9,12,14,12,7.5]])\n", 249 | "B=A.T\n", 250 | "a = np.std(B,axis=0)\n", 251 | "b = np.std(B,axis=1)\n", 252 | "print(a)\n", 253 | "\n", 254 | 
"print(b)" 255 | ], 256 | "execution_count": 9, 257 | "outputs": [ 258 | { 259 | "output_type": "stream", 260 | "text": [ 261 | "[3.74029356 3.58995196 2.4743583 1.98206242]\n", 262 | "[2.55868619 2.43349029 2.96595263 2.74715762 1.6955825 4.0620192\n", 263 | " 4.3660623 ]\n" 264 | ], 265 | "name": "stdout" 266 | } 267 | ] 268 | } 269 | ] 270 | } -------------------------------------------------------------------------------- /Descriptive_Statstics_using_Python/Measure_of_Spread.py: -------------------------------------------------------------------------------- 1 | #Range 2 | import numpy as np 3 | A=np.array([[10,14,11,7,9.5,15,19],[8,9,17,14.5,12,18,15.5], [15,7.5,11.5,10,10.5,7,11],[11.5,11,9,12,14,12,7.5]]) 4 | A 5 | 6 | B=A.T 7 | B 8 | 9 | a=np.ptp(B, axis=0) 10 | b=np.ptp(B,axis=1) 11 | 12 | print(a) 13 | print(b) 14 | 15 | 16 | #Quartile 17 | A=np.array([[10,14,11,7,9.5,15,19],[8,9,17,14.5,12,18,15.5], [15,7.5,11.5,10,10.5,7,11],[11.5,11,9,12,14,12,7.5]]) 18 | 19 | B=A.T 20 | 21 | a=np.percentile(B,27,axis=0, interpolation='lower') 22 | b=np.percentile(B,25,axis=1, interpolation='lower') 23 | c=np.percentile(B,75,axis=0, interpolation='lower') 24 | d=np.percentile(B,50,axis=0, interpolation='lower') 25 | 26 | print(a) 27 | 28 | print(b) 29 | 30 | print(c) 31 | 32 | print(d) 33 | 34 | 35 | 36 | #inter-qurtile range 37 | import numpy as np 38 | from scipy.stats import iqr 39 | A=np.array([[10,14,11,7,9.5,15,19],[8,9,17,14.5,12,18,15.5] [15,7.5,11.5,10,10.5,7,11],[11.5,11,9,12,14,12,7.5]]) 40 | 41 | B=A.T 42 | 43 | a=iqr(B, axis=0 , rng=(25, 75), interpolation='lower') 44 | b=iqr(B, axis=1 , rng=(25, 75), interpolation='lower') 45 | 46 | print(a,b) 47 | 48 | 49 | #Variance 50 | 51 | import numpy as np 52 | A=np.array([[10,14,11,7,9.5,15,19],[8,9,17,14.5,12,18,15.5], 53 | [15,7.5,11.5,10,10.5,7,11],[11.5,11,9,12,14,12,7.5]]) 54 | 55 | B=A.T 56 | 57 | a = np.var(B,axis=0) 58 | b = np.var(B,axis=1) 59 | 60 | print(a) 61 | 62 | print(b) 63 | 64 | 65 | #Standard 
deviation 66 | import numpy as np 67 | A=np.array([[10,14,11,7,9.5,15,19],[8,9,17,14.5,12,18,15.5], 68 | [15,7.5,11.5,10,10.5,7,11],[11.5,11,9,12,14,12,7.5]]) 69 | B=A.T 70 | a = np.std(B,axis=0) 71 | b = np.std(B,axis=1) 72 | print(a) 73 | 74 | print(b) 75 | -------------------------------------------------------------------------------- /Event_Intersaction_and_Union/Intersaction_and_Union.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Intersaction_and_Union.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | } 14 | }, 15 | "cells": [ 16 | { 17 | "cell_type": "markdown", 18 | "metadata": { 19 | "id": "zZN6q0CIgHh9", 20 | "colab_type": "text" 21 | }, 22 | "source": [ 23 | "# Intersaction of Event\n", 24 | "\n", 25 | "---\n", 26 | "\n" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "metadata": { 32 | "id": "ddwYCw7EJOLy", 33 | "colab_type": "code", 34 | "colab": { 35 | "base_uri": "https://localhost:8080/", 36 | "height": 34 37 | }, 38 | "outputId": "628fd4bf-ebe4-4346-f476-c3ad012e5514" 39 | }, 40 | "source": [ 41 | "dice=[1,2,3,4,5,6]\n", 42 | "\n", 43 | "n=len(dice)\n", 44 | "\n", 45 | "for i in range(0,n):\n", 46 | " if((dice[i]%2==0)and(dice[i]%3)==0):\n", 47 | " print(dice[i])" 48 | ], 49 | "execution_count": 1, 50 | "outputs": [ 51 | { 52 | "output_type": "stream", 53 | "text": [ 54 | "6\n" 55 | ], 56 | "name": "stdout" 57 | } 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": { 63 | "id": "Q8gY-vc6gSSf", 64 | "colab_type": "text" 65 | }, 66 | "source": [ 67 | "# Union of events\n", 68 | "\n", 69 | "---\n", 70 | "\n", 71 | "\n" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "metadata": { 77 | "id": "JXw0SR7RgMMs", 78 | "colab_type": "code", 79 | "colab": { 80 | "base_uri": "https://localhost:8080/", 81 | 
"height": 51 82 | }, 83 | "outputId": "f7f367f2-d034-4e32-a5e2-1bbc6bea5a5e" 84 | }, 85 | "source": [ 86 | "dice=[1,2,3,4,5,6]\n", 87 | "\n", 88 | "n=len(dice)\n", 89 | "\n", 90 | "for i in range(0,n):\n", 91 | " if((dice[i]==4)or(dice[i]==6)):\n", 92 | " print(dice[i])" 93 | ], 94 | "execution_count": 2, 95 | "outputs": [ 96 | { 97 | "output_type": "stream", 98 | "text": [ 99 | "4\n", 100 | "6\n" 101 | ], 102 | "name": "stdout" 103 | } 104 | ] 105 | } 106 | ] 107 | } -------------------------------------------------------------------------------- /Probability_Distribution/Probability_Distribution.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Probability_Distribution.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | } 14 | }, 15 | "cells": [ 16 | { 17 | "cell_type": "markdown", 18 | "metadata": { 19 | "id": "VAoq8_D1F4eN", 20 | "colab_type": "text" 21 | }, 22 | "source": [ 23 | "# Normal Distribution Curve:\n", 24 | "\n", 25 | "---\n", 26 | "\n" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "metadata": { 32 | "id": "jPTbU8IrF2pG", 33 | "colab_type": "code", 34 | "colab": { 35 | "base_uri": "https://localhost:8080/", 36 | "height": 269 37 | }, 38 | "outputId": "95d3c5e2-74fd-4fbf-c6e9-2ff397f49f47" 39 | }, 40 | "source": [ 41 | "from scipy.stats import norm\n", 42 | "import numpy as np\n", 43 | "import matplotlib.pyplot as plt\n", 44 | "\n", 45 | "x= np.arange(-4,4,0.001)\n", 46 | "plt.plot(x, norm.pdf(x))\n", 47 | "plt.show()" 48 | ], 49 | "execution_count": 1, 50 | "outputs": [ 51 | { 52 | "output_type": "display_data", 53 | "data": { 54 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAX0AAAD8CAYAAACb4nSYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xl8leWZ//HPdU72sGaHhJ2whCCg\nkcV9QSRawVat2Nbaqa11qmP7aztT/dmfTu10Xl2mTmemTpUZbaetlKLWigrirlXZAgRIWENYkkDI\nAgmErCfn+v2Rg3OMQE7CSZ6zXO/X67w8z3bON5hc5zn3cz/3LaqKMcaY6OByOoAxxpiBY0XfGGOi\niBV9Y4yJIlb0jTEmiljRN8aYKGJF3xhjoogVfWOMiSJW9I0xJopY0TfGmCgS43SA7tLS0nTs2LFO\nxzDGmLCyadOmOlVN72m/kCv6Y8eOpaioyOkYxhgTVkTkYCD7WfOOMcZEESv6xhgTRazoG2NMFLGi\nb4wxUSSgoi8iC0Vkt4iUiciD59jvFhFRESnwW/eQ77jdInJ9MEIbY4zpmx5774iIG3gCuA6oBDaK\nyEpV3dFtv8HAt4D1fuvygCXANGAk8KaITFLVzuD9CMYYYwIVyJn+bKBMVctVtR1YDiw+w34/An4K\ntPqtWwwsV9U2Vd0PlPlezxhjjAMC6aefDVT4LVcCc/x3EJELgVGq+qqI/H23Y9d1Oza7j1mNccSh\n+mbW76/n6IlWYt0uRqckMWd8KinJcU5HM6bXzvvmLBFxAY8DXzmP17gHuAdg9OjR5xvJmPOmqry1\ns4Yn3i1jy6GGT213u4SF+Vl869pcJmUOdiChMX0TSNGvAkb5Lef41p02GMgH3hURgCxgpYgsCuBY\nAFR1KbAUoKCgwGZqN46qb2rjoT9v5/UdRxmdksQPbpzKlZPSGZWShMer7K4+yWslR1i+sYI1JdX8\n7VUT+Pb8Sbhd4nR0Y3okqueusSISA+wBrqWrYG8EvqCqpWfZ/13ge6paJCLTgGV0teOPBN4Ccs91\nIbegoEBtGAbjlD1HT/LV326k5mQb371uEndfNo4Y95kvfR071c4/vbqDP2+u4vLcNJ744oUMSYgd\n4MTGdBGRTapa0NN+PV7IVVUPcD+wBtgJrFDVUhF5zHc2f65jS4EVwA7gNeA+67ljQtWOwye49dcf\n0ebx8tw35vGNKyecteADpCTH8fjnZ/KTz01nXXk9dz69gcaWjgFMbEzv9XimP9DsTN84YV9tE59/\nci3xMS7+9I15jEpJ6tXxb+w4yjef3cT07KEs+/pcEmLd/ZTUmDML2pm+MZGusaWDr/1PESLwh6/N\n6XXBB7guL5N/WzKLzYca+IfntxFqJ1PGnGZF30Q1r1f5zp+KqTjWzK+/dBHj0wf1+bVumD6Cv79+\nMiu3Hua//7o/iCmNCR4r+iaqPfPhft7aVcMPbpzKxWNTzvv1vnnVBBbkZfKzNbsoqWoMQkJjgsuK\nvolaZTVN/HzNbuZPzeCuS8YG5TVFhJ/ecgEpyXE8sHwLrR3Wb8GEFiv6Jip1epXvPbeVxDg3//y5\n6fjuMQmK4clx/OK2mZTXnuI/3t4btNc1Jhis6JuotGzDIYorGvjhomlkDE4I+utflpvG5y7MZun7\n5ew9ejLor29MX1nRN1Hn+Kl2fvH6buaNT2XRjJH99j4P3zCV5PgYHv5LifXmMSHDir6JOo+/sYeT\nrR4eXZQX1Gad7lIHxfP9hVPYsP8Ya0qr++19jOkNK/omquw5epJn1x/kzrljmJI1pN/f77aLcpiU\nOYifvrabjk5vv7+fMT2xom+iyuOv7yE5LoZvXZs7IO8X43bxYOEU9tedYvmGQwPynsacixV9EzW2\nVzbyWmk1d18+juEDOBb+1ZMzmDMuhV++uZfmds+Ava8xZ2JF3
0SNX7yxm2FJsXz1snED+r4iwj8s\nnEz9qXaWrbezfeMsK/omKmw6eIx3d9fyjSsmODL88UVjUrhkQipPvV9uN2wZR1nRN1Hh1+/uY3hS\nLHddMsaxDA9cm0vtyTb+tLGi552N6SdW9E3E23v0JG/urOGuS8aSFHfeM4T22dzxqcwem8Kv391H\nm8fO9o0zrOibiLf0/XISYl18ed5Yp6Nw/zUTqT7RystbjzgdxUSpgIq+iCwUkd0iUiYiD55h+70i\nsl1EikXkAxHJ860fKyItvvXFIvJksH8AY86lurGVvxRXcXvBKFIGsMfO2Vyem8bkzME8/cF+u0vX\nOKLHoi8ibuAJoBDIA+44XdT9LFPV6ao6E/gZ8Ljftn2qOtP3uDdYwY0JxG8+2k+nV/na5eOdjgJ0\n9eT56mVj2XnkBGvL652OY6JQIGf6s4EyVS1X1XZgObDYfwdVPeG3mAzYKYxxXHO7h2XrD1E4fUSf\nZsPqL4tnZpOaHMczH9hEK2bgBVL0swH/7gaVvnWfICL3icg+us70H/DbNE5EtojIeyJy+XmlNaYX\nVhYf5mSrh78J0lj5wZIQ6+aLc8fw5s4aymubnI5jokzQLuSq6hOqOgH4PvAD3+ojwGhVnQV8B1gm\nIp8a8ERE7hGRIhEpqq2tDVYkE8VUld+tPciUrMFcNGa403E+5c65Y4hzu/ifjw44HcVEmUCKfhUw\nym85x7fubJYDNwOoapuq1vuebwL2AZO6H6CqS1W1QFUL0tPTA81uzFltPtTAjiMn+PK8sf06kmZf\npQ+Op3B6Fn/eUkVLu3XfNAMnkKK/EcgVkXEiEgcsAVb67yAi/qNX3Qjs9a1P910IRkTGA7lAeTCC\nG3Muv197gMHxMSye2X/j5Z+vL8wezclWDy9vO+x0FBNFeiz6quoB7gfWADuBFapaKiKPicgi3273\ni0ipiBTT1Yxzl2/9FcA23/rngXtV9VjQfwpj/NQ1tbFqezW3XJRDcrxzN2P1ZPa4FCZmDLLxeMyA\nCugvQlVXAau6rXvE7/m3znLcC8AL5xPQmN5aUVRBe6eXL811bsiFQIgId8wezY9e2cGOwyfIG9n/\n4/sbY3fkmoiiqjxXVPnxWXSou+XCbOJiXCzbcNDpKCZKWNE3EaXo4HH2153i8wWjet45BAxLiuMz\n00fwly2HOdVmY+2b/mdF30SU54oqSI5zc8P0LKejBOyOOaNpavOwusTm0TX9z4q+iRin2jy8su0I\nn7lgpKOjafZWwZjhjE1N4oVNlU5HMVHAir6JGK9uP0Jzeye3FeQ4HaVXRITPXZjD2vJ6Ko41Ox3H\nRDgr+iZiPF9Uyfi05JC8A7cnn53VNbLJi1vOdd+jMefPir6JCPvrTrHhwDFuLcgJyTtwezIqJYl5\n41P58+ZKG3LZ9Csr+iYivLCpEpfALReGV9OOv1suyuFAfTObDh53OoqJYFb0TdhTVf5SXMWlE9PI\nHJLgdJw+K8zPIinOzQub7YKu6T9W9E3Y23zoOJXHW7h55qdG/A4ryfExLMzP4pWtR2jtsEHYTP+w\nom/C3kvFh4mPcbFgWqbTUc7bLRfmcLLNw1s7a5yOYiKUFX0T1jo6vby67Qjzp2YyOCHW6Tjnbe74\nVNIHx7Nyq/XiMf3Dir4Jax+W1VF/qj2kh1DuDbdLuHH6CN7ZXcuJ1g6n45gIZEXfhLWXig8zJCGG\nKydHzuQ7i2aOpN3jZY0Ny2D6gRV9E7Za2jtZU1rNDdNHEB/jdjpO0MwaNYxRKYms3GqTq5jgs6Jv\nwtabO4/S3N7J4jDvtdOdiHDTBSP5aF89dU1tTscxESagoi8iC0Vkt4iUiciDZ9h+r4hsF5FiEflA\nRPL8tj3kO263iFwfzPAmur1UfJisIQnMHpfidJSgWzRzJJ1eZdX2I05HMRGmx6Lvm+P2CaAQyAPu\n8C/qPstUdbqqzgR+BjzuO
zaPrjl1pwELgf88PWeuMeejobmd9/bUcNOMEbhd4TfsQk+mZA1hUuYg\nVhZbE48JrkDO9GcDZaparqrtwHJgsf8OqnrCbzEZOD14yGJguaq2qep+oMz3esacl9d3HKWjU7lp\nRmT02jmTRTNGUnTwOFUNLU5HMREkkKKfDVT4LVf61n2CiNwnIvvoOtN/oJfH3iMiRSJSVFtbG2h2\nE8XWlFSTPSyR6dlDnY7Sb05/oL1sF3RNEAXtQq6qPqGqE4DvAz/o5bFLVbVAVQvS0yOn653pHydb\nO/jr3joW5meF5YiagRqTmsyMnKG8us3a9U3wBFL0qwD/CUdzfOvOZjlwcx+PNaZHb++qob3TS2F+\n+EyJ2Fc3TB/B9qpGm1zFBE0gRX8jkCsi40Qkjq4Lsyv9dxCRXL/FG4G9vucrgSUiEi8i44BcYMP5\nxzbRbE1pNemD47lwdPhNltJbhfkjAFhdYmf7Jjh6LPqq6gHuB9YAO4EVqloqIo+JyCLfbveLSKmI\nFAPfAe7yHVsKrAB2AK8B96mqDR9o+qylvZN3dtVy/bRMXBHYa6e70alJ5GcPYdV2uzvXBEdAs0er\n6ipgVbd1j/g9/9Y5jv0x8OO+BjTG33t7amnp6Pz4DDga3DB9BD97bTdVDS1kD0t0Oo4Jc3ZHrgkr\na0qrGZYUG5E3ZJ3N6Q+412wsHhMEVvRN2Gj3eHlz51Gum5pJrDt6fnXHpSUzdcQQVtvduSYIoucv\nx4S9D/fVcbLVQ+H0yO+1090N+VkUHTxOdWOr01FMmLOib8LGmpJqBsXHcOnENKejDLjC6aebeOxs\n35wfK/omLHg6vby+4yjXTMmIqGGUAzUxYxCTMgexytr1zXmyom/CwoYDxzh2qj0qbsg6m8L8EWw8\ncIyak9bEY/rOir4JC2tKqkmIdUXUDFm9dcP0EajCmtKjTkcxYcyKvgl5Xq/yWmk1V05KJykuoFtL\nItKkzEFMSE+2XjzmvFjRNyFvS0UDR0+0RdUNWWciItwwfQTryuuptxm1TB9Z0Tchb01pNbFu4eop\nGU5HcdzC/Cy82jWfgDF9YUXfhDRVZXXJES6dmMbQxFin4zgub8QQRqcksdp68Zg+sqJvQlrp4RNU\nHGuJ6l47/kSEwvwsPiqro7G5w+k4JgxZ0TchbU1pNS6B+VMznY4SMhbmZ+HxKm/utCYe03tW9E1I\nW11SzZxxqaQOinc6SsiYkTOMEUMTrInH9IkVfROyympOUlbTFJVj7ZyLyyVcPy2L9/fW0tTmcTqO\nCTMBFX0RWSgiu0WkTEQePMP274jIDhHZJiJvicgYv22dIlLse6zsfqwxZ3N6KOEFeVb0uyvMz6Ld\n4+WdXTVORzFhpseiLyJu4AmgEMgD7hCRvG67bQEKVPUC4HngZ37bWlR1pu+xCGMCtLqkmgtHDyNr\naILTUUJOwdgU0gbF2Rj7ptcCOdOfDZSparmqttM18fli/x1U9R1VPT1z8zq6JkA3ps8O1TdTevhE\n1N+QdTZul7BgWhbv7K6htcNmIDWBC6ToZwMVfsuVvnVnczew2m85QUSKRGSdiNzch4wmCq0p7TqD\nXWhdNc+qMD+L5vZO3ttT63QUE0aCeiFXRL4EFAA/91s9RlULgC8AvxSRCWc47h7fB0NRba39AhtY\nXXKEaSOHMColyekoIWvu+FSGJsZaE4/plUCKfhUwym85x7fuE0RkPvAwsEhVPx4YRFWrfP8tB94F\nZnU/VlWXqmqBqhakp0fvKIqmS3VjK5sPNdgNWT2Idbu4Li+TN3cepd3jdTqOCROBFP2NQK6IjBOR\nOGAJ8IleOCIyC3iKroJf47d+uIjE+56nAZcCO4IV3kSm13dY006gCvOzONnq4cN9dU5HMWGix6Kv\nqh7gfmANsBNYoaqlIvKYiJzujfNzYBDwXLeumVOBIhHZCrwD/ERVreibc1q9vZqJGYOYmDH
Y6Sgh\n77LcNAbFx/DadmviMYEJaHByVV0FrOq27hG/5/PPctxHwPTzCWiiS31TG+v313Pf1ROdjhIW4mPc\nXDMlg9d3VPPjznxi3Ha/pTk3+w0xIeXNnUfxKlw/zZp2AlWYn8Xx5g427D/mdBQTBqzom5CyuqSa\nUSmJTBs5xOkoYePKyekkxLpsLB4TECv6JmQ0tnTwYVkdhfkjEBGn44SNpLgYrpqUwZrSarxedTqO\nCXFW9E3IeGdXDR2dak07fVA4PYuak21sPnTc6SgmxFnRNyFjdckRMofEM2vUMKejhJ1rpmQQ57Ym\nHtMzK/omJDS3e3hvTy0Lp2XhclnTTm8NTojlstw0XiupRtWaeMzZWdE3IeG93bW0dni53m7I6rOF\n+VlUNbSwvarR6SgmhFnRNyFhdUk1KclxzB6b4nSUsHXd1EzcLrEmHnNOVvSN49o8nby9q4YFeZl2\nc9F5GJ4cx7zxqdbEY87J/sKM4z4sq6OpzWNNO0GwMD+L/XWn2H30pNNRTIiyom8ct3p7NYPjY7h0\nQprTUcLegmmZiHT9mxpzJlb0jaM6Or28sfMo8/MyiYuxX8fzlTE4gYvHpNgY++as7K/MOGpdeT0N\nzR02jHIQLczPYvfRk+yrbXI6iglBVvSNo1aXVJMU5+bKSTZ5TrCc/gC1s31zJlb0jWM6vcrrpdVc\nPSWDhFi303EixshhicwcNYzVJUecjmJCkBV945iNB45R19TODfkjnI4ScQrzsyipOkHFsWano5gQ\nE1DRF5GFIrJbRMpE5MEzbP+OiOwQkW0i8paIjPHbdpeI7PU97gpmeBPeXiupJj7GxVWTrWkn2Ap9\nH6TWxGO667Hoi4gbeAIoBPKAO0Qkr9tuW4ACVb0AeB74me/YFOBRYA4wG3hURIYHL74JV16vsrrk\nCFdNTic5PqAJ3EwvjE5NIm/EEGviMZ8SyJn+bKBMVctVtR1YDiz230FV31HV098j1wE5vufXA2+o\n6jFVPQ68ASwMTnQTzrZUNHD0RNvHZ6Qm+Arzs9h8qIHqxlano5gQEkjRzwYq/JYrfevO5m5gdR+P\nNVFi9fYjxLqFa6ZmOB0lYhVO7+rFs6bUmnjM/wrqhVwR+RJQAPy8l8fdIyJFIlJUW1sbzEgmBKkq\nq0uquTw3nSEJsU7HiVgTMwYzMWOQNfGYTwik6FcBo/yWc3zrPkFE5gMPA4tUta03x6rqUlUtUNWC\n9HS7qBfpSqpOUNXQYjdkDYDC/Cw27D9GfVNbzzubqBBI0d8I5IrIOBGJA5YAK/13EJFZwFN0Ffwa\nv01rgAUiMtx3AXeBb52JYqtKjhDjEhbkZTodJeItzM/Cq/D6jqNORzEhoseir6oe4H66ivVOYIWq\nlorIYyKyyLfbz4FBwHMiUiwiK33HHgN+RNcHx0bgMd86E6VUldXbjzBvQirDkuKcjhPx8kYMYXRK\nko2xbz4WUF85VV0FrOq27hG/5/PPcewzwDN9DWgiy67qkxyob+aeKyY4HSUqiAiF+Vk8/cF+Gps7\nGJpk11Cind2RawbU6pJqXNI1BLAZGAvzs/B4lTd3WhOPsaJvBpCq8uq2w8wel0LaoHin40SNGTnD\nGDE0wZp4DGBF3wygXdUn2Vd7is9cMNLpKFHF5RKun5bF+3traWrzOB3HOMyKvhkwL289jNvV1cZs\nBlZhfhbtHi/v7KrpeWcT0azomwGhqryy7QiXTEgl1Zp2BlzB2BTSBsXZAGzGir4ZGNsqGzl0rJmb\nZljTjhPcLmHBtCze2V1Da0en03GMg6zomwHxyrbDxLqF6/OsaccphflZNLd38t4eG+okmlnRN/3O\n6+1q2rlyUrr1E3fQ3PGpDE2MtSaeKGdF3/S7zYeOc6Sx1XrtOCzW7eK6vEze3HmUdo/X6TjGIVb0\nTb97eeth4mNczLexdhxXmJ/FyVYPH5RZE0+0sqJv+lW
nV3l1ezXXTMlgkM2Q5bjLctMYmhjLS8WH\nnY5iHGJF3/Sr9eX11DW1Wa+dEBEf4+aG6SN4vfQop+xGrahkRd/0q5e3HSEpzs3Vk22GrFBx88yR\ntHR08oYNtxyVrOibftPm6WTV9iNcl5dJYpzb6TjG5+KxKYwcmsBfij81n5GJAlb0Tb95Z1ctjS0d\nfHaWTYscSlwuYfGsbP66t446m1Er6ljRN/3mxS2VpA2K57KJaU5HMd3cPDO76yL7Nps/N9oEVPRF\nZKGI7BaRMhF58AzbrxCRzSLiEZFbu23r9M2m9fGMWibyNTS38/auGhbPHEmM284tQs3krMFMyRrM\ni1usiSfa9PjXKCJu4AmgEMgD7hCRvG67HQK+Aiw7w0u0qOpM32PRGbabCPTq9iN0dKo17YSwm2dl\nU1zRwIG6U05HMQMokFOw2UCZqparajuwHFjsv4OqHlDVbYDd5mcAeHFzFbkZg5g2cojTUcxZLJox\nEhGsz36UCaToZwMVfsuVvnWBShCRIhFZJyI39yqdCUuH6pspOnicz16YjYg4HcecxchhicwZl8JL\nxVWoqtNxzAAZiMbWMapaAHwB+KWIfGpGbBG5x/fBUFRba7eHh7sXt1Qh0nWx0IS2m2dmU153iq2V\njU5HMQMkkKJfBYzyW87xrQuIqlb5/lsOvAvMOsM+S1W1QFUL0tPTA31pE4JUlb8UVzF3XCojhyU6\nHcf04IYLRhAf4+L5TRU972wiQiBFfyOQKyLjRCQOWAIE1AtHRIaLSLzveRpwKbCjr2FN6CuuaGB/\n3Sm7gBsmhiTEcsP0EbxUfNgmV4kSPRZ9VfUA9wNrgJ3AClUtFZHHRGQRgIhcLCKVwG3AUyJS6jt8\nKlAkIluBd4CfqKoV/Qi2oqiSxFg3hdNtspRwcdtFOZxs9bCm1MbZjwYBDXuoqquAVd3WPeL3fCNd\nzT7dj/sImH6eGU2YaG738PLWw9wwfQSDE2yylHAxd3wqOcMTea6oksV2HSbi2V0zJmhWba+mqc3D\n7ReP6nlnEzJcLuHWi3L4cF8dlcebnY5j+pkVfRM0KzZWMD4tmYvHDnc6iumlWy/q+qL+wia7QzfS\nWdE3QbGvtokNB45xW8Eo65sfhnKGJ3HphDSe21SB12t99iOZFX0TFCuKKnC7hFsusjbhcHVbQQ6V\nx1tYV17vdBTTj6zom/PW0enlhU1VXDMlg4zBCU7HMX10/bQsBifEsKLI+uxHMiv65ry9s6uGuqY2\nbi+wC7jhLCHWzWdnZbNqezXHTrU7Hcf0Eyv65rwt31hBxuB4rppsd1OHuy/NHUN7p5fn7Gw/YlnR\nN+el4lgz7+yuYcnFo2zc/AgwKXMws8emsGzDIbugG6Hsr9Sclz+sP4hLhDvmjHY6igmSL84dzcH6\nZv5aVud0FNMPrOibPmvt6GTFxgqum5rJiKE2uFqkWJifRWpyHH9Yd9DpKKYfWNE3ffbqtiMcb+7g\ny/PGOB3FBFF8jJvbLx7FWzuPUtXQ4nQcE2RW9E2f/X7dQSakJzNvQqrTUUyQ3TF7NAos33DI6Sgm\nyKzomz7ZXtlIcUUDd84dY3fgRqBRKUlcPTmD5RsraPfYLKiRxIq+6ZPfrT1AUpybz130qcFVTYT4\n8rwx1J5s45VtNoduJLGib3qt5mQrLxUf5nMXZjPEhlCOWFdOSic3YxBPf7Df5tCNIFb0Ta/9fu1B\nOrxe7r5svNNRTD8SEe6+bBylh0+wrvyY03FMkARU9EVkoYjsFpEyEXnwDNuvEJHNIuIRkVu7bbtL\nRPb6HncFK7hxRkt7J39Yd5D5UzMZl5bsdBzTz26elU1qchxPf1DudBQTJD0WfRFxA08AhUAecIeI\n5HXb7RDwFWBZt2NTgEeBOcBs4FERscHWw9jzmys53tzB1y+3s/xokBDr5otzx/DmzhrKa5ucjmOC\nIJAz/dlAmaqWq2o
7sBxY7L+Dqh5Q1W1A98v81wNvqOoxVT0OvAEsDEJu4wCvV3nmg/3MyBlqE6VE\nkTvnjiHO7eI3Hx5wOooJgkCKfjbgP/pSpW9dIAI6VkTuEZEiESmqra0N8KXNQHtz51H2153ia5eP\nt26aUSR9cDw3zxrJc5sqbPTNCBASF3JVdamqFqhqQXq6jdQYilSVp94vJ3tYIoX5WU7HMQPsnivG\n0+bx8psP9zsdxZynQIp+FeA/UHqOb10gzudYE0LW7qtn08Hj3HvleBtNMwpNzBhMYX4Wv/3wAI0t\nHU7HMechkL/ejUCuiIwTkThgCbAywNdfAywQkeG+C7gLfOtMmPn3t/eSMTie22yilKj1zasmcrLN\nw+/XHnA6ijkPPRZ9VfUA99NVrHcCK1S1VEQeE5FFACJysYhUArcBT4lIqe/YY8CP6Prg2Ag85ltn\nwsjGA8dYV36Me64YT0Ks2+k4xiH52UO5ZkoGT3+wn1NtHqfjmD4K6Hu6qq5S1UmqOkFVf+xb94iq\nrvQ936iqOaqarKqpqjrN79hnVHWi7/Gb/vkxTH/697f2kpocxxfn2Gia0e6+qydyvLmDZettILZw\nZY2z5pyKKxr46946vnb5eBLj7Cw/2l00ZjiXTkzlqffLaWnvdDqO6QMr+uacHn9jD8OSYrnTxsw3\nPt+6dhJ1TW389qMDTkcxfWBF35zVR/vqeH9PLd+8agKD4mOcjmNCxOxxKVw1OZ0n39tnPXnCkBV9\nc0aqys9e282IoQl8ed5Yp+OYEPP310+msaWDpe/vczqK6SUr+uaM1pQepbiigW/Pz7UeO+ZTpo0c\nyk0zRvLMBweoOdnqdBzTC1b0zad4Or38y+u7mZCezC0X2iQp5sy+e90kOjq9/OrtMqejmF6wom8+\nZUVRJWU1TXxvwWS7+9ac1di0ZG6/eBTL1h+irOak03FMgOwv2nxCY3MH//L6bmaPS2GhjbFjevCd\n6yaRGOfmsVd22uxaYcKKvvmEX761h4bmdh69Kc9G0jQ9Sh0Uz7fnT+L9PbW8tbPG6TgmAFb0zcf2\nHj3J79YeZMns0UwbOdTpOCZMfHneGCakJ/NPr+6gzWM3bIU6K/oG6Oqi+cOXd5Ac5+Z7CyY7HceE\nkVi3i0dumsaB+mae+eCA03FMD6zoGwBeKj7MB2V1fHfBZFKS45yOY8LMlZPSWZCXyb+9tYeD9aec\njmPOwYq+ob6pjR++XMqs0cP40lwbbsH0zWOL84lxufi/L263i7ohzIq+4Z9e3UlTm4ef3nIBbpdd\nvDV9kzU0ge8XTuHDsnpe2GxzJYUqK/pR7t3dNby4pYq/vWoikzIHOx3HhLkvzh5NwZjh/OiVHdSe\nbHM6jjkDK/pR7Pipdr7/wja8LA3+AAAOHElEQVRyMwZx39UTnI5jIoDLJfzklum0dHTy4AvbrJkn\nBAVU9EVkoYjsFpEyEXnwDNvjReRPvu3rRWSsb/1YEWkRkWLf48ngxjd9pao89OftHDvVzi+XzCQ+\nxsbXMcExMWMwDxVO4a1dNfzBJlsJOT0WfRFxA08AhUAecIeI5HXb7W7guKpOBP4V+Knftn2qOtP3\nuDdIuc15eq6oktdKq/negsnWJ98E3VcuGcuVk9L5p1d22BANISaQM/3ZQJmqlqtqO7AcWNxtn8XA\n//iePw9cK3Y7Z8jaV9vEP75cyrzxqXz98vFOxzERSET4+W0XkBwfwwN/LKa1w27aChWBFP1soMJv\nudK37oz7+CZSbwRSfdvGicgWEXlPRC4/0xuIyD0iUiQiRbW1tb36AUzvNLV5+MbvN5EQ6+bx22fg\nst46pp9kDE7gX267gB1HTvD//lJi7fshor8v5B4BRqvqLOA7wDIRGdJ9J1VdqqoFqlqQnp7ez5Gi\nl6ryD89vpby2iV/dMYsRQxOdjmQi3DVTMnng2lye21TJs9a+HxICKfpVwCi/5Rzfu
jPuIyIxwFCg\nXlXbVLUeQFU3AfuASecb2vTNU++Xs2p7Nd9fOIVLJqY5HcdEiW9fm8vVk9P54culbDp4zOk4US+Q\nor8RyBWRcSISBywBVnbbZyVwl+/5rcDbqqoiku67EIyIjAdygfLgRDe9sWr7EX762i5unD6Ce66w\ndnwzcFwu4Ze3z2LksETu+d0mDtTZMA1O6rHo+9ro7wfWADuBFapaKiKPicgi325PA6kiUkZXM87p\nbp1XANtEpJiuC7z3qqp91A+wogPH+Pafirlw9HB+8fkZNmSyGXBDk2L5zVcuxqvKV36zgfomu3HL\nKRJqF1cKCgq0qKjI6RgRo6zmJLc+uZbhSXG88LeX2GBqxlGbDh7jC/+1nqkjhrDs63NIiotxOlLE\nEJFNqlrQ0352R24E21fbxB3/tZ4Yl4vf/s3FVvCN4y4ak8K/LZnFtsoG7v5tES3t1pVzoFnRj1D7\n605xx9J1qCp//PocxqQmOx3JGAAW5mfxi8/PYP3+er76241W+AeYFf0ItLv6JEuWrsXjVZ792lxy\nbSA1E2I+OyuHX3x+Buv21/M3v93AydYOpyNFDSv6EWZ9eT23PvkRAH/8+lwmZ1nBN6Hps7Ny+NfP\nz6TowHFue3ItR0+0Oh0pKljRjyAvbz3Mnc9sIGNwPH/+5qVW8E3Iu3lWNs985WIqjjXzuf/8iD1H\nbZye/mZFPwJ4Or38+NUd/N0ftzAjZyjP33sJ2cPsblsTHq6YlM6fvjGP9k4vNz/xIS9vPex0pIhm\nRT/MHT3Ryp1Pb+C//rqfu+aN4dmvzWW49dIxYSY/eyiv/N1lTB0xhL/74xZ++HIp7R6v07EiknWS\nDWMvbz3MD/5SQpunk3+5bQa3XpTjdCRj+ixzSALL75nLP6/ayW8+PMDaffX84vMzbOjvILMz/TBU\n3djKfcs283d/3MK4tGRWPXC5FXwTEWLdLh69aRpP31VA/al2Fv/qQ3755h4bmjmI7I7cMNLu8fLM\nh/v597f24vEqD1wzkXuvnECM2z67TeQ5fqqdR1eWsnLrYUanJPHwjVNZkJdpw4icRaB35FrRDwOe\nTi8vbqniP94u49CxZuZPzeSRz+QxOjXJ6WjG9LsP9tbxw5dL2VvTxLzxqfyf6yYxe1yK07FCjhX9\nCNDa0clLxVX8+t19HKhvJj97CN9bMJmrJmc4Hc2YAeXp9PLs+kP8x9tl1DW1MXd8CvddPZHLJqbZ\nmb+PFf0wdrD+FMvWH+JPRRU0NHcwbeQQvj1/EvOnZtgvuIlqLe2d/HHDIZ58bx81J9sYn57Ml+aM\n4ZaLchiaGOt0PEdZ0Q8zNSdaeWXbEVZuPUxxRQNul3D9tEzumjeW2eNSrNgb46e1o5PVJUf4/dqD\nbD7UQFyMi6smpXPjBSOYPzWT5Pjo65hoRT/EdXR62XKogff21PD+njpKDjeiCnkjhnDTjJHcPGuk\nTWdoTABKqhr58+YqXt1+mKMn2oiPcTFnfCpX5KZxeW46kzIHRcVJU1CLvogsBP4NcAP/rao/6bY9\nHvgdcBFQD9yuqgd82x4C7gY6gQdUdc253isSi77Xq1Q1tLC9qpHiiga2HDrO9qpGWju8uF3ChaOH\ncUVuOoXTs5iYYUMnGNMXXq9SdPA4q0uO8Ne9dZTVNAGQmhzHjFHDmOl75GcPjchhxgMt+j1+B/JN\nd/gEcB1QCWwUkZWqusNvt7uB46o6UUSWAD8FbheRPLqmV5wGjATeFJFJqhpxnW7bPJ0cbWzjcGML\nRxpbqDreQllNE3trmthX20RrR9fdhXFuF9Oyh3DH7NHMGZfCvAlpUd8WaUwwuFzC7HEpH/fsqWpo\n4a97atl44DhbKxt4e1fNx/sOT4plYsYgJmYMYkL6IEYOS+x6DE0gbVA8LlfkfjMIpOFrNlCmquUA\nIrIcWAz4F/3FwD/6nj8P/Eq6vk8tBparahuw3
zed4mxgbXDi952q4lXo9CqdXqXd46Wlo5OWjk6a\n2z20dnTS0v6/61raPTS2dNDQ3MHx5g4amtt9z9upa2qn7gzTv2UPS2RCxiDmjEtlYsYgpo4YTN7I\nIcTHuB34iY2JLtnDElkyezRLZo8G4ERrB9sqGtlVfYJ9tU3sPdrE6pJqGpo/OaxzrFvIGJxASnIc\nw5JiGZ4Ux/CkWIYnxzEsMZakuBgS49wkxblJjHOTHBfz8fO4GBdxbhcxbhcxLiHW7cIdYh8ggRT9\nbKDCb7kSmHO2fVTVIyKNQKpv/bpux2b3Oe05HD/Vzm1Prf24iHd6FY/X6/dcu23r27WMGJcwLKnr\nl2FYYiw5w5OYkTOMEcMSGDk0seu/wxIZOTSRxDgr7saEiiEJsVyWm8ZluWkfr1NVGpo7ur6hN7Ry\npLGFw42tHG1s5VhzO8ebOzh0rJnjp9o50erp0/u6BGLcLmJd0vVf3weCS7q+nbhEcLsEEZg2cij/\nccesYP3IZxQSl7hF5B7gHoDRo0f36TVi3MKkzEG4XV3/oG6X4BbB7ZYzL4t07evu+kePj3GRGOcm\nMdZNQqz7fz/J/ZaHJsaSHOeOiotCxkQDEWF4chzDk+N6HOPH0+nlRKuH5nYPLe2dNLd3csrveUt7\nJ22dXjo8XjxeLx2diqdT6ej00uH14ulUPJ1eOrxd//UqeFXxen2tDqqMTun/zhuBFP0qYJTfco5v\n3Zn2qRSRGGAoXRd0AzkWVV0KLIWuC7mBhvc3OCGW//ziRX051BhjehTjdpGSHBf2F4EDGbRlI5Ar\nIuNEJI6uC7Mru+2zErjL9/xW4G3t6ha0ElgiIvEiMg7IBTYEJ7oxxpje6vFM39dGfz+whq4um8+o\naqmIPAYUqepK4Gng974Ltcfo+mDAt98Kui76eoD7IrHnjjHGhAu7OcsYYyJAoP30bUxeY4yJIlb0\njTEmiljRN8aYKGJF3xhjoogVfWOMiSIh13tHRGqBg+fxEmlAXZDiBJPl6h3L1TuWq3ciMdcYVU3v\naaeQK/rnS0SKAum2NNAsV+9Yrt6xXL0TzbmseccYY6KIFX1jjIkikVj0lzod4CwsV+9Yrt6xXL0T\ntbkirk3fGGPM2UXimb4xxpiziNiiLyLfFREVkbSe9x4YIvIjEdkmIsUi8rqIjHQ6E4CI/FxEdvmy\nvSgiw5zOBCAit4lIqYh4RcTRnhYislBEdotImYg86GQWfyLyjIjUiEiJ01n8icgoEXlHRHb4/h9+\ny+lMACKSICIbRGSrL9cPnc50moi4RWSLiLzSn+8TkUVfREYBC4BDTmfp5ueqeoGqzgReAR5xOpDP\nG0C+ql4A7AEecjjPaSXA54D3nQwhIm7gCaAQyAPuEJE8JzP5+S2w0OkQZ+ABvquqecBc4L4Q+Tdr\nA65R1RnATGChiMx1ONNp3wJ29vebRGTRB/4V+AcgpC5YqOoJv8VkQiSfqr6uqqcnAF1H1wxnjlPV\nnaq62+kcwGygTFXLVbUdWA4sdjgTAKr6Pl1zWIQUVT2iqpt9z0/SVcz6ZX7s3tAuTb7FWN/D8b9D\nEckBbgT+u7/fK+KKvogsBqpUdavTWc5ERH4sIhXAFwmdM31/XwVWOx0ixGQDFX7LlYRAAQsXIjIW\nmAWsdzZJF18zSjFQA7yhqqGQ65d0nah6+/uNQmJi9N4SkTeBrDNsehj4v3Q17TjiXNlU9SVVfRh4\nWEQeAu4HHg2FXL59Hqbra/mzA5Ep0FwmfInIIOAF4Nvdvuk6xjd730zftasXRSRfVR27JiIinwFq\nVHWTiFzV3+8XlkVfVeefab2ITAfGAVtFBLqaKTaLyGxVrXYy2xk8C6xigIp+T7lE5CvAZ4BrdQD7\n8fbi38tJVcAov+Uc3zpzDiISS1fBf1ZV/+x0nu5UtUFE3qHrmoiTF8IvBRaJyA1AAjBERP6gql/q\njzeLqOYdV
d2uqhmqOlZVx9L1NfzCgSr4PRGRXL/FxcAup7L4E5GFdH21XKSqzU7nCUEbgVwRGSci\ncXTNAb3S4UwhTbrOup4Gdqrq407nOU1E0k/3ThORROA6HP47VNWHVDXHV7OWAG/3V8GHCCv6YeAn\nIlIiItvoaoIKiW5swK+AwcAbvu6kTzodCEBEPisilcA84FURWeNEDt9F7vuBNXRdkFyhqqVOZOlO\nRP4IrAUmi0iliNztdCafS4E7gWt8v1PFvjNZp40A3vH9DW6kq02/X7tIhhq7I9cYY6KInekbY0wU\nsaJvjDFRxIq+McZEESv6xhgTRazoG2NMFLGib4wxUcSKvjHGRBEr+sYYE0X+PypZFfpx4Ay+AAAA\nAElFTkSuQmCC\n", 55 | "text/plain": [ 56 | "
" 57 | ] 58 | }, 59 | "metadata": { 60 | "tags": [] 61 | } 62 | } 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "metadata": { 68 | "id": "VmXKxs-hGWNo", 69 | "colab_type": "code", 70 | "colab": { 71 | "base_uri": "https://localhost:8080/", 72 | "height": 320 73 | }, 74 | "outputId": "8781beb8-b4f8-4984-c483-a81562f7d0b9" 75 | }, 76 | "source": [ 77 | "import matplotlib.pyplot as plt\n", 78 | "import numpy as np\n", 79 | "\n", 80 | "mu, sigma = 0.5, 0.1\n", 81 | "s = np.random.normal(mu, sigma, 1000)\n", 82 | "\n", 83 | "# Create the bins and histogram\n", 84 | "count, bins, ignored = plt.hist(s, 20, normed=True)\n", 85 | "\n", 86 | "# Plot the distribution curve\n", 87 | "plt.plot(bins, 1/(sigma * np.sqrt(2 * np.pi)) *\n", 88 | " np.exp( - (bins - mu)**2 / (2 * sigma**2) ), linewidth=3, color='y')\n", 89 | "plt.show()" 90 | ], 91 | "execution_count": 3, 92 | "outputs": [ 93 | { 94 | "output_type": "stream", 95 | "text": [ 96 | "/usr/local/lib/python3.6/dist-packages/matplotlib/axes/_axes.py:6521: MatplotlibDeprecationWarning: \n", 97 | "The 'normed' kwarg was deprecated in Matplotlib 2.1 and will be removed in 3.1. 
Use 'density' instead.\n", 98 | " alternative=\"'density'\", removal=\"3.1\")\n" 99 | ], 100 | "name": "stderr" 101 | }, 102 | { 103 | "output_type": "display_data", 104 | "data": { 105 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xl8VPW9//HXJ5nsQFiSQMhCWAUE\nRI3WFaiKolZsq231Wpd7VerSqrft71d77+92sbe3dvXaahfU1qWIW12oioqKuCECyr4GCNl3spNl\nZr6/P2acTDAhk2Qm38nM5/l4zONxvme+mXmfTPLJyfme8z1ijEEppVRkibEdQCmlVPBpcVdKqQik\nxV0ppSKQFnellIpAWtyVUioCaXFXSqkIpMVdKaUikBZ3pZSKQFrclVIqAjlsvXFaWprJy8uz9fZK\nKTUsbd68ucYYk95XP2vFPS8vj02bNtl6e6WUGpZE5HAg/fSwjFJKRSAt7kopFYG0uCulVATS4q6U\nUhFIi7tSSkUgLe5KKRWBAi7uIhIrIp+KyMs9PJcgIk+LSIGIbBCRvGCGVEop1T/92XO/E9jdy3M3\nAkeMMdOA+4BfDjaYUkqpgQuouItINnAp8HAvXS4HHvMuPwecLyIy+HhK9Y8xhqqqp9m//04aG/Ui\nORW9Ar1C9X+B/wuM7OX5LKAYwBjjFJEGYBxQ499JRJYBywByc3MHklepXjU372D//ttoaHgPgOKS\nB3jl4JW8VHA1ThPXr9cqvPfSUERUasj0WdxF5EtAlTFms4gsGsybGWOWA8sB8vPzzWBeS0W+vLtf\nCahfYmwrX572JIsnrSI2xu1bHyNuLpv6DPMzPuahbd+lqGlKqKIqFXYCOSxzNrBURAqBp4DzROTv\nx/QpBXIARMQBpAK1QcypVA8Mp014j1+cewtLJr/oK+xOdyzFTXm+XjkjC/nRmf/O0qkriRWnpaxK\nDa0+99yNMT8Efgjg3XP/vjHmm8d0WwVcD6wHrgTeNsbonrkKmQkpJXxz1p+Zk7al2/o9dXN4fNet\nlDfncP6kl/najMdIiG3HEePiq9NXcHLGBh7a/u+UNU+ylFypoTHgWSFF5B5gkzFmFfAI8ISIFAB1\nwFVByqdUN/ExbVw29WkunvwCjpiuvfCG9tE8tedG1pcvAjxj+W8eXsr26lO5ae59TB+zB4DJqQX8\n9Kw7eX7ftbxW+GUMsRa2QqnQE1s72Pn5+Uan/FXH0/2Yu+HkjA1cM2s5aUlVvrVuE8Obhy/lhYJv\nctSZ0uPrCC6W5L3IV2c8QZzfH4T9R2bx8Pa7qGzN+tzX6ICqClcistkYk99XP2vzuSsVqPSkCq6Z\n9RfmZ2zstr6g/gQe33kbRU1Tj/v1hlhWF17B1pp8bp57H5NTCwCYPmY395x9B8/uu563Dn8Joxds\nqwiixV2FrbiYDi6e/A++NOVZ4mM7fOubOkbx7N4beK/0gn4V5LLmSfz3R7/h0inPsnTqUzhiXCTE\ntvPNWcs5NWM9j+y4i5qj40OxKUoNOT0so8JSXd2bvLH+OiaklPvWuY2wruQintt3HS2dowb1+rkj\nD3DzvPvIGVnoW3fUmcRTe25kXclFFN77pUG9vlKhEuhhGS3uKuzU17/Hli1fBFy+dYUNU3ls120c\najghaO/jkE4un7aSS6c8R4x0nR+/vfoUbr7sJRITs4P2XkoFixZ3FRYCvRDpM4KLn551F7mjDgHQ\n0pnC
P/Zdx9riJSE7s2Vy6l5unnsfE0eU+NYlJuaRn78Nh6O3i7KVsiPQ4q4jSCqsLMx5w1fY210J\n/PjD+3m7+NKQnrJ4qOEEfvzh/aw+9BXcxnMaZVtbIcXFvwrZeyoValrcVdhIdjRzxfQnfO1XDl5J\nzdEJQ/Lene4Ent57I3/b8R3fuuLi39DWVjQk769UsGlxV2Hjy9NWMDK+EYCaoxmsPvTVIc/wfukF\nHGqYBoDb3cbBg/8x5BmUCgYt7iosTEwp4vzcruPzT+35NzrdCUOewxDDyj03+dpVVStobPx4yHMo\nNVha3FUYMPzLrId8E3/trp3LpsqzraXZd2QOaWld/zUUFHwXnSpJDTda3JV1J2dsYE7ap4BnOoEV\nu5fx2fwwtkyd+itEPHPANzZ+QHX1c1bzKNVfWtyVVXExHVw9s+sGX2uLl1DSPNliIo+kpKlkZd3h\nax88+ANcrjaLiZTqHy3uyqoLJ71ERnIFAM0dI3h+/7GzSdszadL/w+EYB0Bb2yFKS39vOZFSgdPi\nrqwZnVDLZVOf9rVfKLhm0NMKBFNc3GgmT/6pr3348M/p6Kg6zlcoFT60uCtrvjbjURIdnkMdJU25\nrC2+xHKiz8vMXEZy8kwAXK5GCgt/bDmRUoHR4q6smJq6h7Oz1vraK3Yvw23C78YZMTFxTJ36G1+7\nrGw5LS07LSZSKjB9FncRSRSRj0Vkq4jsFJGf9tDnBhGpFpEt3sdNPb2WUgCCm2tm/cXX3lRxJrvr\n5ltMdHxjx17CmDGLvS03Bw5832oepQIRyJ57O3CeMeYkYD6wRETO6KHf08aY+d7Hwz08rxQAZ2e9\nzZTR+wHodMXx9N4bLSc6PhFh6tTf8tmvS13da9TWvmY3lFJ96LO4G49mbzPO+9ArOtSAJMa28rUZ\nj/rarxV+heohmj9mMEaMmEtmZtcfoQMHvofb7TzOVyhlV0DH3EUkVkS2AFXAGmPMhh66XSEi20Tk\nORHJCWpKFTGWTn2K1IR6AI60jeXlg1+znChwkyf/jNjYEQC0tu6ivFz/QVXhK6DiboxxGWPmA9nA\n6SIy55gu/wTyjDHzgDXAYz29jogsE5FNIrKpurp6MLnVMDQ+uZQL81b52s/s/VfaXUkWE/VPfPx4\ncnO7JhIrLPwRTmeDxURK9a5fZ8sYY+qBtcCSY9bXGmPavc2HgVN7+frlxph8Y0x+enr6QPKqYezq\nmQ/jiPEcyth/ZCbryxfZDTQA2dl3kZCQC0BnZzWHD/+P5URK9SyQs2XSRWS0dzkJWAzsOaZPpl9z\nKbA7mCHV8Dc3bTPzMzYCnnuhrtj9LWzPHzMQsbFJTJnyS1+7pOR/OXr0kMVESvUskD33TGCtiGwD\nNuI55v6yiNwjIku9fe7wnia5FbgDuCE0cdVwFCud/MvMh3zt90svoLBxusVEg5OR8Q1GjfKcMGZM\nBwcP/sByIqU+z9FXB2PMNuDkHtb/yG/5h8APgxtNRYrzc18h03t/0qPOJJ7bd53lRIPjOTXyPj79\n9EwAqqufpaHhA1JT7U1TrNSx9ApVFVIj4+v58rQnfe2XCq6msWOMxUTBkZp6BhkZV/nanjnf3RYT\nKdWdFncVUldOf5zkuFYAyluyWHP4MsuJgmfKlHsR8dwtqqnpY6qqVlpOpFQXLe4qZJqaNnNu9hpf\ne+Xum3CZOIuJgisxcRI5Od/1tQ8evBuXq9ViIqW6aHFXIWGMYf/+O4kRz8XMW6vy2VZzmuVUwZeb\nezdxcRkAtLeXUFz8O8uJlPLQ4q5Corr6ORobPwDA6Y5l5d7InEvO4RjF5Mk/87WLiu6lvb3cYiKl\nPLS4q5AoLv6tb3nN4aVUtGRbTBNamZk3kpIyFwC3u4VDh/7LciKltLirEGhq2kxTk2f6oU63g1cP\nXWE5UWiJxHpnjfSoqPirzvmurNPiroKutPRB3/LGinNo6hhtMc3QGD
t2MWPHXuxtmW7fA6Vs0OKu\ngqqzs7bbKYFvFV1qMc3Qys3tulK1svIJnM5Gi2lUtNPiroKqvPxvuN2e+6KOGHEKB+pnWk40dFJT\nF5CcPBsAl6uZysq/W06kolmf0w8olXf3KwH1E9z8csFvyUj2tO9ffw7DcXKwgRIRsrJuY//+bwOe\nw1MTJ96KSPR8D1T40D13FTRz0zaTkVwBQHPHCDaUL7CcaOiNH39ttxt6NDS8azmRila6566C5vzc\nrj3890oX0+FOtJjGDodjFG8cWsD5ua8C8JfV/8WftgY+a2ThvdEzRqFCS/fcVVCkJ5UzN30z4Jmv\n/e2iSywnsmet37afOv5DUhPqLKZR0UqLuwqK83Jf9U01sL3mFKqPZvbxFZGrpDmPvXUnAuCIcbEw\n+3XLiVQ00uKuBi0+po1zs7omCHu76EsW04QH//9cFuW8Roy4LKZR0UiLuxq0L2S+y4j4ZgCqWsez\nrfoUy4ns21R5Fg3tnou3xibWMj99g+VEKtoEcg/VRBH5WES2em+l99Me+iSIyNMiUiAiG0QkLxRh\nVTgy3QZS1xZdgiHWYp7w4DJxrCu5yNc+zzvAqtRQCWTPvR04zxhzEjAfWCIiZxzT50bgiDFmGnAf\n8EtUVJiaupe81AMAdLjiebd0seVE4eOd4otwG8+v2Jy0LUxIKbGcSEWTPou78Wj2NuO8D3NMt8uB\nx7zLzwHni165ERX899o3lC+gpXOUxTThpa4tgy1Vp/va5+Xo3rsaOgEdcxeRWBHZAlQBa4wxxx5A\nzAKKAYwxTqABGBfMoCr8jIyv57TM93ztaJpHJlD+35Nzst4iPrbNYhoVTQIq7sYYlzFmPpANnC4i\ncwbyZiKyTEQ2icim6urqgbyECiMLst8gLsYJwIH6GRQ2TrecKPzsqj2JipaJACTHtXBG5jrLiVS0\n6NfZMsaYemAtsOSYp0qBHAARcQCpQG0PX7/cGJNvjMlPT08fWGIVFgQXX8xZ7Wu/pac/9sgQ0+20\nSM9hrGOPaioVfIGcLZMuIqO9y0nAYmDPMd1WAdd7l68E3jbG6E9wBJufsZG0JM9/X40do9hYcY7l\nROHr/dILaHclADBp1EGmpu61nEhFg0D23DOBtSKyDdiI55j7yyJyj4gs9fZ5BBgnIgXAd4G7QxNX\nhQv/gdR3Sy6k0x1vMU14a3V2n0TtvNzAZtlUajD6nDjMGLMNOLmH9T/yW24DvhbcaCpcjU8uZU7a\npwC4TUy3uVRUz94qupQF2Z6reE/PfI+Ve26iuTPVcioVyfQKVdVv/nvtW6pOo7Ytw2Ka4eFw4zQO\n1M8AIC7GybnZa/r4CqUGR4u76pf42DbOyXrL19bTHwP3tt/36os5qxF0vhkVOlrcVb+cmfkOyXEt\nAFS0TGRX7XzLiYaPjyvOobljJAAZyZXMTfvEciIVybS4q34wXDDpZV/r7aJLMfojFLBOdwLvlnRN\nz3C+DqyqENLfTBWwGWN2kjOyEIB2ZwLvl55vN9AwtLb4YtzGMzPH3PTNpCdVWE6kIpUWdxUw/z3N\n9eWLaHWOsJhmeKo+msn2Gs+UyDFiWOR3IZhSwaTFXQUkNaGOU8d/6GvrQOrA+Q+sLsheQ1xMh8U0\nKlLpDbJVQBZmv44jxnN2x74jsylummI50fC1rfpUao5mkJZUxcj4Rk6b8D4flp0HQN7dAz8OrzfX\nVv50z131KVac3eeROaxFZDAMsawtutjX1itWVShocVd9OjnjI8Yk1gHQ0D6aTZVnWU40/L1buphO\nt+cf52mj9zJpVIHlRCrSaHFXffIfSH2neAkuE2cxTWRo6hjdbbI1vZGHCjYt7uq4Wlp2MmvcdgBc\n7hjWFh8727MaKP9B6TMmriPZ0Xyc3kr1jw6oquMqLf2jb/mTqjOob0+zmGboDGZgM1AH6mdyuHEK\nk0YdJCG2nXOy3uKNw5eH/H1VdN
A9d9Urp7ORysrHfW29IUewSbcbeZyX+wqC22IeFUm0uKteVVY+\ngcvlOVRQ2pTLnrq5lhNFnvXli2jtTAZgQkoZs8Zts5xIRQot7qpHxhhKS//ka79dfAkg9gJFqA5X\nYrdpHM7L0dMiVXBocVc9amz8iNbWnQC0ORP5oPQ8y4ki19rirkMzp4zfwJiEGotpVKQI5B6qOSKy\nVkR2ichOEbmzhz6LRKRBRLZ4Hz/q6bXU8FFevty3vKF8AW2uZItpIlt5Sw67aucBECNuFuW8ZjmR\nigSB7Lk7ge8ZY2YDZwC3i8jsHvq9Z4yZ733cE9SUakg5nQ1UVT3ta79TcpHFNNHBf2D13Ow1xIje\nyEMNTp/F3RhTboz5xLvcBOwGskIdTNlTWbkCt/soACkp8zjUMMNyosj3adUZNLSPBmBsYi3z0jZZ\nTqSGu34dcxeRPDw3y97Qw9NnishWEVktIicGIZuywBhDWVnXIZmJE5ehA6mh5zIO3i+9wNfWQzNq\nsAIu7iIyAvgHcJcxpvGYpz8BJhljTgL+ALzYy2ssE5FNIrKpurp6oJlVCDU1baKlZSsAMTFJZGRc\nYzlR9FhXcqFveV76Zh1YVYMSUHEXkTg8hX2FMeb5Y583xjQaY5q9y68CcSLyuUsZjTHLjTH5xpj8\n9PT0QUZXoVBe/pBvOT3968TFjbaYJrpUtU5kZ81JgGdg9dzsNZYTqeEskLNlBHgE2G2M+V0vfSZ4\n+yEip3tftzaYQVXoOZ1NVFY+6WtPnHizxTTR6Z2Srrl7Fma/gaADq2pgAplb5mzgWmC7iGzxrvsP\nIBfAGPNn4ErgVhFxAkeBq4wxJgR5VQhVVT2F290CQHLybEaN0ql9h9onlWfQ2J7KqIQGxiVVMzft\nE7bVnGY7lhqG+izuxpj36WNEzRjzAPBAsEIpO/zPbc/MvBnvP2NqCLlMHB+Unc/Fkz1HPxflvK7F\nXQ2IXqGqAGhq+pSmJs/pdyIJTJhwreVE0eud4q7rCk5K/5jRCXqEU/WfFncFHDuQegVxceMspolu\nla1Z7K71TNIWG+Pm3Kw3LSdSw5EWd4XL1UJl5Qpf23Nuu7Kp28Bqzus6FbDqNy3uiqqqZ3C5PJcu\nJCXNIDV1geVE6pPKM2nuGAlAWlIVJ6Z9ajmRGm60uKtuh2R0IDU8dLrjeb+saybOhdmvW0yjhiO9\nzV6U6O22cVkjCvn5OesBcLodXPrQRJo6dU7xcLCueAlL8l4C4OSMDaTGH6GhY4zlVGq40D33KOe/\nR7i58kyaOlMtplH+ylty2FvnmabJEePiHB1YVf2gxT2KxcW0c3bW2772Op3aN+y8U6wDq2pgtLhH\nsfzxH5IS57kitap1Aru9N4xQ4WNT5Vm0dKYAkJFcofdYVQHT4h7F/KeVXVdyIUZ/HMJOpzuh2y0O\ndWBVBUp/m6NUZkoxJ4z13CPV6Y7l/ZLFlhOp3vgfLjt1/HpGxtdbTKOGCy3uUcp/D3BL1el6FkYY\nK23OY/+RWQA4YpycM/Ety4nUcKDFPQo5pPOYgdQlx+mtwoH/3vvCnNcBnXRVHZ8W9yh06vgPGRnv\nuSK15mg6O2rmW06k+vJx+Tm0egdWJ6SUMXPsdsuJVLjT4h6FPHt+Hu+WXIgh1mIaFYgOdyIfli3y\ntXVgVfVFi3uUyUguY7b3dDq3ieE9HUgdNvwPzeRP+IARcQ0W06hwp8U9yizMfsO3vLU6nyPtn7vV\nrQpTxU1TOFB/AgBxMc5u4yZKHSuQe6jmiMhaEdklIjtF5M4e+oiI/F5ECkRkm4icEpq4ajBipbPb\nJezrivWK1OHG/zNblK0Dq6p3gey5O4HvGWNmA2cAt4vI7GP6XAxM9z6WAX8KakoVFCdnfExqgucc\n6bq2cWyrybecSPXXhopzOepMAiBzRAkzxuy0nEiFqz6LuzGm3BjziXe5CdgNZB3T7XLgcePxETBa
\nRDKDnlYNykK/K1LfK1mM2+hA6nDT7kpivd/A6iIdWFW96NcxdxHJA04GNhzzVBZQ7Ncu4fN/AJRF\naUkVnDhuCwBuI7xbcqHlRGqg/CcTO23C+6TENVlMo8JVwMVdREYA/wDuMsY0DuTNRGSZiGwSkU3V\n1dUDeQk1QAuy1xAjnuOzO2pOobYtw3IiNVBFTVM51DANgLjYTs6aqAOr6vMCKu4iEoensK8wxjzf\nQ5dSIMevne1d140xZrkxJt8Yk5+enj6QvGoA3G4n52at8bXf0YHUYa/bVMA6sKp6EMjZMgI8Auw2\nxvyul26rgOu8Z82cATQYY8qDmFMNQl3dK4xJrAOgoX00W6tPt5xIDdaG8gW0ORMByB5ZxLTReywn\nUuEmkD33s4FrgfNEZIv3cYmI3CIit3j7vAocBAqAh4DbQhNXDURZWdc9Ut8tWYzL6N0Vh7s2VzIf\nlS/0tf2nb1YKAriHqjHmfeC4d0w2xhjg9mCFUsHT1lZMXd1qX1sHUiPHuuKLWOSdSuL0Ce/R2XmE\nuDid3VN56BWqEa6i4q/gvTXbjpr5VB/VM1QjxaHG6RxunAJAfGwHlZUrLCdS4USLewRzuzspL3/Y\n19Z7pEYa6TawWl6+HM8/0UppcY9o1dXP0d5eAkBjeyqfVp5hOZEKto/KF9LuTACgpWU7TU0fW06k\nwoUW9whljKG4+De+9ltFl+I0cRYTqVA46kxhQ8UCX7us7M8W06hwosU9QtXXr6O5+RMAYmISebvo\nUsuJVKj4H5qprHyS9nY9C1lpcY9YJSW/9S2PH389TZ2pFtOoUDrYMIMC71TAxnRQWvoHy4lUONDi\nHoFaWnZTW/uytyXk5Py71Twq1ITVh67wtcrK/oTTqfPNRDst7hGopOQ+3/K4cZeRnHyCxTRqKHxS\n+QWSkjzzzTid9ZSXP2I5kbJNi3uE6eiopKLicV87J+f7FtOooWKIJTv7e752Scl9uN2dFhMp27S4\nR5jS0j9iTDsAI0eeTmrqOZYTqaEyYcL1xMV5JuRrby+iuvpZy4mUTVrcI4jL1Upp6YO+dk7O9/DM\n+6aiQWxsEllZ3/a1i4t/rRc1RTEt7hGkouIxnM5aABIT80hL+6rlRGqoTZx4GzExntvwNTdv4ciR\ntywnUrZocY8Qxri6DaRmZ99FTIzO/hht4uPTmDDh33zt4uJfW0yjbNLiHiFqav7J0aP7AXA4Rnf7\nBVfRJSfnu3z2q33kyBs0N2+1G0hZocU9QvhftJSZ+S0cjpEW0yibkpKmkJ7edd67/zQUKnpocY8A\njY0baGh4HwCROLKzv2M5kbItJ+f/+Jarqp6ira34OL1VJNLiHgGKi7v22jMyriYhIctiGhUORo06\njdRUz52ajHFSUnK/5URqqAVyD9W/ikiViOzo5flFItLgdwu+HwU/purN0aOHqK7+h6+dk/O94/RW\n0SQ3t2vvvbx8OU5ng8U0aqgFsuf+KLCkjz7vGWPmex/3DD6WClRJyf/y2Z2WxoxZzIgR8+wGUmFj\n7NiLSU6eBYDL1URZ2V8sJ1JDqc/ibox5F6gbgiyqnzo7j3SbQ0SnGlD+RGK6/UyUlNyP291hMZEa\nSsE6EfpMEdkKlAHfN8bs7KmTiCwDlgHk5uYG6a2jV1nZX3C7WwBISZnLmDGLLSdSNuXd/crn1jlk\nLL9eOJYxiXV0dJRx3YM/4P3SCz7Xr/Bene8/0gRjQPUTYJIx5iTgD8CLvXU0xiw3xuQbY/LT09OD\n8NbRy+3uoLT09762TjWgeuI0cbx5+DJfe0ne84BOSRANBl3cjTGNxphm7/KrQJyIpA06mTquqqqV\ndHR47rgTH59JRsbVlhOpcLW2+GKOOj1TEmSPLGJe2ibLidRQGHRxF5EJ4t1lFJHTva9ZO9jXVb3z\n3B+16/THrKw7iImJt5hIhbNW5wjWlVzoa188+XmLadRQCeRU
yJXAeuAEESkRkRtF5BYRucXb5Upg\nh/eY+++Bq4xORRdSR46soaVlOwAxMSlMnPgty4lUuHuj8HJcbs+v+6xx28kbtd9yIhVqfQ6oGmOO\n+/++MeYB4IGgJVJ98r+cPDPzRuLixlhMo4aDurYMNlQs4KyJ7wCevfc/bf2B3VAqpPQK1WGmuXkr\nR46s8bZiyM6+y2oeNXysPtQ1BfRpEz4gPanCYhoValrch5ni4t/5ltPTryApabLFNGo4KW6awo6a\nkwGIETcX5fV6YpuKAFrch5H29lKqqp70tXWqAdVf/nvv52atISWu0WIaFUpa3IeRkpI/YIwTgNTU\ncxg16guWE6nhZmftfA43TgEgwdHOeTmvWk6kQkWL+zDhdDZRVvZnX1unGlADI7x26Cu+1gWTXiYu\nRqckiERa3IeJioq/4nJ5ZvVLSprOuHGX9fEVSvXs44pzqT3quUI8NaGesya+bTmRCgW9yeYw4HY7\n+Wj7/5DmuciQP29azDdeWG03lBq2XMbBG4eXcvVMz6RzSya/gDH3I6L7epFEP81hoKbmedKSqgBo\n6hjFB6XnWU6khrt1xRfR2pkCQGZKKTU1qywnUsGmxT3MeaYa6Lpo6a2iS+lwJ1pMpCJBmyuZtcUX\n+9rFxb+2mEaFghb3MFdb+zJNTRsB6HTF8VaRTs2qgmPN4ctwuj1HZhsbP6Sh4UPLiVQwaXEPYy5X\nGwUFd/ra75YupqljtMVEKpLUt4/jw7JFvnZR0a/shVFBp8U9jBUX/4q2tkMANHeM5IX911hOpCLN\na34XNdXWvsSRI2stplHBpGfLDKGe7pTTm7SkSv7nnJ8TH+tp/2P/tTR3poYomYpWZS25rC9byJkT\n1wGwf/9t5Odv1SmkI4DuuYepq054mPhYz8UlhQ1Teaf4IsuJVKR6eu+/ERs7EoDW1j2UlNxnOZEK\nBi3uYWhO2mbyJ6z3tZ/YfQuGWIuJVCSrbx9HXt49vnZh4T20tRVZTKSCQYt7mHFIJ9+c9Rdf+72S\nCzhQP8tiIhUNsrK+TUrKPADc7lYKCnQq6eFOi3uYuTDvJSaklAHQ2pnMs/uut5xIRYOYGAczZvzR\n166peYHaWp1UbDgL5DZ7fxWRKhHZ0cvzIiK/F5ECEdkmIqcEP2Z0GJNQw9KpT/naLxRcQ2OH3mVJ\nDY3U1LOZMOFffe39+7+Dy3XUYiI1GIHsuT8KLDnO8xcD072PZcCfBh8rOl018xESHW0AFDdN4q2i\nL1lOpKLNlCm/xOHw7FC0tR2kqOiXlhOpgeqzuBtj3gXqjtPlcuBx4/ERMFpEMoMVMFrMHLuNL2S+\n52v/fdctuI0OoqqhFR+fzpQpv/C1i4rupbW1wGIiNVDBOM89Cyj2a5d415Uf21FEluHZuyc3NzcI\nbx0ZYsXJtbO65mpfX7aQvUfmWkykoo3/NRhCJv91xnSmjN6PMe08/MpV/G7zTwDp9esL79VpMcLN\nkA6oGmOWG2PyjTH56enpQ/nWYe383JfJGuk59eyoM4mn9/6b5UQqmhlieWzX7biNp5jPS9/MqePX\n9/FVKtwEo7iXAjl+7WzvOhXnGvJ9AAARHklEQVSA1IQ6vjJ9ha+9quAq6tvHWUykFBxunMbaokt8\n7X+ZuZyEWB1cHU6CUdxXAdd5z5o5A2gwxnzukIzq2ddn/I0kh+eXpqw5mzcOL7WcSCmPf+y/loZ2\nz0R145K6n8mlwl8gp0KuBNYDJ4hIiYjcKCK3iMgt3i6vAgeBAuAh4LaQpY0w00fv5OysromaVuz+\nFi4TZzGRUl1anSN4em/XqZEX5b3IxBS9cnW46HNA1RhzdR/PG+D2oCWKEjHi4trZXYOoGyvOYmft\nyRYTKfV5H5adx8LsNzhh7E4cMS6uO/GP3PvxLzje4KoKD3qFqiVfzFlN7ijPdL7trgRW7rnJciKl\neiI8vutWXG5PqZg5dgdn
Zr5jN5IKiBZ3C0bGNfDV6U/42i8f+Dp1bRkWEynVu9LmPN44fLmvfdXM\nR0h2NFtMpAKhxd2CK2c8RkpcCwCVLZm8VvgVy4mUOr6XCq6mrs1zFldqQn23M7xUeNLiPsQmp+5l\nYc4bvvaK3cvodOuNEVR4a3Mls3L3zb72+bmvMGmUXrkazrS4DyHB3W0Q9dOq09lWc5rFREoFbmPl\n2Wyv8Qz6x4ib62b/CcFtOZXqjRb3IbQg+w2mpO4HoNMVx5O7l1lOpFR/CH/fdQudbs9JdlNH7+Xc\n7DWWM6neaHEfIp2ddVw543Ff+5VDV1J9dILFREr1X2VrFqsPXuFrf33Go4yIa7CYSPVGi/sQOXTo\nvxgZ3whAzdEMXvX7BVFqOPnnwa9T1ToegBHxTXxtxmOWE6meaHEfAvX171FW1nWs/cndN9HhTrSY\nSKmB63QnsGL3t3zthTlv0NCgE4uFGy3uIdbSsosdOy4H78DT9upT+KTqTLuhlBqkrdWn80nlGb72\nvn236l2bwowW9xBqby9l27YlOJ1HAGhoH81ju25HL91WkWDF7mW0uxIAaGnZyq5dV+F2Oy2nUp/R\n4h4inZ31bNt2Me3tnvuYxMaO4Hebf0LN0fGWkykVHLVtGTyz94audu0q9u1bhme6KWWbFvcQcLvb\n2bnzK7S0bAdAxMGJJ/6Dw43TLCdTKrjeKrqMV/xODqio+BsHD/7AYiL1GS3uQWaMm927r6O+/h3f\nuhNO+Ctjx15oL5RSIfTsvhuYMKHr7mHFxb+mqOhXFhMp0OIeVMYYCgq+S3X1M751U6bcy4QJ11pM\npVSoCTNm/IW0tC/71hw8+APKy/9qMZPS4h5ExcW/obT0fl87K+s75OT8X4uJlBoaMTEOZs1ayejR\ni3zr9u69merqF+2FinIBFXcRWSIie0WkQETu7uH5G0SkWkS2eB9RNzl5RcXfOXiwq5Cnp1/JtGn3\nIaJnxqjoEBubyJw5LzFixGc3nXGza9dVHDnyjs1YUSuQ2+zFAg8CFwOzgatFZHYPXZ82xsz3Ph4O\ncs6wVle3hr1+tyNLTV3IzJlP4PnWKRU9HI5RzJu3mqQkz8kDxrSzY8dSmpo+sZws+gSy5346UGCM\nOWiM6QCeAi7v42uiRlPTJ+zc+VWM8Zzfm5IyhzlzXiQ2Vq9AVdEpPn488+atIT5+IgAuVxPbti2h\ntXWf5WTRJZDingUU+7VLvOuOdYWIbBOR50QkJyjpwtzRowfZtu0SXC7PXWkSErKZO3c1cXGjLSdT\nyq6kpDzmzXsdh2MMAJ2d1WzdeiHt7aWWk0WPYA2o/hPIM8bMA9YAPc4kJCLLRGSTiGyqrq4O0lvb\n0dFRzbZtS+jsrATA4RjNvHmvkZiYbTmZUuFhxIg5zJ37MjExSQC0tx9m69aL6Oyss5wsOjgC6FMK\n+O+JZ3vX+Rhjav2aDwM9nuRqjFkOLAfIz88ftpexuVwtbN/+JY4e9czNLpLAnDn/JCXlRMvJlLIj\n7+5Xen1ubtoPuPOUn+GIcdHaupNHXz6bX2/6bzpcnkOXhfdeOlQxo0oge+4bgekiMllE4oGrgFX+\nHUQk06+5FNgdvIjhxe12snPnN2hq+ti7Rpg9+0lGjz7Hai6lwtX2mnwe2X6Xrz19zB6+Pf8XxEqn\nxVSRr8/ibjwjhd8GXsdTtJ8xxuwUkXtEZKm32x0islNEtgJ3ADeEKrBNxhj27fsWdXVdeynTpz9A\nevpXLaZSKvytL/8if/e789i89M3cNPd/9TZ9IRTIYRmMMa8Crx6z7kd+yz8EfhjcaOGnsPDHVFR0\nXXWXm/sfZGXdZjGRUsPHm4eXMjKukcunPQXAmRPX0dI5EmO+pNeDhIBeoRoAl6uV/fvv4vDhn/nW\nTZhwA5Mn/7fFVEoNPy8UXMPbRRf72hdMepnDh++xmChyaXHvQ339OjZunNdtWoGxYy9mxo
zlureh\nVL8JT+y6hQ3l5/rWFBb+hB07vqKnSQaZFvdeOJ3N7Nt3O1u2LKKt7YBv/dixlzB79jPExMRZTKfU\n8GWI5aFt32VHzcm+dTU1L/Lxx7MpLf0zxuhx+GDQ4t6Duro32bhxDmVlf/StczhGc8IJf2Pu3Jdx\nOEZYTKfU8Oc0cfz+0/9kXXHXVNguVyP799/Kli0LaWmJ2BPuhowWdz9OZwN7997Mtm2LaW8/7Fs/\nbtxlnHbaTjIzb9BDMUoFSYcrkb/tvIOTTlpLUtJ03/qGhvfZtGk+hYX34HZ3WEw4vGlx96qtXc3G\njXMoL++a88zhGMusWSuYM+clEhImWkynVOQaM2YR+flbyc39D0Q8J/AZ00Fh4Y/ZtOlkGho+tJxw\neAroVMhI4381XbKjmX+Z9RDnZL3Vrc/GirN4YtetNL6civ9ZoHo1nVLBFxubxJQpPycj4xvs3Xuz\n7yLB1tZdfPrpOUyceBtTpvwPDscoy0mHj6gs7p+Zn76B6098kDGJXXNdNLan8sSuW9lYqVecKjXU\nRoyYxymnfEhp6QMcPPifuN0tgKGs7EFqal5kxow/kpa2tM/XUVFa3EfENXDNrOWcOXFdt/UflS9g\nxa5v0dSZaimZUkokluzsO0lL+zL79t1GXZ3nP+eOjlJ27LjceyOcP5CQMMFy0vAWdcW9quo5fn7O\n7aQm1PvWNbSP5rGdt/NJ1ZkWkyml/CUmTmLu3JepqnqKgoI76ez0zCRbXf0cR468yZQpvyYz80Y9\nyaEXUVHcjx4tpLr6aSorV9LSspXUhK7nPij9Ik/uWUZL50h7AZVSPRIRxo+/mrFjL+TAge9TUfEo\nAE5nPfv23Uxl5eNkZX2bceMuJTY2xW7YMCPG2Jl5Nz8/32zatClkr9/RUUlV1TNUVa2ksXH9554/\n0jaWR3d+h63Vp4Usg1Kqb/05SeHIkbfYu/db3S4sBIiJSWLcuEtJT/9axBd6EdlsjMnvq19E7bl3\ndh6hpuZ5KitXUl+/FnqYcU4kgbcPf5Fn991Aq1MvRlJqOBkz5nxOO20bhYX3UFz8G8AFgNt9lOrq\n56iufi6qCv3xDPvi7nK1UFPzT6qqVlJXtxpjepojOpaxYy8kI+Nq0tIu5/rV7w15TqVUcMTGJjN1\n6r1kZt5IVdWTVFU9Q2vrLt/zPRf6rzNu3CVRVeiH5WGZaT98kTlpn3DGxHWcnL6BBEf75/q4jbDv\nyIlsKF/AxoqzadYzYJQKS8G4dqSlZSdVVc9SXf1st0LvL1IKfaCHZYZVcW9p2U1x8W85UPwUKXEt\nPfY52DCdDeUL+Lj8XI60pwUjqlIqhIJ9YWCghX7s2IsZOfJUkpNnkpw8k6SkacTExAc1SyhE5DF3\np7OOiopHSDlmQsbSplw+Kl/AxxXnUtmaZSecUiospKScyOTJJzJ58k96LfRu91Fqap6npuZ5v6+M\nJSlpiq/Y+z/i4sYO/YYMUkB77iKyBLgfiAUeNsbce8zzCcDjwKlALfANY0zh8V5zIHvuxrj56KPJ\ntLcXUd06ng0V5/JR2UJKmvMAPddVqWjTn73+QPboexMXl+4t9Cd49/JPID5+PA7HGOLixuBwjEYk\ntr/xByRoe+7iSfwgsBgoATaKyCpjjP9350bgiDFmmohcBfwS+MbAoh8vSwzTp/+Bbzy0nwMNJ6AF\nXSkVqGP36Ovr36W1dS+trXtobd3TbSbYY3V2VtPQUE1DQ+8nY8TGjsThGONX8D1F//PrPOs9/xGM\nCcWmAoEdljkdKDDGHAQQkaeAywH/4n458BPv8nPAAyIiJgQH9NPSlnKg4ZW+OyqlVC9SUk7kxJ8V\nArl49lshPqaNCSllZKYUkzmihMwUz2NCSinxsX1PPexyNeFyNdHeXhRQhtmznyEj42sD34g+BFLc\ns4Biv3YJ8IXe+hhjnCLSAIwDaoIRUimlQq3DnUhR0x
SKmqZ0Wy+4GZdUzYSUroI/PrmMlLhmUuKa\nSY5rJsnRSoz0b1/W4QjdXjsM8YCqiCwDlnmbzSKy95guaQz/Pwi6DeFBtyE8hHQb5JehemWfNKCm\nMCQvvXigXzgpkE6BFPdSIMevne1d11OfEvHMtp+KZ2C1G2PMcmB5b28kIpsCGSgIZ7oN4UG3ITwM\n920YzvkDuRPTRmC6iEwWkXjgKmDVMX1WAdd7l68E3g7F8XallFKB6XPP3XsM/dvA63hOhfyrMWan\niNwDbDLGrAIeAZ4QkQKgDs8fAKWUUpYEdMzdGPMq/vea86z7kd9yGxCMYd9eD9kMI7oN4UG3ITwM\n920YtvmtTT+glFIqdAI55q6UUmqYsVLcRWSJiOwVkQIRubuH578rIrtEZJuIvCUiAZ36M5QC2IZb\nRGS7iGwRkfdFZLaNnMfT1zb49btCRIyIhN1ZAwF8DjeISLX3c9giIjfZyNmbQD4DEfm69/dhp4g8\nOdQZ+xLAZ3Cf3/d/n4jU9/Q6NgWwDbkislZEPvXWpUts5OwXY8yQPvAMyh4ApgDxwFZg9jF9vggk\ne5dvBZ4e6pxB2IZRfstLgdds5+7vNnj7jQTeBT4C8m3nHsDncAPwgO2sg8g/HfgUGONtZ9jOPZCf\nI7/+38FzUob17P38HJYDt3qXZwOFtnP39bCx5+6bzsAY0wF8Np2BjzFmrTGm1dv8CM+59eEkkG1o\n9GumAOE2uNHnNnj9DM9cQW1DGS5AgW5DuAok/83Ag8aYIwDGmKohztiX/n4GVwMrhyRZ4ALZBgOM\n8i6nAmVDmG9AbBT3nqYzON48vTcCq0OaqP8C2gYRuV1EDgC/Au4YomyB6nMbROQUIMcYE66T+QT6\ns3SF91/p50Qkp4fnbQkk/wxghoh8ICIfeWdoDScB/z57D69OBt4eglz9Ecg2/AT4poiU4Dlz8DtD\nE23gwnpAVUS+CeQDv7adZSCMMQ8aY6YCPwD+n+08/SEiMcDvgO/ZzjJI/wTyjDHzgDXAY5bz9JcD\nz6GZRXj2eh8SkdFWEw3cVcBzxhiX7SADcDXwqDEmG7gEz3U9YV0/bYQLZDoDROQC4D+BpcaYz99H\nz66AtsHPU8CXQ5qo//rahpHAHOAdESkEzgBWhdmgap+fgzGm1u/n52E89xwIF4H8HJUAq4wxncaY\nQ8A+PMU+XPTnd+Eqwu+QDAS2DTcCzwAYY9YDiXjmnQlfFgYvHMBBPP+efTZ4ceIxfU7GM8Ax3fag\nxCC2Ybrf8mV4rua1nr0/23BM/3cIvwHVQD6HTL/lrwAf2c7dz/xLgMe8y2l4Dh+Ms529vz9HwEyg\nEO+1NeH0CPBzWA3c4F2eheeYe9htS7fMlr6Zl+DZAzkA/Kd33T149tIB3gQqgS3exyrb36gBbMP9\nwE5v/rXHK5zhug3H9A274h7g5/AL7+ew1fs5zLSduZ/5Bc/hsV3AduAq25kH8nOE55j1vbazDuJz\nmA184P052gJcaDtzXw+9QlUppSJQWA8IKKWUGhgt7kopFYG0uCulVATS4q6UUhFIi7tSSkUgLe5K\nKRWBtLgrpVQE0uKulFIR6P8DIetitFWm1F8AAAAASUVORK5CYII=\n", 106 | "text/plain": [ 107 | "
" 108 | ] 109 | }, 110 | "metadata": { 111 | "tags": [] 112 | } 113 | } 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "metadata": { 119 | "id": "n_sm4L7mGNqx", 120 | "colab_type": "code", 121 | "colab": { 122 | "base_uri": "https://localhost:8080/", 123 | "height": 320 124 | }, 125 | "outputId": "c70b7c0f-3af3-4211-adc2-a42ab59ffdee" 126 | }, 127 | "source": [ 128 | "import numpy as np\n", 129 | "import matplotlib.pyplot as plt\n", 130 | "\n", 131 | "class norm1:\n", 132 | " def __init__(self, a1, b1, c1):\n", 133 | " self.a1 = a1\n", 134 | " self.b1 = b1\n", 135 | " self.c1 = c1\n", 136 | " \n", 137 | " def dist_curve(self):\n", 138 | " plt.plot(self.c1, 1/(self.b1 * np.sqrt(2 * np.pi)) *\n", 139 | " np.exp( - (self.c1 - self.a1)**2 / (2 * self.b1**2) ), linewidth=2, color='y')\n", 140 | " plt.show()\n", 141 | "\n", 142 | "#Vary the mean and SD to generate different plots\n", 143 | "mean1 = 5 \n", 144 | "sd1 = 2\n", 145 | "\n", 146 | "c = np.random.normal(mean1, sd1, 3000)\n", 147 | " \n", 148 | "w1, x1, z1 = plt.hist(c, 100, normed=True) #hist\n", 149 | "\n", 150 | "hist1 = norm1(mean1, sd1, x1)\n", 151 | "plot1 = hist1.dist_curve()\n", 152 | "\n", 153 | "## https://medium.com/@balamurali_m/normal-distribution-with-python-793c7b425ef0" 154 | ], 155 | "execution_count": 2, 156 | "outputs": [ 157 | { 158 | "output_type": "stream", 159 | "text": [ 160 | "/usr/local/lib/python3.6/dist-packages/matplotlib/axes/_axes.py:6521: MatplotlibDeprecationWarning: \n", 161 | "The 'normed' kwarg was deprecated in Matplotlib 2.1 and will be removed in 3.1. 
Use 'density' instead.\n", 162 | " alternative=\"'density'\", removal=\"3.1\")\n" 163 | ], 164 | "name": "stderr" 165 | }, 166 | { 167 | "output_type": "display_data", 168 | "data": { 169 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD8CAYAAACb4nSYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xl4VOX5xvHvM5ksJCGBQCAhC2GT\nVSqK4tJWERdc6lK1YkVFrfTXunS3WFutWqu21lrXSt2qdd8qCoob7lpBUdawhex7QvZMZjLz/v7I\ngDEsGWCSd5bnc125nJw5J3NH4J4z7znnPWKMQSmlVHRw2A6glFKq/2jpK6VUFNHSV0qpKKKlr5RS\nUURLXymlooiWvlJKRREtfaWUiiJa+kopFUW09JVSKoo4bQfoaejQoSYvL892DKWUCiuff/55rTEm\nvbf1Qq708/LyWLFihe0YSikVVkSkKJD1dHhHKaWiiJa+UkpFES19pZSKIlr6SikVRbT0lVIqimjp\nK6VUFNHSV0qpKKKlr5RSUURLXymlokjIXZGrVCjLW7B4x+PCW0+xmESpfaN7+kopFUW09JVSKopo\n6SulVBTR0ldKqSiipa+UUlFES18ppaKIlr5SSkURLX2llIoiWvpKKRVFtPSVUiqKaOkrpVQU0dJX\nSqkooqWvlFJRREtfKaWiiJa+UkpFES19pZSKIlr6SikVRbT0lVIqimjpK6VUFNHSV0qpKKKlr5RS\nUURLXymlooiWvlJKRRGn7QBKRYu8BYt3PC689RSLSVQ00z19pZSKIlr6SikVRbT0lVIqigRU+iIy\nW0Q2iMhmEVmwi+d/KSLrRGSViLwtIiO7PXeRiGzyf10UzPBKKaX2Tq+lLyIxwL3AScAk4DwRmdRj\ntZXAdGPMVOB54C/+bdOA64EZwGHA9SIyOHjxlVJK7Y1Azt45DNhsjCkAEJGngdOBddtXMMYs67b+\np8Bc/+MTgTeNMfX+bd8EZgNP7X90pfZMz5ZRameBlH4WUNLt+1K69tx351LgtT1sm7U3AZUKFW53\nFQcOXUFWcjEjkkvYsOFlEhJGkpCQx8CBM0hMHGs7olK9Cup5+iIyF5gOHL2X280H5gPk5uYGM5JS\n+625+QtKSm6nuvpZfjXdu2N5RcU31xs0aBZZWT9hyJDTcDhi+zmlUoEJpPTLgJxu32f7l32DiBwH\nXAscbYzp6LbtMT22fbfntsaYhcBCgOnTp5sAMinVp/IWLGbogErmTb6HKUO/9C+NIb9+CqXNIylv\nyeWmM6bgchXS3r6F+vrXaGh4m4aGtxkwYCwTJjxGauoRVn8HpXYlkNJfDowTkVF0lfgc4IfdVxCR\nacADwGxjTHW3p5YCf+528PYE4Jr9Tq1UHzs04wMunnw3ibFtxMQMJDNzPtnZVzHvj6t3rPNw1tfH\nCTyeBqqq/k1Z2T20t29m5cpvk5t7DXl51+FwxNn4FZTapV7P3jHGdAJX0FXg64FnjTFrReRGETnN\nv9pfgWTgORH5UkQW+betB26i641jOXDj9oO6SoUin8/Nhg0/5vKDbiMxto0VlUdw+OGFjB17OwkJ\nux96jI0dRHb2zzj00DUsLjgLnzEUF9/Mo4umMOUPT37joLJSNgU0pm+MWQIs6bHsum6Pj9vDtg8D\nD+9rQKX6i8/nZt26c6mt/S8ebyxP5f+Id0pO5tdz0gL+GQ5HPM9tvJivag5l/tQ7GD1oEwsOu4a/\nLr
+ZRreerazs0wnXlKKr8P/+3NEcMvxTWj1J3L78JrY2HQCwT3vpG7dN4aZPb+fq6b8ne2Ax18z4\nLX9ZfjP1rvRgR1dqr+g0DCrq+Xwe1q2bs6Pw/7L85h2Fvz8aO9K49bNbKGoaTUZSOQsOu4aBsY1B\nSKzUvtPSV1Ehb8HiHV89FRRcTW3tSzsKv6gpeOfbN3tSue2zP7O1cSzDEiu5fNotxIgnaD9fqb2l\npa+iWmXlfygtvRORWP7++fVBLfzt2jqT+ccXf2CbK40JaWuYO+kBjNEzk5UdWvoqajU3r2TjxssA\nGDfubjY39JxSKngaOoZw1xe/x+ONZWbO61x63+W7/OSxp08kSgWDlr6KSh5PPWvWnInP5yIj41Iy\nM+f3+WtubTqAh9dcBcD5Ex8gL2VTn7+mUj1p6auotGnTFXR0FDFw4GGMG3cPItIvr/tJxUzeKPwe\nMQ4fl029g1hHR+8bKRVEWvoq6tTUvEh19VM4HIlMmvQkMTEJ/fr6z22cR3lLNlnJJZx1wGP9+tpK\naemrqJIc28jGjT8BYPTo2xgwYEy/Z/D44vnXql/i9Tk4YeQitm17t98zqOilpa+iygWT/onHU82g\nQceQlfVTazm2Nh3AKwXn4hBDfv48OjtbrGVR0UVLX0WNacM+ZUbmB7g6E/jRy3MZdc1rvW+0B/t7\nps0rW86lsHEMHR1FFBX9ab+yKBUoLX0VFWIdHfxwwr8AeH7ThdS2Z1hOBF7j5LF1XZ82SkvvoK1t\ng+VEKhpo6auoMDvvJdITqyhpzuOd4tC5dWJB43gyMi7FGA+bNl0F6EVbqm9p6auIl5ZQw6ljngPg\nifXz8ZkYy4m+afToW3A6B7Ft2xscMvwT23FUhNPSVxHv3PEPEx/Twf8qvkN+/VTbcXZywHWf8ciq\nOQCcN+FfxDlclhOpSKalryLaAYPXMCPzAzq88Tyz4WLbcXbrneKTKGoazdABNRw38lXbcVQE09JX\nEczwg/GPALCk4CzqXcMs59k9QwzP+t+UThn9HIlOPYVT9Q0tfRWxDkr/jLGDNtDUkcrrhWfajtOr\ntXXTWFv7LZJiWzll9HO246gIpaWvIpIx3h1THLxS8AM6vAMsJwrM8xsvAuD4ka/gcpVaTqMikZa+\nikjV1U+TM7CI2vZ0lhWfbDtOwLY2HcDyyqOIi3FTVHSD7TgqAmnpq4jj87nZuvU6AP67+Yd0mljL\nifbO8xsvxOtzUFHxsF6wpYJOS19FnMrKR3C5Cihvyebj8mNtx9lrVW1ZfFh2HOCjqOjPtuOoCOO0\nHUCpYPL5PBQV3QLAS5vP77cLsYJ9p6tXCn7A0TlvU1X1BHl511mZDVRFJt3TVxGlquoJOjqKSEyc\nwIrKI23H2We17RlkZFwAeHe8iSkVDFr6KmIY46W4uGs4JDf3dxhCa7qFvZWb+zvAQVXVv2lvL7Qd\nR0UILX0VMaqrn6O9fRMJCaMYNuw823H2W2LiOIYNOw9jOikpuc12HBUhtPRVRDDGR3HxzQDcv+IU\nRv9uqeVEwTFy5LWAUFHxkJ63r4JCS19FhNraRbS2rqGufSgflc2yHSdokpImkp5+DsZ4KC2903Yc\nFQG09FVEKCn5CwCvFX4/7M7L701u7tUAVFQsxONpsJxGhTstfRX2Ghs/oqnpE5zONN4vPcF2nKAb\nOPAQBg06Fq+3mYqKB2zHUWFOS1+FvZKS2wHIyvopbm+C5TR9IyfnNwCUlv4Dn6/DchoVzrT0VVhr\na9tIbe3LiMSTlXWF7Th9Ji3tRJKSDsTtrqCq6knbcVQY0ytyVVgrKbkDMGRkXEhc3PC93j7YV9L2\nFREhJ+c35OdfSEnJX8nIuAgR3WdTe0//1qiw5XZXU1n5KADZ2b+0G6YfDBs2h/j4HNra1lNf/5rt\nOCpMaemrsFVWdh/GdDBkyPdISppgO06fczhiycq6CkBP31T7LKDS
F5HZIrJBRDaLyIJdPP9dEflC\nRDpF5Owez3lF5Ev/16JgBVfRzefroLz8fgBycn5lOU3/ycy8FIcjiW3b3qK1da3tOCoM9Vr6IhID\n3AucBEwCzhORST1WKwbmAbs6wtRujDnI/3XafuZVCui6SYrHU01S0rdITf2u7Tj9JjZ2MBkZXXfX\nKi39h+U0KhwFsqd/GLDZGFNgjHEDTwOnd1/BGFNojFkF+Pogo1LfYIzZUXjZ2T9DRCwn6l/Z2V1D\nPFVVj+N211pOo8JNIKWfBZR0+77UvyxQCSKyQkQ+FZEz9iqdUrvQ2PghLS0riY1Nj4iJ1fZWYuJ4\n0tJOwudzUVHxL9txVJjpjwO5I40x04EfAneKyE53gxCR+f43hhU1NTX9EEmFs+17+SNG/JiYmMi8\nGKs32dk/B6Cs7B58Po/lNCqcBFL6ZUBOt++z/csCYowp8/+3AHgXmLaLdRYaY6YbY6anp6cH+qNV\nFHK5iqitfQkRJyNG/MR2HGsGDz6exMSJuN3l1NQ8bzuOCiOBlP5yYJyIjBKROGAOENBZOCIyWETi\n/Y+HAkcB6/Y1rFJlZfcBPtLTzyE+foTtOFbkLVjMqGuWcP/ymQCUld1rOZEKJ72WvjGmE7gCWAqs\nB541xqwVkRtF5DQAETlUREqBc4AHRGT7uWQTgRUi8hWwDLjVGKOlr/aJ1+uiouIhgB3nq0ezj8tn\nEhOTQlPTRzQ3f2k7jgoTAU3DYIxZAizpsey6bo+X0zXs03O7j4ED9zOjUgDU1DxDZ2cdycmHkJIy\nw3Yc6zq8A8jImEdZ2V2Ul9/L+PF6UFf1Tq/IVWGjrOweALKyLo+60zR3Jyvrp0DXDeE9nm2W06hw\noBOuqbDQ1PQZzc0rcDrTGDZszo7l4TJh2v7a3e856cbN/Hr6NKYMXUll5SPk5ET+HERq/+ievgoL\n2w9WZmZeQkzMAMtpQsvbxacA2+ci0usj1Z7pnr4KeW53LaXlTxHjEOb8ZwLLr7edqH8E+inmy+pD\nqW1PZyhbqK9/nSFDTt7jzyq89ZSgZVThR/f0VcirrHyI2BgPq2sOoaY9w3ackGOIYVlxV9Fvn4RO\nqd3R0lchzRgf5eX/BL4exlA7e7/seERiqatbjMtVZDuOCmFa+iqk1dcvxeUqpKZtOKtrD7YdJ2Q1\nuweRnn4OYCgvX2g7jgphWvoq6PIWLN7xtb+2D1e8WzIbQ8x+/7xItn1aioqKB/H53JbTqFClpa9C\nlstVTF3dYkRieb/seNtxQl5q6lEkJU3B46mmtvYl23FUiNLSVyGra5jCR3r6WTS7B9mOE/JEZMfe\nflmZHtBVu6alr0KSz+emouJBgKieTXNvDR8+F4cjicbG92ht1Wmu1M70PH0Vkmpr/4vHU0Vi4mRS\nU79D96mfouUq3H3hdKYwfPhcKioeoLz8n4wbd5ftSCrE6J6+CknbT9McMeLHOs/OXsrK6vpkVFn5\nGF5vm+U0KtTonr6yrufVom1tG2hoWIbDkUhGxoUWk4Wn5ORvkZJyOE1Nn1Jd/QyZmRfbjqRCiO7p\nq5Cz/TzzYcPOw+lMtZwmPI0Y8X/A15+YlNpOS1+FFK+3ncrKR4Gvi0vtve/cNZBWTxLNzZ/x3Zt1\nXF99TUtfhZSamufp7KwnOflgUlKm244Ttjy+eD4qmwXAzJzXLKdRoUTH9FVIKS9/ANC9/H3R86ym\nZSUncULeIg4f8R7PbLgElzfRUjIVSnRPX4WMrORCmpo+IiZmIMOGnWc7TtiraM0hv34KA5ztHDHi\nXdtxVIjQ0lchY2bO60DXBUZOZ7LlNJFhWfFJwPYhHmM3jAoJWvoqJMTFuDhyxDtA17n5Kjg+rzqS\nJncKuSlbGZ260XYcFQK09FVIODzjfRJj20hJOYLk5G/ZjhMxOk0sH5R2TVanB3QVaOmrEHFMblch\n6QHc4Hu3ZDYAMzLfJ9HZYjmN
sk3P3lH9Znf3aR2ZspnRqZtocSfznbuS8fgW77SO2nc17ZmsqZ3G\nlKErOTLrHeBc25GURbqnr6zbPuzwUfksPL54y2ki07KSrw/oGqMHdKOZlr6yqrOzkcMz3wO+HoZQ\nwfdl9WE0uAaTlVxCY+MHtuMoi7T0lVVVVU+Q4HSxvu5AKlpzbMeJWF7j5P3SEwCdjyfaaekra4wx\nOwpI9/L73rulJ+IzDmpqnsftrrYdR1mipa+saWr6mNbW1TR2DGJF1ZG240S8etcwvqqZjjEeKisf\nsR1HWaKlr6zZfh/X90uPx2tiLaeJDtuv0C0vfwBjfOQtWLzjS0UHLX1lhdtdS03Nc4DwXqkO7fSX\n1bUHk5CQh8u1lfr6N2zHURZo6SsrKisfwRg3aWknUds+3HacqGGIITOza5qL8vL7LadRNmjpq34n\n+LpNofwTy2miT2bmJYjEUlf3KmkJekA32mjpq343eehKXK4txMfnMmTISbbjRJ24uGGkp58F+Dgm\ne6ntOKqf6TQMqt8dm7N9np35iMTsdj09uNh3Roz4CdXVT3N0zlJe3jJHD6RHkYD29EVktohsEJHN\nIrJgF89/V0S+EJFOETm7x3MXicgm/9dFwQquwlNaQjUHDfsMkVgyMi61HSdqpaZ+h8TEyaTGN3DI\n8E9sx1H9qNfSl65dsXuBk4BJwHkiMqnHasXAPODJHtumAdcDM4DDgOtFZPD+x1bhambO6zjER3r6\nWcTHZ9iOE7VEhKysywE4NneJ5TSqPwWyp38YsNkYU2CMcQNPA6d3X8EYU2iMWQX4emx7IvCmMabe\nGLMNeBPQ8/OilFM8HO0fQx4x4nLLadTw4XNp7xzAhLQ1ZCcX2o6j+kkgY/pZQEm370vp2nMPxK62\nzeq5kojMB+YD5ObmBvijVbiZnvERKfGNlDTncXTqUbbjRK3ux0rmTjyW40YuZmbuEkDfiKNBSJy9\nY4xZaIyZboyZnp6ebjuO6iOzcrvK5u3iUxARy2kUwDslJwNw5IhldHY2WU6j+kMgpV8GdJ/+MNu/\nLBD7s62KIM3NXzJu8HraPIl8Un6M7TjKr7xlJPn1UxjgbKey8jHbcVQ/CKT0lwPjRGSUiMQBc4BF\nAf78pcAJIjLYfwD3BP8yFWXKy+8F4MOyWXR4B1hOo7p7u+hUoOvPSG+wEvl6LX1jTCdwBV1lvR54\n1hizVkRuFJHTAETkUBEpBc4BHhCRtf5t64Gb6HrjWA7c6F+moojHU09V1RMALPMPJ6jQ8UX14dS7\nhtDWls+2bW/ZjqP6WEAXZxljlgBLeiy7rtvj5XQN3exq24eBh/cjowpzFRUP4fO1s7p2mt4oJQR5\njZNlxSdz1gGPU1Z2N2lpx9uOpPpQSBzIVZHLGC9lZV1DO28VnWY5jdqdd0tPRCSOurpXaW8vsB1H\n9SEtfdWnamtfoaOjiISEMayqOcR2HLUbze5BDBs2BzA73qRVZNLSV/sskBtwlJXdBUB29pUY/esW\n0n7y0jQANmx9AK+31XIa1Vf0X6HqM1nJhTQ0LCMmJpmMjHm246heFDaNY9O2CSTFtlJZ+bjtOKqP\naOmrPnPcyFcBGD78IpzOVMtpVCDeKv4e0PUJTU/fjExa+qpPJMc2cuSIZQBkZV1hOY0K1IrKo/yn\nb66nvl4vqYlEWvqqT8zMeZ34mA7S0k4mKWmC7TgqQF7j3HGxVmnp3y2nUX1BS18FnVM8zMrtGtrJ\nzv6F5TRqb71bOhuHI5Ft296gtXWt7TgqyLT0VdDNyHyfQQnbKGnOY/DgWbbjqL3U6hlIRkbX/Y5K\nS++0nEYFm5a+CipjDCfm/ReApYVn6GyaYSo7++cAVFY+jttdYzmNCiYtfRVUDQ3LyE3ZSmPHID4t\nP9p2HLWPEhMPYMiQUzGmg/Lyf9qOo4JIS18FVUnJHUDXnPmderPtsLb9eExZ2T14ve2W06hg0d
JX\nQdPSsob6+sW4vXEsK9bZNMPdoEEzSU4+BI+nmqoqnWs/Umjpq6ApKfkrAB+UHUezRy/GCnciQm7u\n1QCUlNyOMV7LiVQwaOmroHC5iqmufhJw8PrW79uOo4Jk6NDvk5Awmvb2zdTUvGQ7jgoCLX0VFKWl\nf8eYTtLTz6GmPcN2HBUkDoeTnJxfAVBS8hedmiECaOmr/ZYU20x5+b8AyM39reU0Kthm3jucJncK\nzc3LaWh4z3YctZ+09NV+OzZnMT5fK4MHn8DAgdNsx1FB5vYl8FZR10RsxcW3Wk6j9ldAt0tUarue\nc+fHOVwcP/IVABYsPZr1Ty3e4/oqPPT8c3u7+BROHvUC27YtpalpBSkp0y0lU/tL9/TVfjkm53VS\n4hspaBjH+vqptuOoPtLqSeEd/2m4xcU3W06j9oeWvtpnsQ43J416EYBFW+YAOuVCJFtaeCYORwK1\ntf+lpWW17ThqH2npq3323ew3GJxQT1HTaL6sOcx2HNXHGt2Dycy8DIDi4j9bTqP2lZa+2icx4uHk\nUS8AupcfTXJyfoNILNXVz9DWtsF2HLUPtPTVPvl21tsMGVBDSfNIvqg63HYc1U8SEnL89zs2FBXd\nYjuO2gda+mqvxUgnp4x+DoBXt/wAo3+NokbegsXMfXoGXp+Dqqr/0Na20XYktZf0X6vaa9/JfpNh\niVVUtGTzWeW3bcdR/aymPYMPy44DvBQW3mA7jtpLep5+lOt+Pnbhraf0un6sw81pY54G4MXN52OI\n2e/XVeHn5S1zODLrHSqrnuKyF4/ko99fbjuSCpDu6au9MjNnCWkJdRQ3jWJF5VG24yhL6l3DeK/k\nRBxiOGPsk7bjqL2gpa8C1tnZwqn+sfwXNl2gY/lR7tWCH+D2xnFoxsc0N39hO44KkP6rVQErK7uL\nlPhGtjSM56uaQ23HUZY1dAzZcZXu1q1/sJxGBUpLXwXE46nfcZOU5zdegJ6XrwAWbz2b9s4B1Ncv\nYdu2d23HUQHQ0lcBKSr6E52dDayrm8r6+oNsx1Ehotk9iNf8N80pKPgNxvgsJ1K90dJXvWpvL6Cs\n7B5AeDr/UttxVIh5vfBM4uIyaW5eQXX1s7bjqF5o6ateFRRcgzEehg+/gOLmMbbjqBDj9iaQl3cj\nAFu3XoPP12E5kdqTgEpfRGaLyAYR2SwiC3bxfLyIPON//n8ikudfnici7SLypf/rn8GNr/paY+Mn\n1NQ8i8ORwKhRf7IdR4WojIx5JCZOxuUqpKzsPttx1B70enGWiMQA9wLHA6XAchFZZIxZ1221S4Ft\nxpixIjIHuA041//cFmOMDgKHIWMMW7b8GoCXN53GhUtWWU6kQpXD4WTMmNtYvfpUVuX/gZMWDqfF\nk/qNdQK5+E/1vUD29A8DNhtjCowxbuBp4PQe65wO/Nv/+Hlglojo6R1hrrr6KZqaPqapI5XFBWfb\njqNCXFrayQwefDxJsa2cdcDjtuOo3Qik9LOAkm7fl/qX7XIdY0wn0AgM8T83SkRWish7IvKd/cyr\n+klnZ/OOvfznNl6Ey5toOZEKdSLC2LF30emL4ejspYxM2Ww7ktqFvj6QWwHkGmOmAb8EnhSRlJ4r\nich8EVkhIitqamr6OJIKRFHRjbjdFQwcOMM/uZZSvUtKmsCbRafhEMPcif9E0FM4Q00gE66VATnd\nvs/2L9vVOqUi4gRSgTpjjAE6AIwxn4vIFuAAYEX3jY0xC4GFANOnTzf78HuoINg+CdqIpGJu+e6d\ngDBu3D0YquwGUyGv+wR6CTHncUTmu4wbnM8RI5bxcfksi8lUT4Hs6S8HxonIKBGJA+YAi3qsswi4\nyP/4bOAdY4wRkXT/gWBEZDQwDigITnTVNwznT3wAYzrJzJxPSsp024FUmHF5E3l24zwAzh3/CInO\nFruB1Df0Wvr+MforgKXAeuBZY8xaEblRRE7zr/YQMERENt
M1jLP9tM7vAqtE5Eu6DvD+nzGmPti/\nhAqeIzLfZfLQr3A60xg9+mbbcVSY+qR8JhvqJ5Ma38C54x8Guj4NbP9S9gQ0n74xZgmwpMey67o9\ndgHn7GK7F4AX9jOj6icD4xo4f+JCAMaMuZ3Y2CG9bKHUrhkcPLr2Cm486kqOznmDTyqOIb9+qu1Y\nCr0iV3Vz/sSFJMc1s6Z2mv8+qErtu4rWHF7Z0nW5zrzJdxPr0Ct1Q4GWvgLgoPT/cXjm+3R0xvPo\n2svRyyxUMCwuOJvS5lwykio4fexTtuMo9HaJUannmGqis4ULJ3ddOv/Cpgupbc/QcVcVFF4TyyNr\nr+TaGVdzUt6LfF51BFsbx9uOFdV0T19xwaT7SUuoY0vDeN4sOtV2HBVhtjRMZGnhGcQ4fPx46t+I\ni3HZjhTVtPSj3OGZ73LEiPfo6Ixn4apf7vONzpXakxc3XUBJ80gykso594BHbMeJalr6UWxIQjUX\nTrofgCfzL6OqrefsGkoFh8cXx8JVv6LT52TWyMXU1b1mO1LU0tKPUoKXy6beQWJsK19UHc57pSfa\njqQiXEnzaF7YNBeADRsuwe3WK71t0NKPUqePfZoJaWto7BjEI2uuRO95q/rD61vPJL9+Cm53JevW\n/RBjvLYjRR09eycKHTh0BaeNeRqfER5Y9Wuae8x7rlRfMcRw/1dXc98Jv6Gh4R0KC//IzAcO3+W6\nOv9+39A9/SjT3l7Ij6f+DYcYXto0l3V1en8b1b8aO9KYNOkpwEFR0Z84cOiKXrdRwaOlH0W8Xhdr\n155NclwzX1YfyqsFO82coVS/GDx4JqNG3QTAj6f+jfQBFZYTRQ8t/ShhjGHDhktoafmcmrbhLFz1\nK4z+8SuLcnMXMGTIqSTHNfOLQ27Q2Tj7if6rjxKFhddTXf0UMTHJ3LXyWto6k21HUlFOxMHEiU9S\n0pzHiORSfnrQbcRIp+1YEU9LPwpUVj5GUdFNgINJk56hpHm07UhKAeB0DuTOz6+jsWMQU4au5PyJ\nDwB6H6W+pKUf4err32LDhh8BMG7cXQwZcrLlREp9U51rGHetvBaPN5Zjc1/jtDFP77SOzsUfPFr6\nEayx8SPWrDkdYzxkZ/+crKzLbUdSape2NEzkgVW/xmccfH/cExw3sufN+VSwaOlHqObmz1m16mR8\nvjYyMuYxZszfbEdSao9WVB3FI2uuAGDuxIVUVv57l+vpXv/+0dKPQC0tX/HVVyfi9TaRnv4Dxo9/\nEBH9o1ah74OyE3hyfddwZH7+JVRWPmY5UeTRJogws269nfc+OYrOzjrS0k5h4sTH8d+bXqmw8EbR\nGby46XzAR37+Rcy79wrbkSKKln4Eqa9/i99M/wNJsa2sqDyCKVNewOGIsx1Lqb22aMt5PJN/MQDz\nJt/LiXkvWU4UOXTunQhRVfU0+fkXkeB082HZsTy85mf8ek78jud1/FOFm9cKz8Lti+eCSf/kvAkP\nMTihlmfyL9F7Puwn3dMPc8b42Lr1etavPw9j3LxZ9D0eWv1zfEb/Yajw93bxqSxc9Qs6fU5m573M\nVQffTHxMu+1YYU339MOY19vVpTROAAALTUlEQVRKfv7F1NQ8BzgYO/YO5r0+Bp0mWYW6vfnk+XH5\nLOrah3HltJuZNuwzrp1xNXevvJaa9ow+TBi5dE8/TLW0rObzzw+lpuY5YmJSOPDAV8nO/hla+CoS\nbdh2IDd9+jcqW0eQm7KVG478GYcM/9h2rLCke/phxhjDJfddwXkTHiQuxk1ZSw73rryG8sU+QMft\nVeSqasvixk/u4NID7+SQ4Z9y5bQ/87tHV/Pcxovx+OJ0/v0A6Z5+GHG5Sli9+lQumnwfcTFu3is5\ngRs++Tvlrbm2oynVL9o6k7l75bU8sf4yOn1OTsh7hRuPvIqxg9bbjhY2xJjQmtxo+vTpZsUKvakC\nfD3uKfj48MoSCgp+i9
fbQpsnicfW/YRPK46xG1Api0albOSyqXcwIrkUnxFyc35OXt4NOJ0DbUez\nQkQ+N8ZM72093dMPcQcMXsP1R/yCTZsux+ttYejQM/ndh/dp4auot7XpAK7/+C5eLTgbY4TS0r/z\n2WcHUFHxKMb4bMcLWVr6Iaq1NZ8rDvozv5uxgLzULcTFjWDy5OeZMuVFGjqG2I6nVEjw+OJ4fuM8\nbvz0DgYOnIHbXcmGDRfzxRczqK9/k1AbyQgFWvohpqVlNWvXnsvy5ZOYnvExHZ3xvLjpfGbM2Eh6\n+lm24ykVkoqaxnLwwR8zYcLjxMWNoLl5BatWncCD/51Eff1SLf9u9OydEGCMj/r61ykru4f6+tcA\nEInlnaJZvLzlPBo6hnBXTJLllEqFNhEHGRlzGTr0DK566EpOGvUi4wbns2rVbJKSDiQr6yqGDz+f\nmJgBtqNapQdyLXK5iqiqepKKigdxuQoAEIlnxIj55ORczYQ/fmU5oVLhKz6mnWNzFzN3ymt4PFUA\nOJ1pDB9+PhkZF5GcfDAiX1/X0v2CsXA8/TPQA7m6p9/PXK4S6uoWUV39LI2N7+9YHh8/kqysn5KR\ncQlxcUP9S7X0ldpXHd4BvLb1bO697AGqq5+lrOwfNDevoKzsbsrK7iYxcRLp6eeQnn4mSUlTbcft\nN1r6+2F3ewbdl8c63IwdtJ6JQ1YxdegK8lK37HjO4UhgyJDTyci4gLS02ToFslJ9wOGIIyNjLsOH\nn09Ly0oqK/9NdfWTtLWto6joBoqKbiAhYRQXTJrIurqDWF8X2W8AWvpBZIyho6OUg4d9zJhBGxgz\naAOjUzcSF+PesY6rM4E1tQcz95j5DB16Jk5nisXESkUPEWHgwIMZOPBgxoy5nW3b3qK29iVqa1/G\n5drKrNytzMpdgs84WL78FlJTjyAl5QgGDpxOYuL4iNkpC2hMX0RmA/8AYoAHjTG39ng+HngMOASo\nA841xhT6n7sGuBTwAlcZY5bu6bXCYUzf5/PQ0VHK2ff8h8ykMjKSSjn7W620tHxFZ+e2ndYvbhrF\nurpvsbbuIPLrp+50ybhOe6yUHYW3noIxXpqalvP7Z+5l8tAvGTsoH6ej8xvrdXjjKW0eyVETjiIx\ncSKJiRMYMGAMCQmjQubAcNDG9KXr7e1e4HigFFguIouMMeu6rXYpsM0YM1ZE5gC3AeeKyCRgDjAZ\nGAG8JSIHGGO8e/8r9S1jfHR2NtLZuQ2Ppx6PpxaPpxqPp4aOjnLc7nI6OspwuYrp6CgFvPzm0K+3\nb2jo+q/TmcZXlTlsaZzAlobxbGkYT4sn1crvpJTqnUgMqamH80pBHa8UzCHW0cHyq9NpavqEpqZP\nyC/5mKEDqhkzaCOVlRt32j4uLoP4+Fzi47OJj88iLi6TuLhhxMYOIzY2ndjYNJzONJzOQTgc9gdX\net3TF5EjgD8aY070f38NgDHmlm7rLPWv84mIOIFKIB1Y0H3d7uvt7vX2dU+/rW0jdXVL+POSr3CK\nF6fDwxUzc/H5OvD5XPh87fh87Xi9bXy4qYSEmHbiYzpIcLaRGNvKAGfgc3T7jNDQkUZ1WyaVrVlU\ntmZR3pJDcfMo/4VTOtOlUpEk0dlCbkoBD50/iLa2fNra8nG5CnC5CjGms/cf4OdwJOJ0plJUH4PL\nm0CHN4GOzgTcvjg83njOmj6OsWPv3KdPD8E8eycLKOn2fSkwY3frGGM6RaQRGOJf/mmPbbMCeM29\n1tLyJVu2/IJzx3+9rKRk1+tO2s0FrTExKf535cHExg4hNnY4cXHDiIsbTlxcFvHxWcy+exN17cPo\nNLHB/yWUUiGprTOZ/PqpZGV981ROY7x0dJRx6p3PMji+jsEJtfz2hCF4PFW43VV4PHV0dtb7/9uI\nz9eG291GZvKuX6eiYhnjxt3Vp7+L/c8agIjMB+b7v20RkQ1BfomhQG3vqzX5vwqD/PJ7
JcCsIUGz\n9o1wyRouOSFIWeW23tf51/6+CAn7mnVkICsFUvplQE6377P9y3a1Tql/eCeVrgO6gWyLMWYhsDCQ\nwPtCRFYE8rEnFGjWvqFZgy9ccoJm7S6QuXeWA+NEZJSIxNF1YHZRj3UWARf5H58NvGO6DhYsAuaI\nSLyIjALGAZ8FJ7pSSqm91euevn+M/gpgKV2nbD5sjFkrIjcCK4wxi4CHgMdFZDNQT9cbA/71ngXW\nAZ3A5aF45o5SSkWLgMb0jTFLgCU9ll3X7bELOGc3294M3LwfGYOhz4aO+oBm7RuaNfjCJSdo1h1C\nbsI1pZRSfUfn01dKqSgSNaUvIn8VkXwRWSUiL4nIINuZehKR2SKyQUQ2i8gC23l2RURyRGSZiKwT\nkbUi8jPbmXojIjEislJEXrWdZU9EZJCIPO//e7ref2FkSBKRX/j//NeIyFMikmA703Yi8rCIVIvI\nmm7L0kTkTRHZ5P/vYJsZt9tN1j7tqqgpfeBNYIoxZiqwEbjGcp5v6DbdxUnAJOA8/zQWoaYT+JUx\nZhJwOHB5iObs7mfAetshAvAP4HVjzATgW4RoZhHJAq4CphtjptB1gsccu6m+4VFgdo9lC4C3jTHj\ngLf934eCR9k5a592VdSUvjHmDfP19dKf0nXNQCg5DNhsjCkwxriBp4HTLWfaiTGmwhjzhf9xM13F\n1CdXWQeDiGQDpwAP2s6yJyKSCnyXrjPhMMa4jTENdlPtkRMY4L8uJxEot5xnB2PM+3SdRdjd6cC/\n/Y//DZzRr6F2Y1dZ+7qroqb0e7gEeM12iB52Nd1FyJYpgIjkAdOA/9lNskd3AlcDPttBejEKqAEe\n8Q9FPSgiIXmPTGNMGXA7UAxUAI3GmDfspurVcGNMhf9xJTDcZpi9EPSuiqjSF5G3/GOMPb9O77bO\ntXQNUTxhL2n4E5Fk4AXg58aYJtt5dkVETgWqjTGf284SACdwMHC/MWYa0EroDEF8g388/HS63qhG\nAEkiMtduqsD5LxwN+dMW+6qrQmLunWAxxhy3p+dFZB5wKjDLhN65qgFNWREKRCSWrsJ/whjzou08\ne3AUcJqInAwkACki8h9jTCgWVClQaozZ/qnpeUK09IHjgK3GmBoAEXkROBL4j9VUe1YlIpnGmAoR\nyQSqbQfak77sqoja098T/41grgZOM8a02c6zC4FMd2GddN1J+iFgvTHmDtt59sQYc40xJtsYk0fX\n/893QrTwMcZUAiUisn2e2Fl0XckeioqBw0Uk0f/3YRYhetC5m+5TxVwEvGwxyx71dVdFzcVZ/iki\n4umaCA7gU2PM/1mMtBP/HumdfD3dhe0rmXciIt8GPgBW8/U4+e/8V22HLBE5Bvi1MeZU21l2R0QO\nouuAcxxQAFxsjNn5VmwhQERuAM6la/hhJfAjY0yH3VRdROQp4Bi6ZtasAq4H/gs8C+QCRcAPjDE9\nD/b2u91kvYY+7KqoKX2llFJRNLyjlFJKS18ppaKKlr5SSkURLX2llIoiWvpKKRVFtPSVUiqKaOkr\npVQU0dJXSqko8v8iZOmLGB6q6QAAAABJRU5ErkJggg==\n", 170 | "text/plain": [ 171 | "
" 172 | ] 173 | }, 174 | "metadata": { 175 | "tags": [] 176 | } 177 | } 178 | ] 179 | } 180 | ] 181 | } -------------------------------------------------------------------------------- /Probability_and_It's_Uses/Probability.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Probability.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | } 14 | }, 15 | "cells": [ 16 | { 17 | "cell_type": "code", 18 | "metadata": { 19 | "id": "tFQ4bdA53nVF", 20 | "colab_type": "code", 21 | "colab": {} 22 | }, 23 | "source": [ 24 | "%matplotlib inline" 25 | ], 26 | "execution_count": 0, 27 | "outputs": [] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": { 32 | "id": "jsmjmvb8Dlu9", 33 | "colab_type": "text" 34 | }, 35 | "source": [ 36 | "# Example 1:\n", 37 | "\n", 38 | "---\n", 39 | "\n" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "metadata": { 45 | "id": "z7E8ErzD3nVQ", 46 | "colab_type": "code", 47 | "colab": { 48 | "base_uri": "https://localhost:8080/", 49 | "height": 34 50 | }, 51 | "outputId": "87112d28-3763-4197-a925-6664a9d86dc0" 52 | }, 53 | "source": [ 54 | "# Sample Space\n", 55 | "cards = 52\n", 56 | "\n", 57 | "# Outcomes\n", 58 | "aces = 4\n", 59 | "\n", 60 | "\n", 61 | "# Divide possible outcomes by the sample set\n", 62 | "ace_probability = aces / cards\n", 63 | "\n", 64 | "# Print probability rounded to two decimal places\n", 65 | "print(\"Probability of getting aces:\", round(ace_probability, 2))" 66 | ], 67 | "execution_count": 4, 68 | "outputs": [ 69 | { 70 | "output_type": "stream", 71 | "text": [ 72 | "Probability of getting aces: 0.08\n" 73 | ], 74 | "name": "stdout" 75 | } 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": { 81 | "id": "2BZQV1rQDqIK", 82 | "colab_type": "text" 83 | }, 84 | "source": [ 
85 | "# Example2:\n", 86 | "\n", 87 | "---\n", 88 | "\n" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "metadata": { 94 | "id": "V2Z-QhdeDvu2", 95 | "colab_type": "code", 96 | "colab": { 97 | "base_uri": "https://localhost:8080/", 98 | "height": 34 99 | }, 100 | "outputId": "33a30ded-8c60-4f30-ef41-08e8c3392078" 101 | }, 102 | "source": [ 103 | "# Sample Space\n", 104 | "cards = 52\n", 105 | "\n", 106 | "# Determine the probability of drawing a heart\n", 107 | "hearts = 13\n", 108 | "hearts_probability = hearts/cards\n", 109 | "\n", 110 | "print(\"Probability of getting hearts:\", round(hearts_probability,2))" 111 | ], 112 | "execution_count": 6, 113 | "outputs": [ 114 | { 115 | "output_type": "stream", 116 | "text": [ 117 | "Probability of getting hearts: 0.25\n" 118 | ], 119 | "name": "stdout" 120 | } 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": { 126 | "id": "Qjcd3tP4D50s", 127 | "colab_type": "text" 128 | }, 129 | "source": [ 130 | "# Example3:\n", 131 | "\n", 132 | "---\n", 133 | "\n" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "metadata": { 139 | "id": "T28jnb3VEMC_", 140 | "colab_type": "code", 141 | "colab": { 142 | "base_uri": "https://localhost:8080/", 143 | "height": 68 144 | }, 145 | "outputId": "5dd53c8c-7a08-4ee3-a999-4f57e66cc650" 146 | }, 147 | "source": [ 148 | "# Create function that returns probability percent rounded to one decimal place\n", 149 | "def event_probability(event_outcomes, sample_space):\n", 150 | " probability = (event_outcomes / sample_space) * 100\n", 151 | " return round(probability, 1)\n", 152 | "\n", 153 | "# Sample Space\n", 154 | "cards = 52\n", 155 | "\n", 156 | "# Determine the probability of drawing a heart\n", 157 | "hearts = 13\n", 158 | "heart_probability = event_probability(hearts, cards)\n", 159 | "\n", 160 | "# Determine the probability of drawing a face card\n", 161 | "face_cards = 12\n", 162 | "face_card_probability = event_probability(face_cards, cards)\n", 163 | 
"\n", 164 | "# Determine the probability of drawing the queen of hearts\n", 165 | "ace_of_hearts = 1\n", 166 | "ace_of_hearts_probability = event_probability(ace_of_hearts, cards)\n", 167 | "\n", 168 | "# Print each probability\n", 169 | "print(str(heart_probability) + '%')\n", 170 | "print(str(face_card_probability) + '%')\n", 171 | "print(str(ace_of_hearts_probability) + '%')" 172 | ], 173 | "execution_count": 7, 174 | "outputs": [ 175 | { 176 | "output_type": "stream", 177 | "text": [ 178 | "25.0%\n", 179 | "23.1%\n", 180 | "1.9%\n" 181 | ], 182 | "name": "stdout" 183 | } 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": { 189 | "id": "G6Yh-scfEXBn", 190 | "colab_type": "text" 191 | }, 192 | "source": [ 193 | "# Example4: \n", 194 | "\n", 195 | "---\n", 196 | "\n" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "metadata": { 202 | "id": "alpEsvVlEfdI", 203 | "colab_type": "code", 204 | "colab": { 205 | "base_uri": "https://localhost:8080/", 206 | "height": 34 207 | }, 208 | "outputId": "66f66c7c-e237-47fe-ac4f-dd5c503a2f12" 209 | }, 210 | "source": [ 211 | "# Sample Space\n", 212 | "cards = 52\n", 213 | "hole_cards = 2\n", 214 | "turn_community_cards = 4\n", 215 | "cards = cards - (hole_cards + turn_community_cards)\n", 216 | "\n", 217 | "# Outcomes\n", 218 | "diamonds = 13\n", 219 | "diamonds_drawn = 4\n", 220 | "# In poker, cards that complete a draw are known as \"outs\"\n", 221 | "outs = diamonds - diamonds_drawn\n", 222 | "\n", 223 | "#Determine river flush probability\n", 224 | "river_flush_probability = event_probability(outs, cards)\n", 225 | "print(river_flush_probability)" 226 | ], 227 | "execution_count": 8, 228 | "outputs": [ 229 | { 230 | "output_type": "stream", 231 | "text": [ 232 | "19.6\n" 233 | ], 234 | "name": "stdout" 235 | } 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "metadata": { 241 | "id": "9ieQ1lRWE_Qe", 242 | "colab_type": "code", 243 | "colab": {} 244 | }, 245 | "source": [ 246 | "import 
pandas as pd" 247 | ], 248 | "execution_count": 0, 249 | "outputs": [] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "metadata": { 254 | "id": "q4s2h12j3nVo", 255 | "colab_type": "code", 256 | "colab": { 257 | "base_uri": "https://localhost:8080/", 258 | "height": 136 259 | }, 260 | "outputId": "1440c065-aeb8-4fbe-d1a0-1cfb49c0d875" 261 | }, 262 | "source": [ 263 | "#Conditional Probability\n", 264 | "df = pd.DataFrame({'company': ['ford', 'chevy', 'ford', 'ford', 'ford', 'toyota'],\n", 265 | " 'model': ['mustang', 'camaro', 'fiesta', 'focus', 'taurus', 'camry'],\n", 266 | " 'rating': ['A', 'B', 'C', 'A', 'B', 'B'],\n", 267 | " 'type': ['coupe', 'coupe', 'sedan', 'sedan', 'sedan', 'sedan']})\n", 268 | "print(df)" 269 | ], 270 | "execution_count": 11, 271 | "outputs": [ 272 | { 273 | "output_type": "stream", 274 | "text": [ 275 | " company model rating type\n", 276 | "0 ford mustang A coupe\n", 277 | "1 chevy camaro B coupe\n", 278 | "2 ford fiesta C sedan\n", 279 | "3 ford focus A sedan\n", 280 | "4 ford taurus B sedan\n", 281 | "5 toyota camry B sedan\n" 282 | ], 283 | "name": "stdout" 284 | } 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "metadata": { 290 | "id": "JWjOZ4S73nVy", 291 | "colab_type": "code", 292 | "colab": { 293 | "base_uri": "https://localhost:8080/", 294 | "height": 102 295 | }, 296 | "outputId": "1af1ee6b-18e4-4bd5-dd00-07bb5e7c7878" 297 | }, 298 | "source": [ 299 | "df.groupby('rating').count()['model'] / len(df)" 300 | ], 301 | "execution_count": 12, 302 | "outputs": [ 303 | { 304 | "output_type": "execute_result", 305 | "data": { 306 | "text/plain": [ 307 | "rating\n", 308 | "A 0.333333\n", 309 | "B 0.500000\n", 310 | "C 0.166667\n", 311 | "Name: model, dtype: float64" 312 | ] 313 | }, 314 | "metadata": { 315 | "tags": [] 316 | }, 317 | "execution_count": 12 318 | } 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "metadata": { 324 | "id": "TVDhrdi23nV8", 325 | "colab_type": "code", 326 | "colab": { 327 | "base_uri": 
"https://localhost:8080/", 328 | "height": 136 329 | }, 330 | "outputId": "51c7da88-ce8f-4234-816c-10a266f791f5" 331 | }, 332 | "source": [ 333 | "(df.groupby(['rating', 'type']).count() / df.groupby('rating').count())['model']" 334 | ], 335 | "execution_count": 13, 336 | "outputs": [ 337 | { 338 | "output_type": "execute_result", 339 | "data": { 340 | "text/plain": [ 341 | "rating type \n", 342 | "A coupe 0.500000\n", 343 | " sedan 0.500000\n", 344 | "B coupe 0.333333\n", 345 | " sedan 0.666667\n", 346 | "C sedan 1.000000\n", 347 | "Name: model, dtype: float64" 348 | ] 349 | }, 350 | "metadata": { 351 | "tags": [] 352 | }, 353 | "execution_count": 13 354 | } 355 | ] 356 | }, 357 | { 358 | "cell_type": "code", 359 | "metadata": { 360 | "id": "963JjEk93nWF", 361 | "colab_type": "code", 362 | "colab": { 363 | "base_uri": "https://localhost:8080/", 364 | "height": 1000 365 | }, 366 | "outputId": "251d7268-4488-4e25-92f3-d84f584929fb" 367 | }, 368 | "source": [ 369 | "#Joint Probability\n", 370 | "import numpy as np\n", 371 | "numBins = 10 # number of bins in each dimension\n", 372 | "data = np.random.randn(100000, 3) # generate 100000 3-d random data points\n", 373 | "jointProbs, edges = np.histogramdd(data, bins=numBins)\n", 374 | "jointProbs /= jointProbs.sum()\n", 375 | "print(jointProbs)" 376 | ], 377 | "execution_count": 14, 378 | "outputs": [ 379 | { 380 | "output_type": "stream", 381 | "text": [ 382 | "[[[0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00\n", 383 | " 0.000e+00 0.000e+00 0.000e+00]\n", 384 | " [0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00\n", 385 | " 0.000e+00 0.000e+00 0.000e+00]\n", 386 | " [0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00\n", 387 | " 0.000e+00 0.000e+00 0.000e+00]\n", 388 | " [0.000e+00 0.000e+00 0.000e+00 0.000e+00 1.000e-05 0.000e+00 1.000e-05\n", 389 | " 0.000e+00 0.000e+00 0.000e+00]\n", 390 | " [0.000e+00 0.000e+00 0.000e+00 1.000e-05 3.000e-05 1.000e-05 
0.000e+00\n", 391 | " 0.000e+00 0.000e+00 0.000e+00]\n", 392 | " [0.000e+00 0.000e+00 1.000e-05 1.000e-05 2.000e-05 2.000e-05 0.000e+00\n", 393 | " 0.000e+00 0.000e+00 0.000e+00]\n", 394 | " [0.000e+00 0.000e+00 0.000e+00 1.000e-05 0.000e+00 0.000e+00 0.000e+00\n", 395 | " 0.000e+00 0.000e+00 0.000e+00]\n", 396 | " [0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00\n", 397 | " 0.000e+00 0.000e+00 0.000e+00]\n", 398 | " [0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00\n", 399 | " 0.000e+00 0.000e+00 0.000e+00]\n", 400 | " [0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00\n", 401 | " 0.000e+00 0.000e+00 0.000e+00]]\n", 402 | "\n", 403 | " [[0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00\n", 404 | " 0.000e+00 0.000e+00 0.000e+00]\n", 405 | " [0.000e+00 0.000e+00 1.000e-05 0.000e+00 0.000e+00 0.000e+00 1.000e-05\n", 406 | " 0.000e+00 0.000e+00 0.000e+00]\n", 407 | " [0.000e+00 0.000e+00 0.000e+00 3.000e-05 0.000e+00 2.000e-05 2.000e-05\n", 408 | " 0.000e+00 0.000e+00 0.000e+00]\n", 409 | " [0.000e+00 1.000e-05 4.000e-05 7.000e-05 8.000e-05 1.300e-04 8.000e-05\n", 410 | " 3.000e-05 0.000e+00 1.000e-05]\n", 411 | " [1.000e-05 0.000e+00 2.000e-05 1.100e-04 3.300e-04 1.800e-04 1.200e-04\n", 412 | " 2.000e-05 1.000e-05 0.000e+00]\n", 413 | " [0.000e+00 0.000e+00 3.000e-05 1.500e-04 2.500e-04 3.000e-04 1.400e-04\n", 414 | " 5.000e-05 3.000e-05 0.000e+00]\n", 415 | " [0.000e+00 0.000e+00 3.000e-05 8.000e-05 1.100e-04 1.200e-04 2.000e-05\n", 416 | " 2.000e-05 0.000e+00 0.000e+00]\n", 417 | " [0.000e+00 0.000e+00 0.000e+00 1.000e-05 1.000e-05 6.000e-05 0.000e+00\n", 418 | " 0.000e+00 0.000e+00 0.000e+00]\n", 419 | " [0.000e+00 0.000e+00 0.000e+00 1.000e-05 1.000e-05 0.000e+00 0.000e+00\n", 420 | " 0.000e+00 0.000e+00 0.000e+00]\n", 421 | " [0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00\n", 422 | " 0.000e+00 0.000e+00 0.000e+00]]\n", 423 | "\n", 424 | " [[0.000e+00 
0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00\n", 425 | " 0.000e+00 0.000e+00 0.000e+00]\n", 426 | " [0.000e+00 0.000e+00 1.000e-05 2.000e-05 4.000e-05 2.000e-05 0.000e+00\n", 427 | " 0.000e+00 0.000e+00 0.000e+00]\n", 428 | " [0.000e+00 0.000e+00 5.000e-05 1.000e-04 3.300e-04 2.700e-04 2.400e-04\n", 429 | " 3.000e-05 0.000e+00 0.000e+00]\n", 430 | " [0.000e+00 1.000e-05 2.700e-04 5.200e-04 1.160e-03 1.340e-03 5.700e-04\n", 431 | " 9.000e-05 1.000e-05 0.000e+00]\n", 432 | " [0.000e+00 4.000e-05 3.200e-04 1.330e-03 2.190e-03 2.260e-03 1.210e-03\n", 433 | " 2.400e-04 7.000e-05 0.000e+00]\n", 434 | " [0.000e+00 2.000e-05 3.800e-04 1.250e-03 2.060e-03 2.070e-03 1.050e-03\n", 435 | " 3.000e-04 5.000e-05 1.000e-05]\n", 436 | " [0.000e+00 4.000e-05 1.700e-04 6.200e-04 1.000e-03 1.090e-03 4.800e-04\n", 437 | " 1.100e-04 0.000e+00 0.000e+00]\n", 438 | " [1.000e-05 2.000e-05 2.000e-05 5.000e-05 2.100e-04 2.000e-04 1.000e-04\n", 439 | " 1.000e-05 0.000e+00 0.000e+00]\n", 440 | " [0.000e+00 0.000e+00 0.000e+00 2.000e-05 3.000e-05 1.000e-05 2.000e-05\n", 441 | " 1.000e-05 0.000e+00 0.000e+00]\n", 442 | " [0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 1.000e-05 0.000e+00\n", 443 | " 0.000e+00 0.000e+00 0.000e+00]]\n", 444 | "\n", 445 | " [[0.000e+00 0.000e+00 0.000e+00 1.000e-05 0.000e+00 0.000e+00 0.000e+00\n", 446 | " 0.000e+00 0.000e+00 0.000e+00]\n", 447 | " [0.000e+00 1.000e-05 2.000e-05 1.400e-04 1.400e-04 1.800e-04 1.000e-04\n", 448 | " 2.000e-05 0.000e+00 0.000e+00]\n", 449 | " [0.000e+00 1.000e-05 2.300e-04 6.600e-04 1.380e-03 1.190e-03 6.600e-04\n", 450 | " 1.400e-04 3.000e-05 0.000e+00]\n", 451 | " [0.000e+00 1.400e-04 7.400e-04 2.890e-03 5.430e-03 5.360e-03 2.740e-03\n", 452 | " 7.300e-04 6.000e-05 3.000e-05]\n", 453 | " [3.000e-05 2.500e-04 1.910e-03 5.540e-03 1.099e-02 1.046e-02 4.990e-03\n", 454 | " 1.390e-03 1.400e-04 0.000e+00]\n", 455 | " [4.000e-05 1.900e-04 1.590e-03 6.130e-03 1.023e-02 1.017e-02 4.770e-03\n", 456 | " 1.170e-03 9.000e-05 
0.000e+00]\n", 457 | " [0.000e+00 1.400e-04 9.700e-04 2.630e-03 4.810e-03 5.030e-03 2.200e-03\n", 458 | " 5.800e-04 6.000e-05 2.000e-05]\n", 459 | " [0.000e+00 4.000e-05 1.300e-04 5.800e-04 1.070e-03 1.010e-03 6.200e-04\n", 460 | " 5.000e-05 1.000e-05 0.000e+00]\n", 461 | " [0.000e+00 0.000e+00 1.000e-05 6.000e-05 9.000e-05 1.100e-04 3.000e-05\n", 462 | " 0.000e+00 0.000e+00 0.000e+00]\n", 463 | " [0.000e+00 0.000e+00 1.000e-05 0.000e+00 1.000e-05 2.000e-05 0.000e+00\n", 464 | " 0.000e+00 0.000e+00 0.000e+00]]\n", 465 | "\n", 466 | " [[0.000e+00 0.000e+00 0.000e+00 0.000e+00 4.000e-05 2.000e-05 0.000e+00\n", 467 | " 0.000e+00 0.000e+00 0.000e+00]\n", 468 | " [0.000e+00 1.000e-05 7.000e-05 1.200e-04 2.800e-04 2.900e-04 1.500e-04\n", 469 | " 1.000e-05 1.000e-05 0.000e+00]\n", 470 | " [0.000e+00 9.000e-05 5.200e-04 1.850e-03 3.150e-03 2.690e-03 1.650e-03\n", 471 | " 3.400e-04 6.000e-05 0.000e+00]\n", 472 | " [3.000e-05 3.300e-04 1.950e-03 6.460e-03 1.284e-02 1.220e-02 6.530e-03\n", 473 | " 1.890e-03 2.300e-04 1.000e-05]\n", 474 | " [5.000e-05 6.000e-04 4.060e-03 1.413e-02 2.479e-02 2.449e-02 1.231e-02\n", 475 | " 2.790e-03 4.200e-04 6.000e-05]\n", 476 | " [4.000e-05 5.100e-04 4.130e-03 1.277e-02 2.498e-02 2.376e-02 1.222e-02\n", 477 | " 3.030e-03 4.100e-04 4.000e-05]\n", 478 | " [1.000e-05 2.900e-04 1.680e-03 5.900e-03 1.087e-02 1.023e-02 5.410e-03\n", 479 | " 1.340e-03 1.900e-04 2.000e-05]\n", 480 | " [2.000e-05 6.000e-05 2.900e-04 1.440e-03 2.130e-03 2.210e-03 1.160e-03\n", 481 | " 2.500e-04 4.000e-05 1.000e-05]\n", 482 | " [0.000e+00 0.000e+00 8.000e-05 1.500e-04 2.200e-04 1.600e-04 2.200e-04\n", 483 | " 5.000e-05 0.000e+00 0.000e+00]\n", 484 | " [0.000e+00 0.000e+00 0.000e+00 0.000e+00 1.000e-05 0.000e+00 2.000e-05\n", 485 | " 0.000e+00 0.000e+00 0.000e+00]]\n", 486 | "\n", 487 | " [[0.000e+00 0.000e+00 1.000e-05 0.000e+00 1.000e-05 1.000e-05 0.000e+00\n", 488 | " 0.000e+00 0.000e+00 0.000e+00]\n", 489 | " [0.000e+00 0.000e+00 7.000e-05 2.500e-04 3.600e-04 
4.000e-04 1.400e-04\n", 490 | " 1.000e-04 0.000e+00 0.000e+00]\n", 491 | " [0.000e+00 6.000e-05 5.000e-04 1.880e-03 3.840e-03 3.400e-03 1.900e-03\n", 492 | " 4.900e-04 6.000e-05 1.000e-05]\n", 493 | " [2.000e-05 3.900e-04 2.180e-03 8.620e-03 1.535e-02 1.487e-02 7.230e-03\n", 494 | " 1.880e-03 3.000e-04 3.000e-05]\n", 495 | " [2.000e-05 6.000e-04 4.870e-03 1.576e-02 3.144e-02 2.965e-02 1.473e-02\n", 496 | " 4.030e-03 6.400e-04 2.000e-05]\n", 497 | " [4.000e-05 8.200e-04 4.550e-03 1.685e-02 2.914e-02 2.796e-02 1.378e-02\n", 498 | " 3.760e-03 4.200e-04 1.000e-05]\n", 499 | " [3.000e-05 3.300e-04 1.960e-03 6.580e-03 1.300e-02 1.328e-02 6.280e-03\n", 500 | " 1.840e-03 2.300e-04 1.000e-05]\n", 501 | " [1.000e-05 9.000e-05 3.600e-04 1.590e-03 2.730e-03 2.690e-03 1.450e-03\n", 502 | " 2.800e-04 4.000e-05 0.000e+00]\n", 503 | " [0.000e+00 1.000e-05 4.000e-05 1.400e-04 2.300e-04 3.800e-04 7.000e-05\n", 504 | " 8.000e-05 1.000e-05 0.000e+00]\n", 505 | " [0.000e+00 0.000e+00 0.000e+00 1.000e-05 4.000e-05 0.000e+00 1.000e-05\n", 506 | " 0.000e+00 0.000e+00 0.000e+00]]\n", 507 | "\n", 508 | " [[0.000e+00 0.000e+00 0.000e+00 0.000e+00 2.000e-05 4.000e-05 0.000e+00\n", 509 | " 0.000e+00 0.000e+00 0.000e+00]\n", 510 | " [0.000e+00 0.000e+00 4.000e-05 1.100e-04 3.200e-04 2.300e-04 2.300e-04\n", 511 | " 1.000e-05 1.000e-05 0.000e+00]\n", 512 | " [0.000e+00 4.000e-05 3.700e-04 1.270e-03 2.320e-03 2.600e-03 1.020e-03\n", 513 | " 3.400e-04 5.000e-05 0.000e+00]\n", 514 | " [1.000e-05 1.100e-04 1.470e-03 5.450e-03 9.970e-03 9.110e-03 4.750e-03\n", 515 | " 1.370e-03 1.500e-04 0.000e+00]\n", 516 | " [3.000e-05 4.300e-04 3.190e-03 1.033e-02 1.953e-02 1.887e-02 8.910e-03\n", 517 | " 2.430e-03 3.500e-04 2.000e-05]\n", 518 | " [3.000e-05 4.000e-04 2.920e-03 1.000e-02 1.800e-02 1.801e-02 8.700e-03\n", 519 | " 2.570e-03 4.100e-04 1.000e-05]\n", 520 | " [0.000e+00 1.800e-04 1.220e-03 4.690e-03 8.410e-03 7.860e-03 4.430e-03\n", 521 | " 1.110e-03 1.500e-04 3.000e-05]\n", 522 | " [0.000e+00 2.000e-05 
3.100e-04 9.300e-04 1.850e-03 1.500e-03 1.010e-03\n", 523 | " 2.400e-04 4.000e-05 0.000e+00]\n", 524 | " [0.000e+00 0.000e+00 1.000e-05 1.600e-04 1.600e-04 8.000e-05 9.000e-05\n", 525 | " 2.000e-05 0.000e+00 0.000e+00]\n", 526 | " [0.000e+00 0.000e+00 0.000e+00 0.000e+00 1.000e-05 2.000e-05 0.000e+00\n", 527 | " 0.000e+00 0.000e+00 0.000e+00]]\n", 528 | "\n", 529 | " [[0.000e+00 0.000e+00 0.000e+00 1.000e-05 0.000e+00 1.000e-05 0.000e+00\n", 530 | " 0.000e+00 0.000e+00 0.000e+00]\n", 531 | " [0.000e+00 0.000e+00 1.000e-05 7.000e-05 1.100e-04 1.200e-04 4.000e-05\n", 532 | " 4.000e-05 1.000e-05 0.000e+00]\n", 533 | " [0.000e+00 3.000e-05 1.700e-04 3.600e-04 8.500e-04 6.400e-04 2.600e-04\n", 534 | " 9.000e-05 3.000e-05 0.000e+00]\n", 535 | " [0.000e+00 5.000e-05 4.700e-04 1.750e-03 3.430e-03 2.950e-03 1.490e-03\n", 536 | " 3.800e-04 6.000e-05 0.000e+00]\n", 537 | " [2.000e-05 1.700e-04 1.060e-03 3.520e-03 6.390e-03 5.720e-03 3.160e-03\n", 538 | " 7.600e-04 1.000e-04 0.000e+00]\n", 539 | " [2.000e-05 2.000e-04 8.200e-04 2.780e-03 5.660e-03 5.710e-03 3.340e-03\n", 540 | " 7.500e-04 6.000e-05 3.000e-05]\n", 541 | " [1.000e-05 8.000e-05 4.400e-04 1.720e-03 2.690e-03 2.510e-03 1.240e-03\n", 542 | " 3.400e-04 5.000e-05 1.000e-05]\n", 543 | " [0.000e+00 2.000e-05 1.100e-04 3.700e-04 6.000e-04 6.700e-04 3.700e-04\n", 544 | " 8.000e-05 0.000e+00 0.000e+00]\n", 545 | " [0.000e+00 0.000e+00 0.000e+00 6.000e-05 6.000e-05 1.100e-04 3.000e-05\n", 546 | " 1.000e-05 1.000e-05 0.000e+00]\n", 547 | " [0.000e+00 0.000e+00 0.000e+00 1.000e-05 0.000e+00 0.000e+00 0.000e+00\n", 548 | " 0.000e+00 0.000e+00 0.000e+00]]\n", 549 | "\n", 550 | " [[0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00\n", 551 | " 0.000e+00 0.000e+00 0.000e+00]\n", 552 | " [0.000e+00 0.000e+00 0.000e+00 0.000e+00 2.000e-05 4.000e-05 1.000e-05\n", 553 | " 0.000e+00 0.000e+00 0.000e+00]\n", 554 | " [0.000e+00 0.000e+00 3.000e-05 1.000e-04 1.200e-04 1.100e-04 9.000e-05\n", 555 | " 2.000e-05 0.000e+00 
0.000e+00]\n", 556 | " [0.000e+00 1.000e-05 9.000e-05 2.600e-04 6.500e-04 5.200e-04 1.800e-04\n", 557 | " 4.000e-05 2.000e-05 0.000e+00]\n", 558 | " [1.000e-05 3.000e-05 1.400e-04 4.900e-04 1.090e-03 1.030e-03 5.200e-04\n", 559 | " 9.000e-05 0.000e+00 0.000e+00]\n", 560 | " [0.000e+00 1.000e-05 1.600e-04 6.300e-04 1.100e-03 9.500e-04 4.800e-04\n", 561 | " 1.400e-04 0.000e+00 0.000e+00]\n", 562 | " [0.000e+00 1.000e-05 5.000e-05 2.600e-04 4.200e-04 4.200e-04 3.200e-04\n", 563 | " 7.000e-05 1.000e-05 0.000e+00]\n", 564 | " [0.000e+00 0.000e+00 2.000e-05 9.000e-05 1.000e-04 7.000e-05 7.000e-05\n", 565 | " 3.000e-05 0.000e+00 0.000e+00]\n", 566 | " [0.000e+00 0.000e+00 0.000e+00 1.000e-05 2.000e-05 0.000e+00 1.000e-05\n", 567 | " 0.000e+00 0.000e+00 0.000e+00]\n", 568 | " [0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00\n", 569 | " 0.000e+00 0.000e+00 0.000e+00]]\n", 570 | "\n", 571 | " [[0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00\n", 572 | " 0.000e+00 0.000e+00 0.000e+00]\n", 573 | " [0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00\n", 574 | " 0.000e+00 0.000e+00 0.000e+00]\n", 575 | " [0.000e+00 0.000e+00 0.000e+00 0.000e+00 3.000e-05 2.000e-05 1.000e-05\n", 576 | " 0.000e+00 0.000e+00 0.000e+00]\n", 577 | " [0.000e+00 0.000e+00 0.000e+00 1.000e-05 6.000e-05 3.000e-05 1.000e-05\n", 578 | " 1.000e-05 0.000e+00 0.000e+00]\n", 579 | " [0.000e+00 0.000e+00 1.000e-05 7.000e-05 1.300e-04 1.200e-04 1.000e-05\n", 580 | " 0.000e+00 0.000e+00 0.000e+00]\n", 581 | " [0.000e+00 0.000e+00 1.000e-05 1.000e-05 8.000e-05 7.000e-05 3.000e-05\n", 582 | " 1.000e-05 0.000e+00 0.000e+00]\n", 583 | " [0.000e+00 0.000e+00 0.000e+00 1.000e-05 4.000e-05 3.000e-05 1.000e-05\n", 584 | " 0.000e+00 0.000e+00 0.000e+00]\n", 585 | " [0.000e+00 0.000e+00 0.000e+00 1.000e-05 1.000e-05 2.000e-05 0.000e+00\n", 586 | " 0.000e+00 0.000e+00 0.000e+00]\n", 587 | " [0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 
# Worked examples of marginal, joint and conditional probability.
#
# Every figure below describes the same group of students, split by whether
# they attended Online training and whether they obtained a good salary.
# Each section prints the probability it computes.

# ------------------------Marginal Probability---------------------------------

# Total number of students
total = 105

# Students who attended Online training
students_trained = 45

# P(training): share of all students who have undergone Online training
edu_training = students_trained / total
print("The probability that a candidate has undergone Online training is ", round(edu_training, 2))


# -----------------------Joint probability-------------------------------------

# Students with Online training having a good salary
good_sal_edu = 30

# Students without Online training having a good salary
good_sal = 5

# P(training AND good package): the joint count is divided by the WHOLE
# population, not by the number of trained students.
student_good_sal = good_sal_edu / total

print("The probability that a candidate has attended Online training and also has good "
      "package ", student_good_sal)


# ------------------------Conditional Probability------------------------------

# Total no. of students without Online training. Derived from the counts above
# (105 - 45 = 60) instead of being hard-coded a second time, so the sections
# cannot drift out of sync if a count is edited.
student_without_training = total - students_trained

# Students getting a good salary without training: the same count as good_sal.
student_without_training_good_sal = good_sal

# P(good package | no training): the denominator is restricted to the students
# without training, which is what makes this a conditional probability.
print("The probability of students getting good package without Online training "
      "is ", student_without_training_good_sal / student_without_training)
Testing the Data 27 | 28 | 29 | ### What is statistics? 30 | Statistics Definition: (Science of Average and their Estimate) 31 | Statistics is the science of collecting, organizing, presenting, analyzing and interpreting data for specific purpose to help in making more effective decision. 32 | 33 | #### Why study statistics: 34 | To make more effective decision for the betterment of individual, society, business, nature and so on 35 | 36 | #### Statistical Analysis: 37 | 38 | Statistical analysis is implemented to manipulate, summarize, and investigate data, so that useful decision making information results are obtained. 39 | 40 | ##### Two type of statistics: 41 | 42 | 1. Descriptive Statistics (used to describe the basic features of the data) 43 | 44 | 2. Inferential statistics (aims at learning characteristics of the population from a sample) 45 | 46 | 47 | # 1. Data and its Types 48 | #### What is data? 49 | Data is a set of collected or recorded facts of particular subject. 50 | 51 | Data in general terms refer to facts and statistics collected together for reference or analysis. 52 | 53 | Types of Data: 54 | #### 1. Qualitative Data 55 | #### 2. Quantitative Data 56 | 57 | #### 1. Qualitative Data: 58 | “Data Associated with the quality in different categories”. Data is measurements, each fall into one of several categories. (Hair Color, ethnic groups and other attributes of the population) 59 | 60 | ##### (a). Nominal Data: “With no inherent order or ranking” 61 | ~ Data with no inherent order or ranking such as gender or race, suck kind of data called Nominal Data. 62 | 63 | ##### (b). Ordinal Data: “with an order series” 64 | 65 | #### 2. Quantitative Data: 66 | “Data associated with Quantity which can be measured” 67 | ~ Data measured on a numeric scale (distance travelled to college, the number of children in a family etc.) 68 | 69 | ##### (a). 
Discrete Data: “Based on count, finite number of values possible and value cannot be subdivided” 70 | ~ Data which can be categorized into classification, data which is based upon counts, there are only a finite number of values possible and values cannot be subdivided meaningfully, such kind of data is called Discrete Data. 71 | 72 | ##### (b). # Continuous Data: “measured on a continuum or a scale, value which can be subdivided into finer increments” 73 | ~ Data which can be measured on a continuum or a scale, data which can be have almost any numeric value and can be subdivided into finer and finer increments, such kind of data is called Continuous Data. 74 | 75 | 76 | # 2. Variable and it's Types 77 | #### What is variable? 78 | A variable in algebra represents an unknown value or a value that varies. 79 | 80 | #### Types of Variables: 81 | ##### 1. Categorical Variable: 82 | Variable that can be put into categories. For example, male and female are two categories in a Gender. 83 | 84 | ##### 2. Control Variable: 85 | A factor in an experiment which must be held constant 86 | 87 | ##### 3. Confounding Variable: 88 | Extra variables that have a hidden effects on your experimental results 89 | 90 | ##### 4. Dependent Variable (Output Variable): 91 | The outcome of an experiment 92 | 93 | ##### 5. Independent Variable (Input Variable): 94 | A variable that is not affected by anything 95 | 96 | 97 | # 3. Sample and Population 98 | #### Population: 99 | A Population is the set of all possible states of a random variable. The size of the population may be either infinite or finite. 100 | 101 | In other words, A collection or set of individual or objects or events whose properties are to be anlysed called population. 102 | 103 | #### Sample: 104 | A Sample is a subset of the population; its size is always finite. 105 | 106 | A subset of population is called "Sample". A well choosen sample will contain most of the information about a particular population parameter. 
107 | 108 | 109 | # 4. Sampling Techniques 110 | 111 | #### 1. Probability Sampling : 112 | This Sampling technique uses randomization to make sure that every element of the population gets an equal chance to be part of the selected sample. It’s alternatively known as random sampling. 113 | 114 | ##### (a) Random Sampling : 115 | Every element has an equal chance of getting selected to be the part sample. It is used when we don’t have any kind of prior information about the target population. 116 | 117 | For example: Random selection of 20 students from class of 50 student. Each student has equal chance of getting selected. Here probability of selection is 1/50. 118 | 119 | ##### (b) Systematic Sampling 120 | 121 | Here the selection of elements is systematic and not random except the first element. Elements of a sample are chosen at regular intervals of population. All the elements are put together in a sequence first where each element has the equal chance of being selected. 122 | 123 | For a sample of size n, we divide our population of size N into subgroups of k elements. 124 | 125 | We select our first element randomly from the first subgroup of k elements. 126 | 127 | To select other elements of sample, perform following: 128 | 129 | We know number of elements in each group is k i.e N/n 130 | 131 | So if our first element is n1 then 132 | 133 | Second element is n1+k i.e n2 134 | 135 | Third element n2+k i.e n3 and so on.. 136 | 137 | Taking an example of N=20, n=5 138 | 139 | No of elements in each of the subgroups is N/n i.e 20/5 =4= k 140 | 141 | Now, randomly select first element from the first subgroup. 
142 | 143 | If we select n1= 3 144 | 145 | n2 = n1+k = 3+4 = 7 146 | 147 | n3 = n2+k = 7+4 = 11 148 | 149 | 150 | ##### (c) Stratified Sampling 151 | This technique divides the elements of the population into small subgroups (strata) based on the similarity in such a way that the elements within the group are homogeneous and heterogeneous among the other subgroups formed. And then the elements are randomly selected from each of these strata. We need to have prior information about the population to create subgroups. 152 | 153 | #### 2. Non-Probability Sampling : 154 | It does not rely on randomization. This technique is more reliant on the researcher’s ability to select elements for a sample. Outcome of sampling might be biased and makes difficult for all the elements of population to be part of the sample equally. This type of sampling is also known as non-random sampling. 155 | 156 | ##### (a) Snowball Sampling: 157 | This technique is used in the situations where the population is completely unknown and rare. 158 | Therefore we will take the help from the first element which we select for the population and ask him to recommend other elements who will fit the description of the sample needed. 159 | 160 | So this referral technique goes on, increasing the size of population like a snowball. 161 | 162 | ##### For example: 163 | It’s used in situations of highly sensitive topics like HIV Aids where people will not openly discuss and participate in surveys to share information about HIV Aids. 164 | 165 | Not all the victims will respond to the questions asked so researchers can contact people they know or volunteers to get in touch with the victims and collect information 166 | 167 | Helps in situations where we do not have the access to sufficient people with the characteristics we are seeking. It starts with finding people to study. 168 | 169 | ##### (b) Quota Sampling: 170 | This type of sampling depends of some pre-set standard. 
It selects the representative sample from the population. Proportion of characteristics/ trait in sample should be same as population. Elements are selected until exact proportions of certain types of data is obtained or sufficient data in different categories is collected. 171 | 172 | ##### For example: 173 | If our population has 45% females and 55% males then our sample should reflect the same percentage of males and females. 174 | 175 | ##### (c) Judgement sampling 176 | This is based on the intention or the purpose of study. Only those elements will be selected from the population which suits the best for the purpose of our study. 177 | 178 | ##### For Example: 179 | If we want to understand the thought process of the people who are interested in pursuing master’s degree then the selection criteria would be “Are you interested for Masters in..?” 180 | 181 | All the people who respond with a “No” will be excluded from our sample. 182 | 183 | ##### (d) Convenience Sampling 184 | Here the samples are selected based on the availability. This method is used when the availability of sample is rare and also costly. So based on the convenience samples are selected. 185 | 186 | ##### For example: 187 | Researchers prefer this during the initial stages of survey research, as it’s quick and easy to deliver results. 188 | 189 | 190 | # 5. Descriptive Statistics: 191 | Collecting, Summarizing or Describing and Processing data to transform data into information 192 | 193 | Descriptive statistics are used to describe the basic features of the data in a study. 194 | * Descriptive statistics is a data analysis strategy. 195 | * It deals with the representation of numerical facts, or data, in either table or graphic form, and with the methodology of analysis the data. 196 | 197 | Example: A student’s grade point average (GPA), provides a good understanding in analysing his overall performance. 
198 | 199 | 200 | ### Type of Descriptive Statistics: 201 | Descriptive statistics are broken down into two categories: **Measures of Central Tendency** and **Measures of Spread (variability or dispersion)**. 202 | 203 | ### (1) Measure of Centre(Central Tendency): 204 | The data values for most numerical variables tend to group around a specific value 205 | 206 | Measures of centre help us to describe to what extent this pattern holds for a specific numerical variable 207 | 208 | Three commonly-used measures of centre: 209 | 210 | ##### (a) Mean (also known as the arithmetic mean or average) 211 | ##### (b) Median 212 | ##### (c) Mode 213 | 214 | #### Mean: “An Average” 215 | The mean (or average) of a number of observations is the sum of the values of all the observations divided by the total number of observations. It is denoted by the symbol X̄, read as ‘X bar’. 216 | 217 | #### Median: “A middle Value” 218 | The median is that value of the given number of observations, which divides it into exactly two equal parts. So, when the data is arranged in ascending (or descending) order the median of ungrouped data is calculated as follows: 219 | 220 | (i) When the number of observations (n) is odd, the median is the value of the {(n+1)/2}th observation. For example, if n = 13, the value of the {(13+1)/2}th, i.e., the 7th observation will be the median. 221 | 222 | (ii) When the number of observations (n) is even, the median is the mean of the {n/2}th and the {(n/2)+1}th observations. For example, if n = 16, the median is the mean of the 8th and the 9th observations. 223 | 224 | #### Mode: “The value with the highest frequency” 225 | The mode is the most common observation of a data set, or the value in the data set that occurs most frequently. 
226 | 227 | #### Comparison between median and mean: 228 | Median: 229 | • Ignore the extreme value 230 | • Tell the point from where 50% data is lesser and 50% is more 231 | 232 | Mean: 233 | • All the data are given equal importance 234 | 235 | #### Relationship among all 236 | Mean – Mode = 3 (Mean - Median) 237 | 238 | Mode = 3Median – 2Mean 239 | 240 | 241 | ### (2) Measure of Spread (Variability / Dispersion) 242 | A measure of spread, sometimes also called a measure of dispersion or measure of variability is used to describe the variability in a sample or population. 243 | 244 | It is usually used in conjunction with measure of central tendency, such as the mean or median, to provide an overall description of a set of data. 245 | 246 | ##### (a) Range 247 | ##### (b) Percentiles/Quartiles 248 | ##### (c) Inter-Quartile Range (IQR) 249 | ##### (d) Variance 250 | ##### (e) Standard Deviation 251 | ##### (f) Skewness 252 | ##### (g) Kurtosis 253 | 254 | #### (a) Range: 255 | 256 | The range is simply the difference between the maximum and minimum values in a data set. 257 | ##### Range = max - min 258 | 259 | So in a data set of 2, 2, 3, 4, 5, 5, 6, 7, 8, 9, 11, 13, 15, 15, 17, 19, 20, the range is the difference between 2 and 20. 260 | 18 = 20 - 2 261 | 262 | While it is useful in seeing how large the difference in observations is in a sample, it says nothing about the spread of the data. 263 | 264 | #### (b) Percentiles/Quartiles 265 | ##### Percentiles divide a data set into 100 equal parts. A percentile is simply a measure that tells us what percent of the total frequency of a data set was at or below that measure. 
266 | 267 | ##### The Quartiles also divide the data into divisions of 25%, so: 268 | 269 | Quartile 1 (Q1) can be called the 25th percentile 270 | Quartile 2 (Q2) can be called the 50th percentile 271 | Quartile 3 (Q3) can be called the 75th percentile 272 | 273 | #### (c) Inter-Quartile Range (IQR) 274 | The inter-quartile range (IQR) gives more information about how the observation values of a data set are dispersed. It shows the range of the middle 50% of observations (IQR = Q3 - Q1). 275 | 276 | #### (d) Variance 277 | 278 | ##### Deviation: The difference between each xi and the mean is called deviation about the mean 279 | 280 | ##### Variance: is based on deviations and entails computing square of deviations 281 | 282 | ##### Population Variance: average of the squared deviations from the mean (sum of squared deviations divided by N) 283 | 284 | ##### Sample Variance: sum of squared deviations divided by n-1 285 | 286 | #### (e) Standard Deviation 287 | 288 | The standard deviation is the square root of the variance. It indicates the typical distance between an observation value and the mean of a data set. In this way, it shows how well the mean represents the values in a data set. Like the mean, it is appropriate to use when the data set is not skewed and does not contain outliers. 289 | 290 | #### (f) Skewness 291 | In probability theory and statistics, skewness is a measure of the asymmetry of the probability distribution of a real-valued random variable about its mean. The skewness value can be positive, zero, negative, or undefined. 292 | 293 | #### (g) Kurtosis 294 | In probability theory and statistics, kurtosis is a measure of the "tailedness" of the probability distribution of a real-valued random variable. 295 | 296 | # 6. Information Gain and Entropy 297 | 298 | ### Information Gain: 299 | * Information is a measure of facts 300 | * Information gain is the ratio of factual information to uncertain information 301 | * It signifies a reduction in entropy or uncertainty 302 | 303 | ### Entropy: 304 | #### What is Entropy? 
305 | * In the most layman terms, Entropy is nothing but the measure of disorder or uncertainty (You can think of it as a measure of purity as well) the goal of machine learning models and Data Scientists in general is to reduce uncertainty. 306 | * We sometimes need to choose the options which have high information gain and low entropy while taking crucial decision 307 | ##### Example: We are certain that flight 3 p.m., but uncertain regarding the exact time to reach airport 308 | 309 | 310 | ##### Confusion Matrix: 311 | A confusion matrix is a table that is often used to describe the performance of a classification model (or “classifier”) on set of test data for which the true value are known: 312 | * Confusion matrix represents a tabular presentation of Actual Vs Predict Value 313 | * You can calculate the accuracy of your model with: 314 | 315 | (True Positive + True Negative) / (True Positive + True Negative + False Positive + False Negative) 316 | 317 | #### Example: 318 | * There are two possible predicted classes: “yes” and “no” 319 | * The classifier made a total of 165 predictions 320 | * Out of those 165 cases, the classifier predicted “yes” 110 times, and “no” 55 times 321 | * In reality, 105 patients in the sample have the disease, and 60 patient do not 322 | 323 | ##### (A): Type I Error: We predict yes, but they don’t actually have the disease (Also known as Type I Error) 324 | 325 | ##### (B): Type II Error: We predict No, but they actually do have the disease (Also known as Type II Error) 326 | 327 | 328 | ### Sensitivity: 329 | * Sensitivity (also called the true positive rate, the recall, or probability of detection in some fields) measures the proportion of positives that are correctly identified 330 | * In probability notation: 331 | 332 | Sensitivity = TRUE POSITIVE / (TRUE POSITIVE + FALSE NEGATIVE) 333 | 334 | 335 | ### Specificity: 336 | * Specificity (also called the true negative rate) measures the proportion of negatives that are 
correctly identified as such (e.g. the percentage of healthy people who are correctly identified as not having the condition) 337 | * In probability notation: 338 | 339 | Specificity = TRUE NEGATIVE / (TRUE NEGATIVE + FALSE POSITIVE) 340 | 341 | 342 | # 7. Probability and it's Uses 343 | 344 | ### What is probability? 345 | 346 | A measure of uncertainty of various phenomenon, numerically 347 | 348 | * Probability is measure of how likely something will occur 349 | * It is the ratio of desired outcomes to total outcomes: 350 | 351 | Desired Outcomes/ #Total Outcomes 352 | 353 | ##### In mathematical terms : 354 | P (E) = No. of favourable outcome / Total no. of outcomes 355 | 356 | ##### The probability of all outcomes always sum of 1 357 | P (E) + P (E’) = 1 358 | 359 | ### Terminologies of Probability 360 | ##### (1) Random Experiment: 361 | An operation which can produce some well-defined outcomes is called an experiment. Each outcome is called an event 362 | 363 | For example; throwing a die or tossing a coin etc. 364 | In an experiment where all possible outcomes are known and in advance if the exact outcome cannot be predicted, is called a random experiment. 365 | 366 | ##### (3) Trial: 367 | By a trial, we mean performing a random experiment. 368 | 369 | ##### (4) Sample Space 370 | The sample space for a probability experiment is the set of all possible outcomes. This is usually written with set notation (curly brackets). For example, going back to a regular 6-sided die the sample space would be: 371 | 372 | S={1,2,3,4,5,6} 373 | 374 | ##### (5) Event 375 | Out of the total results obtained from a certain experiment, the set of those results which are in favor of a definite result is called the event and it is denoted as E. 376 | 377 | #### (6) Equally Likely Events: 378 | 379 | When there is no reason to expect the happening of one event in preference to the other, then the events are known equally likely events. 
380 | 381 | For example; when an unbiased coin is tossed the chances of getting a head or a tail are the same. 382 | 383 | #### (7) Exhaustive Events: 384 | 385 | All the possible outcomes of the experiments are known as exhaustive events. 386 | 387 | For example; in throwing a die there are 6 exhaustive events in a trial. 388 | 389 | #### (8) Favorable Events: 390 | 391 | The outcomes which make necessary the happening of an event in a trial are called favorable events. 392 | 393 | For example; if two dice are thrown, the number of favorable events of getting a sum 5 is four, 394 | 395 | i.e., (1, 4), (2, 3), (3, 2) and (4, 1). 396 | 397 | #### (9) Mutually Exclusive Events: 398 | If there is no element common between two or more events, i.e., between two or more subsets of the sample space, then these events are called mutually exclusive events. 399 | 400 | If E1 and E2 are two mutually exclusive events, then E1 ∩ E2 = ∅ 401 | 402 | #### (10) Complementary Event: 403 | An event which consists in the negation of another event is called the complementary event of the other event. In case of throwing a die, ‘even face’ and ‘odd face’ are complementary to each other. “Multiple of 3” and “Not multiple of 3” are complementary events of each other. 404 | 405 | ##### (11) Union of Event: 406 | Union of events is simply a union of two or more than two events. If A and B are two events, then A ∪ B is called the union of A and B. The union of two events A and B is the event which consists of all the outcomes that belong to A, to B, or to both. 407 | 408 | ##### Formula: 409 | Suppose A and B are two events associated with a random experiment. Then the union of A and B is represented by A ∪ B. 410 | 411 | The probability of union of two events is given by: 412 | 413 | P(A∪B) = P(A)+P(B) – P(A∩B) 414 | 415 | Here, P (A) is the probability of event A, P (B) is the probability of event B. 
416 | 417 | Also, P(A∩B) is the probability of the intersection of events A and B. 418 | 419 | 420 | When A and B are two mutually exclusive events, that is, they cannot both occur at the same time, P(A∩B) = 0 and hence we have, (Note that independence is a different property: if A and B are independent, the occurrence of A does not affect the occurrence of B, and P(A∩B) = P(A) P(B), which is generally not 0.) 421 | 422 | P(A∪B) = P(A)+P(B) 423 | 424 | If we have more than two mutually exclusive events, say A, B & C, then in that case the union probability is given by: 425 | 426 | P(A∪B∪C) = P(A)+P(B)+P(C) 427 | 428 | If A, B and C are not mutually exclusive then the union probability is given by: 429 | 430 | P(A∪B∪C) = P(A)+P(B)+P(C) – P(A∩B) – P(B∩C) – P(A∩C) + P(A∩B∩C) 431 | 432 | 433 | ##### (12) Intersection of Event 434 | Intersection of events means that all the events are occurring together. Even if one event holds false all will be false. The intersection of events can be true if and only if all the events hold true. 435 | 436 | The probability that Events A and B both occur is the probability of the intersection of A and B. The probability of the intersection of Events A and B is denoted by P(A ∩ B). If Events A and B are mutually exclusive, P(A ∩ B) = 0. 437 | 438 | The rule of multiplication applies to the situation when we want to know the probability of the intersection of two events; that is, we want to know the probability that two events (Event A and Event B) both occur. 439 | 440 | Rule of Multiplication: The probability that Events A and B both occur is equal to the probability that Event A occurs times the probability that Event B occurs, given that A has occurred. 441 | 442 | P(A ∩ B) = P(A) P(B|A) 443 | 444 | 445 | # 8. Probability Distribution: 446 | Probability is often associated with at least one event. This event can be anything. Basic examples of events include rolling a die or pulling a coloured ball out of a bag. 
In these examples the outcome of the event is random (you can’t be sure of the value that the die will show when you roll it), so the **variable that represents the outcome of these events is called a random variable (often abbreviated to RV)**. 447 | 448 | ### The 3 types of probability 449 | 450 | #### Marginal Probability: 451 | If A is an event, then the marginal probability is the probability of that event occurring, P(A). 452 | **Example:** Assuming that we have a pack of traditional playing cards, an example of a marginal probability would be the probability that a card drawn from a pack is red: P(red) = 0.5. 453 | 454 | #### Joint Probability: 455 | The probability of the intersection of two or more events. Visually it is the intersection of the circles of two events on a Venn Diagram (see figure below). If A and B are two events then the joint probability of the two events is written as P(A ∩ B). 456 | **Example:** The probability that a card drawn from a pack is red and has the value 4 is P(red and 4) = 2/52 = 1/26. (There are 52 cards in a pack of traditional playing cards and the 2 red ones are the hearts and diamonds). We’ll go through this example in more detail later. 457 | 458 | #### Conditional Probability: 459 | The conditional probability is the probability that some event(s) occur given that we know other events have already occurred. If A and B are two events then the conditional probability of A occurring given that B has occurred is written as P(A|B). 460 | **Example:** The probability that a card is a four given that we have drawn a red card is P(4|red) = 2/26 = 1/13. (There are 52 cards in the pack, 26 are red and 26 are black. Now because we’ve already picked a red card, we know that there are only 26 cards to choose from, hence why the first denominator is 26). 461 | 462 | ##### Distribution: 463 | Before we jump on to the explanation of distributions, let’s see what kind of data can we encounter. The data can be discrete or continuous. 
464 | 465 | **Discrete Data**, as the name suggests, can take only specified values. For example, when you roll a die, the possible outcomes are 1, 2, 3, 4, 5 or 6 and not 1.5 or 2.45. 466 | 467 | **Continuous Data** can take any value within a given range. The range may be finite or infinite. For example, A girl’s weight or height, the length of the road. The weight of a girl can be any value from 54 kgs, or 54.5 kgs, or 54.5436kgs. 468 | 469 | ##### Types of Distributions 470 | **1.Bernoulli Distribution** 471 | **2. Uniform Distribution** 472 | **3. Binomial Distribution** 473 | **4. Normal Distribution** 474 | **5. Poisson Distribution** 475 | **6. Exponential Distribution** 476 | 477 | #### Normal Distribution 478 | Normal distribution represents the behavior of most of the situations in the universe (That is why it’s called a “normal” distribution). The large sum of (small) random variables often turns out to be normally distributed, contributing to its widespread application. Any distribution is known as Normal distribution if it has the following characteristics: 479 | 480 | * The mean, median and mode of the distribution coincide. 481 | * The curve of the distribution is bell-shaped and symmetrical about the line x=μ. 482 | * The total area under the curve is 1. 483 | * Exactly half of the values are to the left of the center and the other half to the right. 484 | * A normal distribution is highly different from Binomial Distribution. However, if the number of trials approaches infinity then the shapes will be quite similar. 485 | 486 | ##### Some of the properties of a standard normal distribution are mentioned below: 487 | * The normal curve is symmetric about the mean and bell shaped. 488 | * Mean, mode and median is zero which is the centre of the curve. 489 | * Approximately 68% of the data will be between -1 and +1 (i.e. 
within 1 standard deviation from the mean), 95% between -2 and +2 (within 2 SD from the mean) and 99.7% between -3 and 3 (within 3 SD from the mean) 490 | 491 | ##### There are a few commonly used terms which we need to understand: 492 | * **Population** : Space of all possible elements from a set of data 493 | * **Sample** : consists of observations drawn from population 494 | * **Parameter** : measurable characteristic of a population such as mean, SD 495 | * **Statistic**: measurable characteristic of a sample 496 | 497 | 498 | #### Central Limit Theorem (CLT) 499 | The central limit theorem (CLT) is simple. It just says that with a large sample size, sample means are normally distributed. 500 | 501 | Well, the central limit theorem (CLT) is at the heart of hypothesis testing – a critical component of the data science lifecycle. 502 | 503 | #### Formally Defining the Central Limit Theorem: 504 | Given a dataset with unknown distribution (it could be uniform, binomial or completely random), the sample means will approximate the normal distribution. 505 | 506 | #### Assumptions Behind the Central Limit Theorem 507 | Before we dive into the implementation of the central limit theorem, it’s important to understand the assumptions behind this technique: 508 | 509 | * The data must follow the **randomization condition**. It must be sampled randomly 510 | * Samples should be **independent of each other**. One sample should not influence the other samples 511 | * **Sample size** should be not more than 10% of the population when sampling is done without replacement 512 | * The sample size should be sufficiently large. Now, how we will figure out how large this size should be? Well, it depends on the population. When the population is skewed or asymmetric, the sample size should be large. If the population is symmetric, then we can draw small samples as well 513 | In general, a **sample size of 30 is considered sufficient when the population is symmetric**. 
514 | 515 | The mean of the sample means is denoted as: 516 | 517 | µ X̄ = µ 518 | 519 | where, 520 | 521 | µ X̄ = Mean of the sample means 522 | µ= Population mean 523 | 524 | 525 | And, the standard deviation of the sample mean is denoted as: 526 | 527 | σ X̄ = σ/sqrt(n) 528 | 529 | where, 530 | 531 | σ X̄ = Standard deviation of the sample mean 532 | σ = Population standard deviation 533 | n = sample size 534 | 535 | 536 | # 9. Baye's Theorem (aka, Bayes Rule) 537 | Before understanding Bayes' Theorem, we first learn about **Conditional Probability**: 538 | 539 | ##### Conditional Probability : 540 | The probability that event A occurs, given that event B has occurred, is called a conditional probability. 541 | 542 | The conditional probability of A, given B, is denoted by the symbol P(A|B). 543 | 544 | ##### Baye's Theorem: 545 | * Bayes' theorem (also known as Bayes' rule) is a useful tool for calculating conditional probabilities. 546 | 547 | * Bayes’ Theorem is a way of finding a probability when we know certain other probabilities. 548 | 549 | Bayes' theorem can be stated as follows: 550 | 551 | 552 | The formula is: P(A|B) = P(A) P(B|A) / P(B) 553 | 554 | Which tells us: how often A happens given that B happens, written P(A|B), 555 | When we know: how often B happens given that A happens, written P(B|A), 556 | and how likely A is on its own, written P(A), 557 | and how likely B is on its own, written P(B) 558 | 559 | #### Bayes Theorem Rule: 560 | 561 | The rule has a very simple derivation that directly leads from the relationship between joint and conditional probabilities. First, note that P(A,B) = P(A|B)P(B) = P(B,A) = P(B|A)P(A). Next, we can set the two terms involving conditional probabilities equal to each other, so P(A|B)P(B) = P(B|A)P(A), and finally, divide both sides by P(B) to arrive at Bayes rule. 562 | 563 | In this formula, A is the event we want the probability of, and B is the new evidence that is related to A in some way. 
564 | 565 | P(A|B) is called the **posterior**; this is what we are trying to estimate. In the above example, this would be the “probability of having cancer given that the person is a smoker”. 566 | 567 | P(B|A) is called the **likelihood**; this is the probability of observing the new evidence, given our initial hypothesis. In the above example, this would be the “probability of being a smoker given that the person has cancer”. 568 | 569 | P(A) is called the **prior**; this is the probability of our hypothesis without any additional prior information. In the above example, this would be the “probability of having cancer”. 570 | 571 | P(B) is called the marginal **likelihood**; this is the total probability of observing the evidence. In the above example, this would be the “probability of being a smoker”. In many applications of Bayes Rule, this is ignored, as it mainly serves as normalization. 572 | 573 | ##### Example: 574 | Let us say P(Fire) means how often there is fire, and P(Smoke) means how often we see smoke, then: 575 | 576 | P(Fire|Smoke) means how often there is fire when we can see smoke 577 | P(Smoke|Fire) means how often we can see smoke when there is fire 578 | 579 | So the formula kind of tells us "forwards" P(Fire|Smoke) when we know "backwards" P(Smoke|Fire) 580 | 581 | Example: If dangerous fires are rare (1%) but smoke is fairly common (10%) due to barbecues, and 90% of dangerous fires make smoke then: 582 | P(Fire|Smoke) = P(Fire) * P(Smoke|Fire) / P(Smoke) 583 | = 1% x 90% / 10% 584 | = 9% 585 | So the "Probability of dangerous Fire when there is Smoke" is 9% 586 | 587 | 588 | # 10. Statistical Inference 589 | ### What is statistical inference? 590 | Statistical inference is the process of drawing conclusions about populations or scientific truths from data. 591 | 592 | The four-step process that encompasses statistics: Data Production, Exploratory Data Analysis, Probability and Inference. 
593 | 594 | A **statistical inference** aims at learning characteristics of the population from a sample; the population characteristics are parameters and sample characteristics are statistics. 595 | 596 | A **statistical model** is a representation of a complex phenomena that generated the data. 597 | * It has mathematical formulations that describe relationships between random variables and parameters. 598 | * It makes assumptions about the random variables, and sometimes parameters. 599 | * A general form: data = model + residuals 600 | * Model should explain most of the variation in the data 601 | * Residuals are a representation of a lack-of-fit, that is of the portion of the data unexplained by the model. 602 | 603 | **Estimation** represents ways or a process of learning and determining the population parameter based on the model fitted to the data. 604 | 605 | **Point Estimation** and **Interval Estimation**, and **Hypothesis Testing** are three main ways of learning about the population parameter from the sample statistic. 606 | 607 | An **estimator** is particular example of a statistic, which becomes an **estimate** when the formula is replaced with actual observed sample values. 608 | 609 | **Point Estimation** = a single value that estimates the parameter. Point estimates are single values calculated from the sample 610 | 611 | **Confidence Intervals** = gives a range of values for the parameter Interval estimates are intervals within which the parameter is expected to fall, with a certain degree of confidence. 612 | 613 | **Hypothesis Tests** = tests for a specific value(s) of the parameter. 614 | In order to perform these inferential tasks, i.e., make inference about the unknown population parameter from the sample statistic, we need to know the likely values of the sample statistic. What would happen if we do sampling many times? 
615 | 616 | We need the sampling distribution of the statistic 617 | * It depends on the model assumptions about the population distribution, and/or on the sample size. 618 | * Standard error refers to the standard deviation of a sampling distribution. 619 | 620 | 621 | # 11. Hypothesis Testing 622 | ### What is Hypothesis Testing? 623 | A statistical hypothesis is an assumption about a population parameter. This assumption may or may not be true. Hypothesis testing refers to the formal procedures used by statisticians to accept or reject statistical hypotheses. 624 | 625 | #### Statistical Hypotheses 626 | The best way to determine whether a statistical hypothesis is true would be to examine the entire population. Since that is often impractical, researchers typically examine a random sample from the population. If sample data are not consistent with the statistical hypothesis, the hypothesis is rejected. 627 | 628 | #### There are two types of statistical hypotheses. 629 | 630 | #### Null hypothesis. 631 | The null hypothesis, denoted by Ho, is usually the hypothesis that sample observations result purely from chance. 632 | #### Alternative hypothesis. 633 | The alternative hypothesis, denoted by H1 or Ha, is the hypothesis that sample observations are influenced by some non-random cause. 634 | 635 | ### Hypothesis Tests 636 | Statisticians follow a formal process to determine whether to reject a null hypothesis, based on sample data. This process, called hypothesis testing, consists of four steps. 637 | 638 | * **State the hypotheses**: This involves stating the null and alternative hypotheses. The hypotheses are stated in such a way that they are mutually exclusive. That is, if one is true, the other must be false. 639 | * **Formulate an analysis plan**: The analysis plan describes how to use sample data to evaluate the null hypothesis. The evaluation often focuses around a single test statistic. 
640 | * **Analyze sample data**: Find the value of the test statistic (mean score, proportion, t statistic, z-score, etc.) described in the analysis plan. 641 | * **Interpret results**: Apply the decision rule described in the analysis plan. If the value of the test statistic is unlikely, based on the null hypothesis, reject the null hypothesis. 642 | 643 | ### Decision Errors 644 | Two types of errors can result from a hypothesis test. 645 | 646 | #### Type I error. 647 | A Type I error occurs when the researcher rejects a null hypothesis when it is true. The probability of committing a Type I error is called the significance level. This probability is also called alpha, and is often denoted by α. 648 | 649 | #### Type II error. 650 | A Type II error occurs when the researcher fails to reject a null hypothesis that is false. The probability of committing a Type II error is called Beta, and is often denoted by β. The probability of not committing a Type II error is called the Power of the test. 651 | 652 | ### Decision Rules 653 | The analysis plan includes decision rules for rejecting the null hypothesis. In practice, statisticians describe these decision rules in two ways - with reference to a P-value or with reference to a region of acceptance. 654 | 655 | * **P-value**. The strength of evidence in support of a null hypothesis is measured by the P-value. Suppose the test statistic is equal to S. The P-value is the probability of observing a test statistic as extreme as S, assuming the null hypothesis is true. If the P-value is less than the significance level, we reject the null hypothesis. 656 | * **Region of acceptance**. The region of acceptance is a range of values. If the test statistic falls within the region of acceptance, the null hypothesis is not rejected. The region of acceptance is defined so that the chance of making a Type I error is equal to the significance level. 
657 | * The set of values outside the region of acceptance is called the region of rejection. If the test statistic falls within the region of rejection, the null hypothesis is rejected. In such cases, we say that the hypothesis has been rejected at the α level of significance. 658 | 659 | #### Significance Level 660 | Significance level is the probability of rejecting the null hypothesis when it is true, which is known as **Type I Error**. Denoted by alpha. 661 | 662 | #### Confidence Level 663 | The Confidence level is just the complement of Significance level which signifies how confident you are in your decision. Expressed as 1 - alpha. 664 | 665 | Confidence Level + Significance Level = 1 (always) 666 | 667 | #### Computing the Significance Level : Two ways the significance level can be calculated: 668 | 669 | ##### (A) One Tail Test : 670 | One-Tailed and Two-Tailed Tests 671 | A test of a statistical hypothesis, where the region of rejection is on only one side of the sampling distribution, is called a **one-tailed test**. 672 | 673 | **For example**, suppose the null hypothesis states that the mean is less than or equal to 10. The alternative hypothesis would be that the mean is greater than 10. The region of rejection would consist of a range of numbers located on the right side of sampling distribution; that is, a set of numbers greater than 10. 674 | 675 | Example :- a college has ≥ 4000 student or data science ≤ 80% org adopted. 676 | 677 | ##### (B) Two Tail Test 678 | A test of a statistical hypothesis, where the region of rejection is on both sides of the sampling distribution, is called a **two-tailed test**. 679 | 680 | **For example**, suppose the null hypothesis states that the mean is equal to 10. The alternative hypothesis would be that the mean is less than 10 or greater than 10. 
The region of rejection would consist of a range of numbers located on both sides of sampling distribution; that is, the region of rejection would consist partly of numbers that were less than 10 and partly of numbers that were greater than 10. 681 | 682 | Example : a college != 4000 student or data science != 80% org adopted 683 | 684 | 685 | # 12. Statistical Testing of Data 686 | 687 | Statistical Tests are intended to decide whether a hypothesis about distribution of one or more populations should be accepted or rejected. 688 | 689 | There are two types of statistical tests: 690 | #### (1) Parametric Tests 691 | #### (2) Non Parametric Tests 692 | 693 | #### Why to use Statistical Testing? 694 | * To calculate the difference in the sample and population means 695 | * To find the difference in sample means 696 | * To test the significance of association between two variables 697 | * To compare several population means 698 | * To test the difference in proportions between two independent populations 699 | * To test the difference in proportion between sample and population 700 | 701 | #### What are parameters? 
702 | * Parameters are numbers which summarize the data for the entrire population, while statistics are numbers which summarize the data from a sample 703 | * Parametric Testing is used for quanititve data and continuous variables 704 | 705 | #### (1) Parametric Tests : A parametric test makes assumption regarding population parameters and distribution 706 | ##### (a) Z Testing 707 | ##### (b) Student T-Testing 708 | ##### (c) P Testing 709 | ##### (d) ANOVA Testing 710 | 711 | #### (a) Z Testing: 712 | The Z Test is used for testing significance difference between two point estimates 713 | ##### Assumptions for Z Test 714 | * The sample must be randomly selected and data must be quantitative 715 | * Sample should be larger 716 | * Data should follow a normal distribution 717 | 718 | #### (2) Non-Parametric Tests: 719 | * Chi-Square Testing 720 | 721 | ### A/B Testing: 722 | 723 | 724 | ##### Problem 1: Two-Tailed Test 725 | 726 | The CEO of a large electric utility claims that 80 percent of his 1,000,000 customers are very satisfied with the service they receive. To test this claim, the local newspaper surveyed 100 customers, using simple random sampling. Among the sampled customers, 73 percent say they are very satisified. Based on these findings, can we reject the CEO's hypothesis that 80% of the customers are very satisfied? Use a 0.05 level of significance. 727 | 728 | ##### Solution: 729 | The solution to this problem takes four steps: (1) state the hypotheses, (2) formulate an analysis plan, (3) analyze sample data, and (4) interpret results. We work through those steps below: 730 | 731 | State the hypotheses. The first step is to state the null hypothesis and an alternative hypothesis. 732 | 733 | Null hypothesis: P = 0.80 734 | 735 | Alternative hypothesis: P ≠ 0.80 736 | 737 | Note that these hypotheses constitute a two-tailed test. The null hypothesis will be rejected if the sample proportion is too big or if it is too small. 
738 | 739 | Formulate an analysis plan. For this analysis, the significance level is 0.05. The test method, shown in the next section, is a one-sample z-test. 740 | 741 | Analyze sample data. Using sample data, we calculate the standard deviation (σ) and compute the z-score test statistic (z). 742 | 743 | σ = sqrt[ P * ( 1 - P ) / n ] 744 | 745 | σ = sqrt [(0.8 * 0.2) / 100] 746 | 747 | σ = sqrt(0.0016) = 0.04 748 | 749 | z = (p - P) / σ = (.73 - .80)/0.04 = -1.75 750 | 751 | where P is the hypothesized value of population proportion in the null hypothesis, p is the sample proportion, and n is the sample size. 752 | 753 | Since we have a two-tailed test, the P-value is the probability that the z-score is less than -1.75 or greater than 1.75. 754 | 755 | We use the Normal Distribution Calculator to find P(z < -1.75) = 0.04, and P(z > 1.75) = 0.04. Thus, the P-value = 0.04 + 0.04 = 0.08. 756 | Interpret results. Since the P-value (0.08) is greater than the significance level (0.05), we cannot reject the null hypothesis. 757 | Note: If you use this approach on an exam, you may also want to mention why this approach is appropriate. Specifically, the approach is appropriate because the sampling method was simple random sampling, the sample included at least 10 successes and 10 failures, and the population size was at least 10 times the sample size. 758 | 759 | 760 | ##### Problem 2: One-Tailed Test 761 | Suppose the previous example is stated a little bit differently. Suppose the CEO claims that at least 80 percent of the company's 1,000,000 customers are very satisfied. Again, 100 customers are surveyed using simple random sampling. The result: 73 percent are very satisfied. Based on these results, should we accept or reject the CEO's hypothesis? Assume a significance level of 0.05. 
762 | 763 | ##### Solution: 764 | The solution to this problem takes four steps: (1) state the hypotheses, (2) formulate an analysis plan, (3) analyze sample data, and (4) interpret results. We work through those steps below: 765 | 766 | State the hypotheses. The first step is to state the null hypothesis and an alternative hypothesis. 767 | 768 | Null hypothesis: P >= 0.80 769 | 770 | Alternative hypothesis: P < 0.80 771 | 772 | Note that these hypotheses constitute a one-tailed test. The null hypothesis will be rejected only if the sample proportion is too small. 773 | 774 | Formulate an analysis plan. For this analysis, the significance level is 0.05. The test method, shown in the next section, is a one-sample z-test. 775 | 776 | Analyze sample data. Using sample data, we calculate the standard deviation (σ) and compute the z-score test statistic (z). 777 | 778 | σ = sqrt[ P * ( 1 - P ) / n ] = sqrt [(0.8 * 0.2) / 100] 779 | 780 | σ = sqrt(0.0016) = 0.04 781 | 782 | z = (p - P) / σ = (.73 - .80)/0.04 = -1.75 783 | 784 | where P is the hypothesized value of population proportion in the null hypothesis, p is the sample proportion, and n is the sample size. 785 | 786 | Since we have a one-tailed test, the P-value is the probability that the z-score is less than -1.75. We use the Normal Distribution Calculator to find P(z < -1.75) = 0.04. Thus, the P-value = 0.04. 787 | Interpret results. Since the P-value (0.04) is less than the significance level (0.05), we cannot accept the null hypothesis. 788 | Note: If you use this approach on an exam, you may also want to mention why this approach is appropriate. Specifically, the approach is appropriate because the sampling method was simple random sampling, the sample included at least 10 successes and 10 failures, and the population size was at least 10 times the sample size. 
789 | -------------------------------------------------------------------------------- /Statistical_Inference/Point_Estimates.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Point_Estimates.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | } 14 | }, 15 | "cells": [ 16 | { 17 | "cell_type": "code", 18 | "metadata": { 19 | "id": "j1cQgMzxLqxN", 20 | "colab_type": "code", 21 | "colab": { 22 | "base_uri": "https://localhost:8080/", 23 | "height": 34 24 | }, 25 | "outputId": "fde15623-382f-48db-e42d-df4f5fd8a3ac" 26 | }, 27 | "source": [ 28 | "%matplotlib inline \n", 29 | "import numpy as np\n", 30 | "import pandas as pd\n", 31 | "import scipy.stats as stats\n", 32 | "import matplotlib.pyplot as plt\n", 33 | "import random\n", 34 | "import math\n", 35 | "np.random.seed(10)\n", 36 | "population_ages1 = stats.poisson.rvs(loc=18, mu=35, size=150000)\n", 37 | "population_ages2 = stats.poisson.rvs(loc=18, mu=10, size=100000)\n", 38 | "population_ages = np.concatenate((population_ages1, population_ages2))\n", 39 | "\n", 40 | "population_ages.mean()" 41 | ], 42 | "execution_count": 2, 43 | "outputs": [ 44 | { 45 | "output_type": "execute_result", 46 | "data": { 47 | "text/plain": [ 48 | "43.002372" 49 | ] 50 | }, 51 | "metadata": { 52 | "tags": [] 53 | }, 54 | "execution_count": 2 55 | } 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "metadata": { 61 | "id": "Stlpz09vLqxh", 62 | "colab_type": "code", 63 | "colab": {} 64 | }, 65 | "source": [ 66 | "np.random.seed(6)\n", 67 | "sample_ages = np.random.choice(a= population_ages,size=500)" 68 | ], 69 | "execution_count": 0, 70 | "outputs": [] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "metadata": { 75 | "id": "80mLxWv1Lqxq", 76 | "colab_type": "code", 77 | "colab": { 78 | "base_uri": 
"https://localhost:8080/", 79 | "height": 34 80 | }, 81 | "outputId": "9e21fac5-721b-48e3-f9e0-89dfc23c910c" 82 | }, 83 | "source": [ 84 | "print ( sample_ages.mean() )" 85 | ], 86 | "execution_count": 5, 87 | "outputs": [ 88 | { 89 | "output_type": "stream", 90 | "text": [ 91 | "42.388\n" 92 | ], 93 | "name": "stdout" 94 | } 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "metadata": { 100 | "id": "oxYfwI5qLqx1", 101 | "colab_type": "code", 102 | "colab": { 103 | "base_uri": "https://localhost:8080/", 104 | "height": 34 105 | }, 106 | "outputId": "2835e1a1-9b6a-4236-fa27-216fa148e499" 107 | }, 108 | "source": [ 109 | "population_ages.mean() - sample_ages.mean()" 110 | ], 111 | "execution_count": 9, 112 | "outputs": [ 113 | { 114 | "output_type": "execute_result", 115 | "data": { 116 | "text/plain": [ 117 | "0.614372000000003" 118 | ] 119 | }, 120 | "metadata": { 121 | "tags": [] 122 | }, 123 | "execution_count": 9 124 | } 125 | ] 126 | } 127 | ] 128 | } -------------------------------------------------------------------------------- /Statistical_Inference/confidence_interval_margin_of_error.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "confidence_interval_margin_of_error.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | } 14 | }, 15 | "cells": [ 16 | { 17 | "cell_type": "code", 18 | "metadata": { 19 | "id": "qE_pk1w3Luqb", 20 | "colab_type": "code", 21 | "colab": { 22 | "base_uri": "https://localhost:8080/", 23 | "height": 119 24 | }, 25 | "outputId": "c99b445c-c0a4-4963-879c-b303182fda1d" 26 | }, 27 | "source": [ 28 | "import numpy as np\n", 29 | "import scipy.stats as stats\n", 30 | "import math\n", 31 | "\n", 32 | "np.random.seed(10)\n", 33 | "population_ages1 = stats.poisson.rvs(loc=18, mu=35, size=150000)\n", 34 | 
"population_ages2 = stats.poisson.rvs(loc=18, mu=10, size=100000)\n", 35 | "population_ages = np.concatenate((population_ages1, population_ages2))\n", 36 | "\n", 37 | "sample_size = 1000\n", 38 | "sample = np.random.choice(a= population_ages, size = sample_size)\n", 39 | "sample_mean = sample.mean()\n", 40 | "z_critical = stats.norm.ppf(q = 0.975) # Get the z-critical value*\n", 41 | "print(\"z-critical value:\") # Check the z-critical value\n", 42 | "print(z_critical) \n", 43 | "\n", 44 | "pop_stdev = population_ages.std() # Get the population standard deviation\n", 45 | "margin_of_error = z_critical * (pop_stdev/math.sqrt(sample_size))\n", 46 | "\n", 47 | "print(\"Margin of Error:\")\n", 48 | "print(margin_of_error)\n", 49 | "\n", 50 | "confidence_interval = (sample_mean - margin_of_error,\n", 51 | " sample_mean + margin_of_error) \n", 52 | "\n", 53 | "print(\"Confidence interval:\")\n", 54 | "print(confidence_interval)" 55 | ], 56 | "execution_count": 3, 57 | "outputs": [ 58 | { 59 | "output_type": "stream", 60 | "text": [ 61 | "z-critical value:\n", 62 | "1.959963984540054\n", 63 | "Margin of Error:\n", 64 | "0.8199359311731705\n", 65 | "Confidence interval:\n", 66 | "(42.39706406882683, 44.03693593117317)\n" 67 | ], 68 | "name": "stdout" 69 | } 70 | ] 71 | } 72 | ] 73 | } -------------------------------------------------------------------------------- /Statistical_Testing/Non-Parametric_Tests/Chi-Square_Test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import scipy.stats as stats 4 | 5 | national = pd.DataFrame(["white"] * 100000 + ["hispanic"] * 60000 + \ 6 | ["black"] * 50000 + ["asian"] * 15000 + ["other"] * 35000) 7 | 8 | minnesota = pd.DataFrame(["white"] * 600 + ["hispanic"] * 300 + \ 9 | ["black"] * 250 + ["asian"] * 75 + ["other"] * 150) 10 | 11 | national_table = pd.crosstab(index=national[0], columns="count") 12 | minnesota_table = 
pd.crosstab(index=minnesota[0], columns="count") 13 | 14 | print("National") 15 | print(national_table) 16 | print(" ") 17 | print("Minnesota") 18 | print(minnesota_table) 19 | 20 | observed = minnesota_table 21 | 22 | national_ratios = national_table/len(national) # Get population ratios 23 | 24 | expected = national_ratios * len(minnesota) # Get expected counts 25 | 26 | chi_squared_stat = (((observed-expected)**2)/expected).sum() 27 | 28 | print(chi_squared_stat) 29 | 30 | 31 | crit = stats.chi2.ppf(q = 0.95, # Find the critical value for 95% confidence* 32 | df = 4) # Df = number of variable categories - 1 33 | 34 | print("Critical value") 35 | print(crit) 36 | 37 | p_value = 1 - stats.chi2.cdf(x=chi_squared_stat, # Find the p-value 38 | df=4) 39 | print("P value") 40 | print(p_value) 41 | 42 | stats.chisquare(f_obs= observed, # Array of observed counts 43 | f_exp= expected) # Array of expected counts 44 | 45 | 46 | -------------------------------------------------------------------------------- /Statistical_Testing/Parametric_Tests/Z_Test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Z-Test.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | } 14 | }, 15 | "cells": [ 16 | { 17 | "cell_type": "code", 18 | "metadata": { 19 | "id": "AfRng3KPuSBQ", 20 | "colab_type": "code", 21 | "colab": { 22 | "base_uri": "https://localhost:8080/", 23 | "height": 51 24 | }, 25 | "outputId": "01b490f3-42b4-410f-ff53-bd9bf782e0d6" 26 | }, 27 | "source": [ 28 | "def twoSampZ(X1, X2, mudiff, sd1, sd2, n1, n2):\n", 29 | " from numpy import sqrt, abs, round\n", 30 | " from scipy.stats import norm\n", 31 | " pooledSE = sqrt(sd1**2/n1 + sd2**2/n2)\n", 32 | " z = ((X1 - X2) - mudiff)/pooledSE\n", 33 | " pval = 2*(1 - norm.cdf(abs(z)))\n", 34 | " return round(z, 
def twoSampZ(X1, X2, mudiff, sd1, sd2, n1, n2):
    """Run a two-sided, two-sample z-test on summary statistics.

    Parameters
    ----------
    X1, X2 : sample means of the two groups.
    mudiff : difference between the means assumed under the null hypothesis.
    sd1, sd2 : standard deviations of the two groups.
    n1, n2 : sizes of the two groups.

    Returns
    -------
    tuple
        ``(z, pval)``: the z statistic rounded to 3 decimals and the
        two-sided p-value rounded to 4 decimals.
    """
    # Function-scope imports: the notebook cell has no top-level imports.
    # The modules are imported under their own names so the builtins
    # `abs` and `round` are not shadowed.
    import numpy as np
    from scipy.stats import norm

    # Standard error of the difference of means. The two variances are
    # combined separately (this is not a pooled estimate).
    std_err = np.sqrt(sd1**2 / n1 + sd2**2 / n2)
    z = ((X1 - X2) - mudiff) / std_err
    # Two-sided p-value; sf(x) is 1 - cdf(x) without cancellation error.
    pval = 2 * norm.sf(np.abs(z))
    return np.round(z, 3), np.round(pval, 4)


z, p = twoSampZ(28, 33, 0, 14.1, 9.5, 75, 50)
print("Z Score:", z)
print("P-Value:", p)
## Calculate the Standard Deviation
# `a`, `b` and `N` (two samples of size N) and the `np` / `stats` imports
# come from the earlier notebook cells.

# ddof=1 divides by N-1 (Bessel's correction), which gives the unbiased
# sample variance. (The maximum-likelihood estimate divides by N instead.)
var_a = a.var(ddof=1)
var_b = b.var(ddof=1)

# Pooled standard deviation for two samples of equal size
s = np.sqrt((var_a + var_b)/2)

print("Std Deviation:", s)

## Calculate the t-statistic
t = (a.mean() - b.mean())/(s*np.sqrt(2/N))

print("T-value:", t)

## Compare with the t distribution

# Degrees of freedom
df = 2*N - 2

# One-tail probability beyond |t|. Using |t| keeps the result valid when
# a.mean() < b.mean() (negative t); sf is 1 - cdf without cancellation error.
p = stats.t.sf(abs(t), df=df)

print("t-Score = " + str(t))
# The tail probability is doubled because this is a two-tailed t-test.
print("p-Value = " + str(2*p))

### The two-sided p-value (about 1.5e-06 in the recorded run) is far below the
# usual 0.05 level, so we reject the null hypothesis of equal means: the
# difference between the two sample means is statistically significant.

## Cross Checking with the internal scipy function
# ttest_ind already returns the two-sided p-value, so it must NOT be doubled
# again; p2 should match 2*p printed above.
t2, p2 = stats.ttest_ind(a, b)
print("t = " + str(t2))
print("p = " + str(p2))