├── mooc data analysis ├── MLE.xlsx ├── MPG.xlsx ├── RBD.xlsx ├── lrm.xlsx ├── Tyler.xlsx ├── _RBD.xlsx ├── acad.xlsx ├── anova.xlsx ├── binpdf.gif ├── dummy.xlsx ├── rbd2.xlsx ├── reg2.xlsx ├── regr.xlsx ├── HARDNESS.xls ├── Reynolds.xlsx ├── Simmons.xls ├── TRUCKING.xlsx ├── Twoway.xlsx ├── dummy2.xlsx ├── icecream.xlsx ├── oneway.xlsx ├── regcar.xlsx ├── dataLRnew.xlsx ├── IBM-313 Marks.xlsx ├── cotton weight.xlsx ├── P_distribution.xlsx ├── icecream sale data.xlsx └── Tensile strength of paper.xlsx ├── README.md ├── Week2.ipynb ├── Week3.ipynb ├── Week4.ipynb ├── Week11.ipynb ├── Week8.ipynb ├── Week10.ipynb └── Week5.ipynb /mooc data analysis/MLE.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/MLE.xlsx -------------------------------------------------------------------------------- /mooc data analysis/MPG.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/MPG.xlsx -------------------------------------------------------------------------------- /mooc data analysis/RBD.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/RBD.xlsx -------------------------------------------------------------------------------- /mooc data analysis/lrm.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/lrm.xlsx -------------------------------------------------------------------------------- /mooc data analysis/Tyler.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/Tyler.xlsx -------------------------------------------------------------------------------- /mooc data analysis/_RBD.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/_RBD.xlsx -------------------------------------------------------------------------------- /mooc data analysis/acad.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/acad.xlsx -------------------------------------------------------------------------------- /mooc data analysis/anova.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/anova.xlsx -------------------------------------------------------------------------------- /mooc data analysis/binpdf.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/binpdf.gif -------------------------------------------------------------------------------- /mooc data analysis/dummy.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/dummy.xlsx -------------------------------------------------------------------------------- /mooc data analysis/rbd2.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/rbd2.xlsx -------------------------------------------------------------------------------- /mooc data analysis/reg2.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/reg2.xlsx -------------------------------------------------------------------------------- /mooc data analysis/regr.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/regr.xlsx -------------------------------------------------------------------------------- /mooc data analysis/HARDNESS.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/HARDNESS.xls -------------------------------------------------------------------------------- /mooc data analysis/Reynolds.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/Reynolds.xlsx -------------------------------------------------------------------------------- /mooc data analysis/Simmons.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/Simmons.xls -------------------------------------------------------------------------------- /mooc data analysis/TRUCKING.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/TRUCKING.xlsx -------------------------------------------------------------------------------- /mooc data analysis/Twoway.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/Twoway.xlsx -------------------------------------------------------------------------------- /mooc data analysis/dummy2.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/dummy2.xlsx -------------------------------------------------------------------------------- /mooc data analysis/icecream.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/icecream.xlsx -------------------------------------------------------------------------------- /mooc data analysis/oneway.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/oneway.xlsx -------------------------------------------------------------------------------- /mooc data analysis/regcar.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/regcar.xlsx -------------------------------------------------------------------------------- /mooc data analysis/dataLRnew.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/dataLRnew.xlsx -------------------------------------------------------------------------------- /mooc data analysis/IBM-313 Marks.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/IBM-313 Marks.xlsx -------------------------------------------------------------------------------- /mooc data analysis/cotton weight.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/cotton weight.xlsx -------------------------------------------------------------------------------- /mooc data analysis/P_distribution.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/P_distribution.xlsx -------------------------------------------------------------------------------- /mooc data analysis/icecream sale data.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/icecream sale data.xlsx -------------------------------------------------------------------------------- /mooc data analysis/Tensile strength of paper.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/Tensile strength of paper.xlsx -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Data-Analysis-with-Python 2 | 3 | ## CODES WRITTEN FOR NPTEL COURSE OF DATA ANALYSIS WITH PYTHON 4 | # COURSE TYPE 5 | Elective 6 | # COURSE LEVEL 7 | Undergraduate/Postgraduate 8 | # COURSE LAYOUT 9 | ## Week 1 : Introduction to data analytics and Python fundamentals 10 | ## Week 2 : Introduction to probability 11 | ## Week 3 : Sampling and sampling distributions 12 | ## Week 4 : Hypothesis testing 13 | ## Week 5 : Two sample testing and introduction to ANOVA 14 | ## Week 6 : Two way ANOVA and linear regression 15 | ## Week 7 : Linear regression and multiple regression 16 | ## Week 8 : Concepts of MLE and Logistic regression 17 | ## Week 9 : ROC and Regression Analysis Model Building 18 | ## Week 10 : c2 Test and introduction to cluster analysis 19 | ## Week 11 : Clustering analysis 20 | ## Week 12 : Classification and Regression Trees (CART) 21 | 22 | -------------------------------------------------------------------------------- /Week2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import scipy\n", 10 | "import numpy as np\n" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "from scipy.stats import binom" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 3, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "name": "stderr", 29 | "output_type": "stream", 30 | "text": [ 31 | "'[SegmentLocal]' is not recognized as an internal or external command,\n", 32 | "operable program or batch file.\n" 33 | ] 34 | } 35 | ], 36 | "source": [ 37 | "![SegmentLocal](http://www.stat.yale.edu/Courses/1997-98/101/binpdf.gif)" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 5, 43 | "metadata": {}, 44 | "outputs": [ 45 | { 46 | "name": "stderr", 47 | "output_type": "stream", 48 | "text": [ 49 | "'[]' is not recognized as an internal or external command,\n", 50 | "operable program or batch file.\n" 51 | ] 52 | } 53 | ], 54 | "source": [ 55 | "![](C:/Users/Garima Singh/Desktop/mooc data analysis/binpdf.gif \"segment\")" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 6, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "name": "stderr", 65 | "output_type": "stream", 66 | "text": [ 67 | "'[ChessUrl]' is not recognized as an internal or external command,\n", 68 | "operable program or batch file.\n" 69 | ] 70 | } 71 | ], 72 | "source": [ 73 | "![ChessUrl](https://upload.wikimedia.org/wikipedia/commons/7/71/ChessPawnSpecialMoves.gif \"chess\")" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 7, 79 | "metadata": {}, 80 | "outputs": [ 81 | { 82 | "name": "stdout", 83 | "output_type": "stream", 84 | "text": [ 85 | "0.09077799859322791\n" 86 | ] 87 | } 88 | ], 89 | "source": [ 90 | "print(binom.pmf(k=19,n=25,p=0.65))" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 11, 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "name": "stdout", 100 | "output_type": "stream", 101 | "text": [ 102 | "0.8850275957378545\n" 103 | ] 104 | } 105 | ], 106 | "source": [ 107 | "print(binom.cdf(k=2,n=20,p=0.06))" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [] 116 | } 117 | ], 118 | "metadata": { 119 | "kernelspec": { 120 | "display_name": "Python 3", 121 | "language": "python", 122 | "name": "python3" 123 | }, 124 | "language_info": { 125 | "codemirror_mode": { 126 | "name": "ipython", 127 | "version": 3 128 | }, 129 | "file_extension": ".py", 130 | "mimetype": "text/x-python", 131 | "name": "python", 132 | "nbconvert_exporter": "python", 133 | "pygments_lexer": "ipython3", 134 | "version": "3.7.4" 135 | } 136 | }, 137 | "nbformat": 4, 138 | "nbformat_minor": 2 139 | } 140 | -------------------------------------------------------------------------------- /Week3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np\n", 11 | "import matplotlib.pyplot as plt\n", 12 | "from scipy.stats import binom" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 4, 18 | "metadata": {}, 19 | "outputs": [ 20 | { 21 | "name": "stdout", 22 | "output_type": "stream", 23 | "text": [ 24 | "0.09077799859322791\n" 25 | ] 26 | } 27 | ], 28 | "source": [ 29 | "print(binom.pmf(k=19,n=25,p=0.65))" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 5, 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "name": "stdout", 39 | "output_type": "stream", 40 | "text": [ 41 | "5.276857914295109e-07\n" 42 | ] 43 | } 44 | ], 45 | "source": [ 46 | "print(binom.cdf(k=2,n=20,p=0.65))" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 6, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "from scipy.stats import poisson" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 7, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "data": { 65 | "text/plain": [ 66 | "0.18044704431548356" 67 | ] 68 | }, 69 | "execution_count": 7, 70 | "metadata": {}, 71 | "output_type": "execute_result" 72 | } 73 | ], 74 | "source": [ 75 | "poisson.pmf(3,2)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 8, 81 | "metadata": {}, 82 | "outputs": [ 83 | { 84 | "data": { 85 | "text/plain": [ 86 | "0.10081881344492458" 87 | ] 88 | }, 89 | "execution_count": 8, 90 | "metadata": {}, 91 | "output_type": "execute_result" 92 | } 93 | ], 94 | "source": [ 95 | "poisson.pmf(7,3,2)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 9, 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "data": { 105 | "text/plain": [ 106 | "0.9831701582510425" 107 | ] 108 | }, 109 | "execution_count": 9, 110 | "metadata": {}, 111 | "output_type": "execute_result" 112 | } 113 | ], 114 | "source": [ 115 | "poisson.cdf(7,3.2)" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 10, 121 | "metadata": {}, 122 | "outputs": [ 123 | { 124 | "data": { 125 | "text/plain": [ 126 | "array([27, 30, 33, 36, 39])" 127 | ] 128 | }, 129 | "execution_count": 10, 130 | "metadata": {}, 131 | "output_type": "execute_result" 132 | } 133 | ], 134 | "source": [ 135 | "np.arange(27,40,3)" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 11, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "from scipy.stats import uniform" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 12, 150 | "metadata": {}, 151 | "outputs": [ 152 | { 153 | "data": { 154 | "text/plain": [ 155 | "33.0" 156 | ] 157 | }, 158 | "execution_count": 12, 159 | "metadata": {}, 160 | "output_type": "execute_result" 161 | } 162 | ], 163 | "source": [ 164 | "uniform.mean(loc=27,scale=12)" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 15, 170 | "metadata": {}, 171 | "outputs": [ 172 | { 173 | "data": { 174 | "text/plain": [ 175 | "array([0.25 , 0.33333333, 0.41666667, 0.5 , 0.58333333,\n", 176 | " 0.66666667])" 177 | ] 178 | }, 179 | "execution_count": 15, 180 | "metadata": {}, 181 | "output_type": "execute_result" 182 | } 183 | ], 184 | "source": [ 185 | "uniform.cdf(np.arange(30,36,1),loc=27,scale=12)" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 16, 191 | "metadata": {}, 192 | "outputs": [ 193 | { 194 | "data": { 195 | "text/plain": [ 196 | "1.4433756729740643" 197 | ] 198 | }, 199 | "execution_count": 16, 200 | "metadata": {}, 201 | "output_type": "execute_result" 202 | } 203 | ], 204 | "source": [ 205 | "uniform.std(loc=200,scale=5)" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 17, 211 | "metadata": {}, 212 | "outputs": [], 213 | "source": [ 214 | "from scipy.stats import norm" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": 19, 220 | "metadata": {}, 221 | "outputs": [ 222 | { 223 | "data": { 224 | "text/plain": [ 225 | "0.8413447460685429" 226 | ] 227 | }, 228 | "execution_count": 19, 229 | "metadata": {}, 230 | "output_type": "execute_result" 231 | } 232 | ], 233 | "source": [ 234 | "val=68\n", 235 | "mean=65.5\n", 236 | "sd=2.5\n", 237 | "norm.cdf(val,mean,sd)" 238 | ] 239 | }, 240 | { 241 | "cell_type": "markdown", 242 | "metadata": {}, 243 | "source": [ 244 | "norm.cdf(68,m,sd)-norm.cdf(63,m,sd)" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": 21, 250 | "metadata": {}, 251 | "outputs": [ 252 | { 253 | "data": { 254 | "text/plain": [ 255 | "0.6826894921370859" 256 | ] 257 | }, 258 | "execution_count": 21, 259 | "metadata": {}, 260 | "output_type": "execute_result" 261 | } 262 | ], 263 | "source": [ 264 | "norm.cdf(68,mean,sd)-norm.cdf(63,mean,sd)" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": 22, 270 | "metadata": {}, 271 | "outputs": [ 272 | { 273 | "data": { 274 | "text/plain": [ 275 | "1.6448536269514722" 276 | ] 277 | }, 278 | "execution_count": 22, 279 | "metadata": {}, 280 | "output_type": "execute_result" 281 | } 282 | ], 283 | "source": [ 284 | "norm.ppf(0.95)" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": 23, 290 | "metadata": {}, 291 | "outputs": [ 292 | { 293 | "data": { 294 | "text/plain": [ 295 | "-1.6448536269514722" 296 | ] 297 | }, 298 | "execution_count": 23, 299 | "metadata": {}, 300 | "output_type": "execute_result" 301 | } 302 | ], 303 | "source": [ 304 | "norm.ppf(1-0.95)" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 24, 310 | "metadata": {}, 311 | "outputs": [], 312 | "source": [ 313 | "from scipy.stats import hypergeom" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": 25, 319 | "metadata": {}, 320 | "outputs": [], 321 | "source": [ 322 | "pval=hypergeom.sf(0,18,3,12)" 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": 26, 328 | "metadata": {}, 329 | "outputs": [ 330 | { 331 | "data": { 332 | "text/plain": [ 333 | "0.9754901960784306" 334 | ] 335 | }, 336 | "execution_count": 26, 337 | "metadata": {}, 338 | "output_type": "execute_result" 339 | } 340 | ], 341 | "source": [ 342 | "pval" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": 27, 348 | "metadata": {}, 349 | "outputs": [], 350 | "source": [ 351 | "pval=hypergeom.cdf(1,18,5,11)" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": 28, 357 | "metadata": {}, 358 | "outputs": [ 359 | { 360 | "data": { 361 | "text/plain": [ 362 | "0.04738562091503275" 363 | ] 364 | }, 365 | "execution_count": 28, 366 | "metadata": {}, 367 | "output_type": "execute_result" 368 | } 369 | ], 370 | "source": [ 371 | "pval" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": 31, 377 | "metadata": {}, 378 | "outputs": [], 379 | "source": [ 380 | "from scipy.stats import expon" 381 | ] 382 | }, 383 | { 384 | "cell_type": "code", 385 | "execution_count": 32, 386 | "metadata": {}, 387 | "outputs": [ 388 | { 389 | "data": { 390 | "text/plain": [ 391 | "0.8466450331550716" 392 | ] 393 | }, 394 | "execution_count": 32, 395 | "metadata": {}, 396 | "output_type": "execute_result" 397 | } 398 | ], 399 | "source": [ 400 | "expon.cdf(0.75,0,0.4)" 401 | ] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "execution_count": null, 406 | "metadata": {}, 407 | "outputs": [], 408 | "source": [] 409 | } 410 | ], 411 | "metadata": { 412 | "kernelspec": { 413 | "display_name": "Python 3", 414 | "language": "python", 415 | "name": "python3" 416 | }, 417 | "language_info": { 418 | "codemirror_mode": { 419 | "name": "ipython", 420 | "version": 3 421 | }, 422 | "file_extension": ".py", 423 | "mimetype": "text/x-python", 424 | "name": "python", 425 | "nbconvert_exporter": "python", 426 | "pygments_lexer": "ipython3", 427 | "version": "3.7.4" 428 | } 429 | }, 430 | "nbformat": 4, 431 | "nbformat_minor": 2 432 | } 433 | -------------------------------------------------------------------------------- /Week4.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from scipy import stats" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 4, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "data": { 19 | "text/plain": [ 20 | "0.07214503696589378" 21 | ] 22 | }, 23 | "execution_count": 4, 24 | "metadata": {}, 25 | "output_type": "execute_result" 26 | } 27 | ], 28 | "source": [ 29 | "stats.norm.cdf(-1.46)" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 5, 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "data": { 39 | "text/plain": [ 40 | "0.9750021048517795" 41 | ] 42 | }, 43 | "execution_count": 5, 44 | "metadata": {}, 45 | "output_type": "execute_result" 46 | } 47 | ], 48 | "source": [ 49 | "stats.norm.cdf(1.96)" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 6, 55 | "metadata": {}, 56 | "outputs": [ 57 | { 58 | "data": { 59 | "text/plain": [ 60 | "-1.2815515655446004" 61 | ] 62 | }, 63 | "execution_count": 6, 64 | "metadata": {}, 65 | "output_type": "execute_result" 66 | } 67 | ], 68 | "source": [ 69 | "stats.norm.ppf(0.1)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 7, 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "data": { 79 | "text/plain": [ 80 | "0.011010658324411393" 81 | ] 82 | }, 83 | "execution_count": 7, 84 | "metadata": {}, 85 | "output_type": "execute_result" 86 | } 87 | ], 88 | "source": [ 89 | "1-stats.norm.cdf(2.29)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 8, 95 | "metadata": {}, 96 | "outputs": [ 97 | { 98 | "data": { 99 | "text/plain": [ 100 | "0.040059156863817114" 101 | ] 102 | }, 103 | "execution_count": 8, 104 | "metadata": {}, 105 | "output_type": "execute_result" 106 | } 107 | ], 108 | "source": [ 109 | "1-stats.norm.cdf(1.75)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 9, 115 | "metadata": {}, 116 | "outputs": [ 117 | { 118 | "data": { 119 | "text/plain": [ 120 | "-1.2815515655446004" 121 | ] 122 | }, 123 | "execution_count": 9, 124 | "metadata": {}, 125 | "output_type": "execute_result" 126 | } 127 | ], 128 | "source": [ 129 | "stats.norm.ppf(0.1)" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 10, 135 | "metadata": {}, 136 | "outputs": [ 137 | { 138 | "data": { 139 | "text/plain": [ 140 | "1.2815515655446004" 141 | ] 142 | }, 143 | "execution_count": 10, 144 | "metadata": {}, 145 | "output_type": "execute_result" 146 | } 147 | ], 148 | "source": [ 149 | "stats.norm.ppf(1-0.1)" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 11, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "#t test\n", 159 | "import numpy as np" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 12, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "x=[10,12,20,21,22,24,18,15]" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 13, 174 | "metadata": {}, 175 | "outputs": [ 176 | { 177 | "data": { 178 | "text/plain": [ 179 | "Ttest_1sampResult(statistic=1.5623450931857947, pvalue=0.1621787560592894)" 180 | ] 181 | }, 182 | "execution_count": 13, 183 | "metadata": {}, 184 | "output_type": "execute_result" 185 | } 186 | ], 187 | "source": [ 188 | "stats.ttest_1samp(x,15)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 15, 194 | "metadata": {}, 195 | "outputs": [ 196 | { 197 | "data": { 198 | "text/plain": [ 199 | "0.90311273450826" 200 | ] 201 | }, 202 | "execution_count": 15, 203 | "metadata": {}, 204 | "output_type": "execute_result" 205 | } 206 | ], 207 | "source": [ 208 | "stats.t.cdf(1.56,4) #4 is dof" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 16, 214 | "metadata": {}, 215 | "outputs": [], 216 | "source": [ 217 | "from statsmodels.stats.proportion import proportions_ztest" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": 19, 223 | "metadata": {}, 224 | "outputs": [ 225 | { 226 | "data": { 227 | "text/plain": [ 228 | "(1.286806739751111, 0.1981616572238455)" 229 | ] 230 | }, 231 | "execution_count": 19, 232 | "metadata": {}, 233 | "output_type": "execute_result" 234 | } 235 | ], 236 | "source": [ 237 | "proportions_ztest(67,120,0.5) #o/p is z value and p value" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 20, 243 | "metadata": {}, 244 | "outputs": [], 245 | "source": [ 246 | "def z_value(x,mu,sem):\n", 247 | " z=(x-mu)/sem\n", 248 | " if(z<0):\n", 249 | " alfa=stats.norm.cdf(z)\n", 250 | " else:\n", 251 | " alfa=1-stats.norm.cdf(z)\n", 252 | " print(alfa)" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": 21, 258 | "metadata": {}, 259 | "outputs": [], 260 | "source": [ 261 | "x=48.5\n", 262 | "mu=50\n", 263 | "sem=0.79" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 22, 269 | "metadata": {}, 270 | "outputs": [ 271 | { 272 | "name": "stdout", 273 | "output_type": "stream", 274 | "text": [ 275 | "0.02879971774715278\n" 276 | ] 277 | } 278 | ], 279 | "source": [ 280 | "z_value(x,mu,sem) #type 1 error or alfa/2" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": 23, 286 | "metadata": {}, 287 | "outputs": [ 288 | { 289 | "data": { 290 | "text/plain": [ 291 | "0.26339575390741593" 292 | ] 293 | }, 294 | "execution_count": 23, 295 | "metadata": {}, 296 | "output_type": "execute_result" 297 | } 298 | ], 299 | "source": [ 300 | "#type 2 error or beta\n", 301 | "beta=stats.norm.cdf((51.5-52)/0.79)\n", 302 | "beta" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": 25, 308 | "metadata": {}, 309 | "outputs": [ 310 | { 311 | "data": { 312 | "text/plain": [ 313 | "0.8972117321157791" 314 | ] 315 | }, 316 | "execution_count": 25, 317 | "metadata": {}, 318 | "output_type": "execute_result" 319 | } 320 | ], 321 | "source": [ 322 | "beta=stats.norm.cdf((51.5-50.5)/0.79)\n", 323 | "beta\n", 324 | "# myu is 50.5" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": 26, 330 | "metadata": {}, 331 | "outputs": [], 332 | "source": [ 333 | "b=[89.19,90,95,90.46,93.21,97.19,97.04,91.07,92.75]" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": 27, 339 | "metadata": {}, 340 | "outputs": [], 341 | "source": [ 342 | "a=[91.5,94.18,92.18,95.39,91.79,89.07,94.72,89.21]" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": 28, 348 | "metadata": {}, 349 | "outputs": [ 350 | { 351 | "data": { 352 | "text/plain": [ 353 | "Ttest_indResult(statistic=-0.4712203461464123, pvalue=0.6442636980321892)" 354 | ] 355 | }, 356 | "execution_count": 28, 357 | "metadata": {}, 358 | "output_type": "execute_result" 359 | } 360 | ], 361 | "source": [ 362 | "stats.ttest_ind(a,b, equal_var=True)" 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": 29, 368 | "metadata": {}, 369 | "outputs": [ 370 | { 371 | "data": { 372 | "text/plain": [ 373 | "-2.1447866879169277" 374 | ] 375 | }, 376 | "execution_count": 29, 377 | "metadata": {}, 378 | "output_type": "execute_result" 379 | } 380 | ], 381 | "source": [ 382 | "stats.t.ppf(0.025,14) #critical t value" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": 30, 388 | "metadata": {}, 389 | "outputs": [], 390 | "source": [ 391 | "#sigma 1 and sigma 2 known\n", 392 | "import pandas as pd\n", 393 | "import numpy as np\n", 394 | "import math\n", 395 | "from scipy import stats" 396 | ] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "execution_count": null, 401 | "metadata": {}, 402 | "outputs": [], 403 | "source": [ 404 | "def zandp(x1,x2,sigma1,sigma2,n1,n2):\n", 405 | " z=(x1-x2)/(math.sqrt(((sigma**2)/n1)+((sigma2**2))))" 406 | ] 407 | } 408 | ], 409 | "metadata": { 410 | "kernelspec": { 411 | "display_name": "Python 3", 412 | "language": "python", 413 | "name": "python3" 414 | }, 415 | "language_info": { 416 | "codemirror_mode": { 417 | "name": "ipython", 418 | "version": 3 419 | }, 420 | "file_extension": ".py", 421 | "mimetype": "text/x-python", 422 | "name": "python", 423 | "nbconvert_exporter": "python", 424 | "pygments_lexer": "ipython3", 425 | "version": "3.7.4" 426 | } 427 | }, 428 | "nbformat": 4, 429 | "nbformat_minor": 2 430 | } 431 | -------------------------------------------------------------------------------- /Week11.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 4, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import scipy\n", 10 | "from scipy.spatial import distance" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 5, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "\n", 20 | "import numpy as np\n" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 6, 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "data": { 30 | "text/plain": [ 31 | "5.196152422706632" 32 | ] 33 | }, 34 | "execution_count": 6, 35 | "metadata": {}, 36 | "output_type": "execute_result" 37 | } 38 | ], 39 | "source": [ 40 | "\n", 41 | "a=[1,2,3]\n", 42 | "b=[4,5,6]\n", 43 | "dist=distance.euclidean(a,b)\n", 44 | "dist" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 7, 50 | "metadata": {}, 51 | "outputs": [ 52 | { 53 | "data": { 54 | "text/plain": [ 55 | "2.0" 56 | ] 57 | }, 58 | "execution_count": 7, 59 | "metadata": {}, 60 | "output_type": "execute_result" 61 | } 62 | ], 63 | "source": [ 64 | "distance.minkowski([1,0,0],[0,1,0],1) #manhattan" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 8, 70 | "metadata": {}, 71 | "outputs": [ 72 | { 73 | "data": { 74 | "text/plain": [ 75 | "1.4142135623730951" 76 | ] 77 | }, 78 | "execution_count": 8, 79 | "metadata": {}, 80 | "output_type": "execute_result" 81 | } 82 | ], 83 | "source": [ 84 | "distance.minkowski([1,0,0],[0,1,0],2) #euclidean" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 9, 90 | "metadata": {}, 91 | "outputs": [ 92 | { 93 | "data": { 94 | "text/plain": [ 95 | "1.2599210498948732" 96 | ] 97 | }, 98 | "execution_count": 9, 99 | "metadata": {}, 100 | "output_type": "execute_result" 101 | } 102 | ], 103 | "source": [ 104 | "distance.minkowski([1,0,0],[0,1,0],3) #minkowski" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 10, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "import pandas as pd\n", 114 | "from scipy.spatial import distance_matrix" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 11, 120 | "metadata": {}, 121 | "outputs": [ 122 | { 123 | "data": { 124 | "text/html": [ 125 | "
\n", 126 | "\n", 139 | "\n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | "
ab
014
125
236
\n", 165 | "
" 166 | ], 167 | "text/plain": [ 168 | " a b\n", 169 | "0 1 4\n", 170 | "1 2 5\n", 171 | "2 3 6" 172 | ] 173 | }, 174 | "execution_count": 11, 175 | "metadata": {}, 176 | "output_type": "execute_result" 177 | } 178 | ], 179 | "source": [ 180 | "data=[[1,4],[2,5],[3,6]]\n", 181 | "df=pd.DataFrame(data,columns=['a','b'])\n", 182 | "df" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 12, 188 | "metadata": {}, 189 | "outputs": [ 190 | { 191 | "data": { 192 | "text/html": [ 193 | "
\n", 194 | "\n", 207 | "\n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | "
012
00.0000001.4142142.828427
11.4142140.0000001.414214
22.8284271.4142140.000000
\n", 237 | "
" 238 | ], 239 | "text/plain": [ 240 | " 0 1 2\n", 241 | "0 0.000000 1.414214 2.828427\n", 242 | "1 1.414214 0.000000 1.414214\n", 243 | "2 2.828427 1.414214 0.000000" 244 | ] 245 | }, 246 | "execution_count": 12, 247 | "metadata": {}, 248 | "output_type": "execute_result" 249 | } 250 | ], 251 | "source": [ 252 | "pd.DataFrame(distance_matrix(df.values,df.values))" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": 13, 258 | "metadata": {}, 259 | "outputs": [ 260 | { 261 | "data": { 262 | "text/html": [ 263 | "
\n", 264 | "\n", 277 | "\n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | "
wtht
a14
b25
c36
\n", 303 | "
" 304 | ], 305 | "text/plain": [ 306 | " wt ht\n", 307 | "a 1 4\n", 308 | "b 2 5\n", 309 | "c 3 6" 310 | ] 311 | }, 312 | "execution_count": 13, 313 | "metadata": {}, 314 | "output_type": "execute_result" 315 | } 316 | ], 317 | "source": [ 318 | "data=[[1,4],[2,5],[3,6]]\n", 319 | "ctys=['a','b','c']\n", 320 | "df=pd.DataFrame(data,columns=['wt','ht'],index=ctys)\n", 321 | "df" 322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "execution_count": 14, 327 | "metadata": {}, 328 | "outputs": [], 329 | "source": [ 330 | "dist_mat=pd.DataFrame(distance_matrix(df.values,df.values),index=df.index,columns=df.index)" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": 15, 336 | "metadata": {}, 337 | "outputs": [ 338 | { 339 | "data": { 340 | "text/html": [ 341 | "
\n", 342 | "\n", 355 | "\n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | "
abc
a0.0000001.4142142.828427
b1.4142140.0000001.414214
c2.8284271.4142140.000000
\n", 385 | "
" 386 | ], 387 | "text/plain": [ 388 | " a b c\n", 389 | "a 0.000000 1.414214 2.828427\n", 390 | "b 1.414214 0.000000 1.414214\n", 391 | "c 2.828427 1.414214 0.000000" 392 | ] 393 | }, 394 | "execution_count": 15, 395 | "metadata": {}, 396 | "output_type": "execute_result" 397 | } 398 | ], 399 | "source": [ 400 | "dist_mat" 401 | ] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "execution_count": 16, 406 | "metadata": {}, 407 | "outputs": [ 408 | { 409 | "data": { 410 | "text/html": [ 411 | "
\n", 412 | "\n", 425 | "\n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | "
abc
a0.001.412.83
b1.410.001.41
c2.831.410.00
\n", 455 | "
" 456 | ], 457 | "text/plain": [ 458 | " a b c\n", 459 | "a 0.00 1.41 2.83\n", 460 | "b 1.41 0.00 1.41\n", 461 | "c 2.83 1.41 0.00" 462 | ] 463 | }, 464 | "execution_count": 16, 465 | "metadata": {}, 466 | "output_type": "execute_result" 467 | } 468 | ], 469 | "source": [ 470 | "dist_mat.round(decimals=2,out=None)" 471 | ] 472 | }, 473 | { 474 | "cell_type": "code", 475 | "execution_count": 17, 476 | "metadata": {}, 477 | "outputs": [ 478 | { 479 | "data": { 480 | "text/plain": [ 481 | "KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,\n", 482 | " n_clusters=2, n_init=10, n_jobs=None, precompute_distances='auto',\n", 483 | " random_state=None, tol=0.0001, verbose=0)" 484 | ] 485 | }, 486 | "execution_count": 17, 487 | "metadata": {}, 488 | "output_type": "execute_result" 489 | } 490 | ], 491 | "source": [ 492 | "#k means clustering\n", 493 | "#plot scatter plot\n", 494 | "from sklearn.cluster import KMeans\n", 495 | "kmeans=KMeans(n_clusters=2) #no of clusters 2\n", 496 | "kmeans.fit(data)" 497 | ] 498 | }, 499 | { 500 | "cell_type": "code", 501 | "execution_count": 18, 502 | "metadata": {}, 503 | "outputs": [ 504 | { 505 | "data": { 506 | "text/plain": [ 507 | "array([[3. , 6. ],\n", 508 | " [1.5, 4.5]])" 509 | ] 510 | }, 511 | "execution_count": 18, 512 | "metadata": {}, 513 | "output_type": "execute_result" 514 | } 515 | ], 516 | "source": [ 517 | "labels=kmeans.predict(data)\n", 518 | "centroids=kmeans.cluster_centers_\n", 519 | "centroids" 520 | ] 521 | }, 522 | { 523 | "cell_type": "code", 524 | "execution_count": 19, 525 | "metadata": {}, 526 | "outputs": [ 527 | { 528 | "ename": "NameError", 529 | "evalue": "name 'centroid' is not defined", 530 | "output_type": "error", 531 | "traceback": [ 532 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 533 | "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", 534 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[0mfig\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0max\u001b[0m\u001b[1;33m=\u001b[0m \u001b[0mplt\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msubplots\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 8\u001b[0m \u001b[0max\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mscatter\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mb\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mcolor\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mcolors1\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0malpha\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0.5\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0medgecolor\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'k'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 9\u001b[1;33m \u001b[1;32mfor\u001b[0m \u001b[0midx\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mcentroid\u001b[0m \u001b[1;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcentroid\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 10\u001b[0m \u001b[0mplt\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mscatter\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0mcentroid\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcolor\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mcolmap\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0midx\u001b[0m\u001b[1;33m+\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mtxt\u001b[0m \u001b[1;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mn\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 535 | "\u001b[1;31mNameError\u001b[0m: name 'centroid' is not defined" 536 | ] 537 | } 538 | ], 539 | "source": [ 540 | "#diagram of clusters\n", 541 | "import matplotlib.pyplot as plt\n", 542 | "fig=plt.figure(figsize= (5,5))\n", 543 | "colmap={1:'r',2:'b'}\n", 544 | "colors=map(lambda x: colmap[x+1], labels)\n", 545 | "colors1=list(colors)\n", 546 | "fig,ax= plt.subplots()\n", 547 | "ax.scatter(a,b,color=colors1,alpha=0.5,edgecolor='k')\n", 548 | "for idx,centroid in enumerate(centroid):\n", 549 | " plt.scatter(*centroid, color=colmap[idx+1])\n", 550 | "for i,txt in enumerate(n):\n", 551 | " ax.annotate(txt,(x[i],y[i]))\n", 552 | "plt.grid()\n", 553 | "plt.xlim(0,5)\n", 554 | "plt.ylim(0,5)\n", 555 | "plt.show()" 556 | ] 557 | }, 558 | { 559 | "cell_type": "code", 560 | "execution_count": null, 561 | "metadata": {}, 562 | "outputs": [], 563 | "source": [] 564 | } 565 | ], 566 | "metadata": { 567 | "kernelspec": { 568 | "display_name": "Python 3", 569 | "language": "python", 570 | "name": "python3" 571 | }, 572 | "language_info": { 573 | "codemirror_mode": { 574 | "name": "ipython", 575 | "version": 3 576 | }, 577 | "file_extension": ".py", 578 | "mimetype": "text/x-python", 579 | "name": "python", 580 | "nbconvert_exporter": "python", 581 | "pygments_lexer": "ipython3", 582 | "version": "3.7.4" 583 | } 584 | }, 585 | "nbformat": 4, 586 | "nbformat_minor": 2 587 | } 588 | -------------------------------------------------------------------------------- /Week8.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 14, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import matplotlib as mp\n", 19 | "import numpy as np\n", 20 | "from scipy.optimize import minimize\n", 21 | "import scipy.stats as stats\n", 22 | "from scipy import stats\n", 23 | "import statsmodels.api as sm\n", 24 | "from statsmodels.formula.api import ols\n", 25 | "from matplotlib import pyplot as plt\n", 26 | "import pandas as pd\n", 27 | "import numpy as np\n", 28 | "import math\n", 29 | "import scipy\n", 30 | "from scipy import stats\n", 31 | "import pandas as pd\n", 32 | "import numpy as np\n", 33 | "import math\n", 34 | "import scipy\n", 35 | "from scipy import stats\n", 36 | "from scipy import stats\n", 37 | "import statsmodels.api as sm\n", 38 | "\n", 39 | "import statsmodels.formula.api \n", 40 | "import statsmodels.formula.api as smf\n", 41 | "from statsmodels.formula.api import ols\n", 42 | "from matplotlib import pyplot as plt" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 15, 48 | "metadata": {}, 49 | "outputs": [ 50 | { 51 | "data": { 52 | "text/html": [ 53 | "
\n", 54 | "\n", 67 | "\n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | "
IdYX
0121
1264
2375
3496
45159
\n", 109 | "
" 110 | ], 111 | "text/plain": [ 112 | " Id Y X\n", 113 | "0 1 2 1\n", 114 | "1 2 6 4\n", 115 | "2 3 7 5\n", 116 | "3 4 9 6\n", 117 | "4 5 15 9" 118 | ] 119 | }, 120 | "execution_count": 15, 121 | "metadata": {}, 122 | "output_type": "execute_result" 123 | } 124 | ], 125 | "source": [ 126 | "tb1=pd.read_excel('C:/Users/Garima Singh/Desktop/mooc data analysis/MLE.xlsx')\n", 127 | "tb1" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 17, 133 | "metadata": {}, 134 | "outputs": [ 135 | { 136 | "name": "stdout", 137 | "output_type": "stream", 138 | "text": [ 139 | " OLS Regression Results \n", 140 | "==============================================================================\n", 141 | "Dep. Variable: Y R-squared: 0.980\n", 142 | "Model: OLS Adj. R-squared: 0.973\n", 143 | "Method: Least Squares F-statistic: 145.9\n", 144 | "Date: Sun, 29 Mar 2020 Prob (F-statistic): 0.00122\n", 145 | "Time: 16:15:58 Log-Likelihood: -4.5811\n", 146 | "No. Observations: 5 AIC: 13.16\n", 147 | "Df Residuals: 3 BIC: 12.38\n", 148 | "Df Model: 1 \n", 149 | "Covariance Type: nonrobust \n", 150 | "==============================================================================\n", 151 | " coef std err t P>|t| [0.025 0.975]\n", 152 | "------------------------------------------------------------------------------\n", 153 | "const -0.2882 0.755 -0.382 0.728 -2.692 2.115\n", 154 | "X 1.6176 0.134 12.079 0.001 1.191 2.044\n", 155 | "==============================================================================\n", 156 | "Omnibus: nan Durbin-Watson: 1.405\n", 157 | "Prob(Omnibus): nan Jarque-Bera (JB): 0.551\n", 158 | "Skew: 0.089 Prob(JB): 0.759\n", 159 | "Kurtosis: 1.384 Cond. No. 12.5\n", 160 | "==============================================================================\n", 161 | "\n", 162 | "Warnings:\n", 163 | "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n" 164 | ] 165 | }, 166 | { 167 | "name": "stderr", 168 | "output_type": "stream", 169 | "text": [ 170 | "C:\\Users\\Garima Singh\\Anaconda3\\lib\\site-packages\\statsmodels\\stats\\stattools.py:71: ValueWarning: omni_normtest is not valid with less than 8 observations; 5 samples were given.\n", 171 | " \"samples were given.\" % int(n), ValueWarning)\n" 172 | ] 173 | } 174 | ], 175 | "source": [ 176 | "#lse to get reg eqn\n", 177 | "import statsmodels.api as sm\n", 178 | "\n", 179 | "from statsmodels.formula.api import ols\n", 180 | "x=tb1['X']\n", 181 | "y=tb1['Y']\n", 182 | "x2=sm.add_constant(x)\n", 183 | "mod1=sm.OLS(y,x2)\n", 184 | "mod12=mod1.fit()\n", 185 | "print(mod12.summary())" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 18, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "#mle to get reg eqn\n", 195 | "import matplotlib.pyplot as plt\n", 196 | "def like(parameters):\n", 197 | " m=parameters[0] #slope\n", 198 | " b=parameters[1] # y-intercept\n", 199 | " sigma=parameters[2] #sd of error term\n", 200 | " for i in np.arange(0,len(x)):\n", 201 | " y_exp=m*x+b\n", 202 | " L=(len(x)/2*np.log(2*np.pi)+len(x)/2*np.log(sigma**2)+1/(2*sigma**2)*sum((y-y_exp)**2))\n", 203 | " return L\n", 204 | " " 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": 21, 210 | "metadata": {}, 211 | "outputs": [ 212 | { 213 | "data": { 214 | "text/plain": [ 215 | " fun: 4.581084072762135\n", 216 | " hess_inv: <3x3 LbfgsInvHessProduct with dtype=float64>\n", 217 | " jac: array([1.24344979e-06, 2.84217094e-06, 1.33226763e-06])\n", 218 | " message: b'CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL'\n", 219 | " nfev: 108\n", 220 | " nit: 17\n", 221 | " status: 0\n", 222 | " success: True\n", 223 | " x: array([ 1.61764689, -0.28823426, 0.60488214])" 224 | ] 225 | }, 226 | "execution_count": 21, 227 | "metadata": {}, 228 | "output_type": "execute_result" 229 | } 230 | ], 231 | "source": [ 232 | "x=np.array([1,4,5,6,9])\n", 233 | "y=np.array([2,6,7,9,15])\n", 234 | "lik_model=minimize(like,np.array([2,2,2]),method=\"L-BFGS-B\")\n", 235 | "lik_model" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 24, 241 | "metadata": {}, 242 | "outputs": [ 243 | { 244 | "ename": "NameError", 245 | "evalue": "name 'lik_mode' is not defined", 246 | "output_type": "error", 247 | "traceback": [ 248 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 249 | "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", 250 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mlik_mode\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", 251 | "\u001b[1;31mNameError\u001b[0m: name 'lik_mode' is not defined" 252 | ] 253 | } 254 | ], 255 | "source": [ 256 | "lik_mode.x" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": 25, 262 | "metadata": {}, 263 | "outputs": [ 264 | { 265 | "data": { 266 | "text/plain": [ 267 | " final_simplex: (array([[ 1.61765326, -0.28825593, 0.60488098],\n", 268 | " [ 1.61765283, -0.28828724, 0.60488296],\n", 269 | " [ 1.61764444, -0.2882312 , 0.60486363],\n", 270 | " [ 1.61763731, -0.28820599, 0.60489994]]), array([4.58108408, 4.58108408, 4.58108408, 4.58108408]))\n", 271 | " fun: 4.581084075293504\n", 272 | " message: 'Optimization terminated successfully.'\n", 273 | " nfev: 182\n", 274 | " nit: 100\n", 275 | " status: 0\n", 276 | " success: True\n", 277 | " x: array([ 1.61765326, -0.28825593, 0.60488098])" 278 | ] 279 | }, 280 | "execution_count": 25, 281 | "metadata": {}, 282 | "output_type": "execute_result" 283 | } 284 | ], 285 | "source": [ 286 | "\n", 287 | "lik_model=minimize(like,np.array([2,2,2]),method=\"Nelder-Mead\")\n", 288 | "lik_model" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": 29, 294 | "metadata": {}, 295 | "outputs": [ 296 | { 297 | "data": { 298 | "text/html": [ 299 | "
\n", 300 | "\n", 313 | "\n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | "
CustomerSpendingCardCoupon
012.29110
123.21510
232.13510
343.92400
452.52810
...............
95963.31800
96972.42110
97986.07300
98992.63010
991003.41101
\n", 403 | "

100 rows × 4 columns

\n", 404 | "
" 405 | ], 406 | "text/plain": [ 407 | " Customer Spending Card Coupon\n", 408 | "0 1 2.291 1 0\n", 409 | "1 2 3.215 1 0\n", 410 | "2 3 2.135 1 0\n", 411 | "3 4 3.924 0 0\n", 412 | "4 5 2.528 1 0\n", 413 | ".. ... ... ... ...\n", 414 | "95 96 3.318 0 0\n", 415 | "96 97 2.421 1 0\n", 416 | "97 98 6.073 0 0\n", 417 | "98 99 2.630 1 0\n", 418 | "99 100 3.411 0 1\n", 419 | "\n", 420 | "[100 rows x 4 columns]" 421 | ] 422 | }, 423 | "execution_count": 29, 424 | "metadata": {}, 425 | "output_type": "execute_result" 426 | } 427 | ], 428 | "source": [ 429 | "tb1=pd.read_excel('C:/Users/Garima Singh/Desktop/mooc data analysis/Simmons.xls')\n", 430 | "tb1" 431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "execution_count": 31, 436 | "metadata": {}, 437 | "outputs": [ 438 | { 439 | "ename": "ValueError", 440 | "evalue": "Unrecognized marker style 't'", 441 | "output_type": "error", 442 | "traceback": [ 443 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 444 | "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", 445 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\matplotlib\\markers.py\u001b[0m in \u001b[0;36mset_marker\u001b[1;34m(self, marker)\u001b[0m\n\u001b[0;32m 308\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 309\u001b[1;33m \u001b[0mPath\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmarker\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 310\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_marker_function\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_set_vertices\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 446 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\matplotlib\\path.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, vertices, codes, _interpolation_steps, closed, readonly)\u001b[0m\n\u001b[0;32m 126\u001b[0m \"\"\"\n\u001b[1;32m--> 127\u001b[1;33m \u001b[0mvertices\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_to_unmasked_float_array\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvertices\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 128\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mvertices\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[1;33m!=\u001b[0m \u001b[1;36m2\u001b[0m \u001b[1;32mor\u001b[0m \u001b[0mvertices\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m!=\u001b[0m \u001b[1;36m2\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 447 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\matplotlib\\cbook\\__init__.py\u001b[0m in \u001b[0;36m_to_unmasked_float_array\u001b[1;34m(x)\u001b[0m\n\u001b[0;32m 1389\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1390\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0masarray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfloat\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1391\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", 448 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\numpy\\core\\numeric.py\u001b[0m in \u001b[0;36masarray\u001b[1;34m(a, dtype, order)\u001b[0m\n\u001b[0;32m 537\u001b[0m \"\"\"\n\u001b[1;32m--> 538\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0marray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0morder\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0morder\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 539\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", 449 | "\u001b[1;31mValueError\u001b[0m: could not convert string to float: 't'", 450 | "\nDuring handling of the above exception, another exception occurred:\n", 451 | "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", 452 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mlinear_model\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmetrics\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mmean_squared_error\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m \u001b[0mplt\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mscatter\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtb1\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mSpending\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mtb1\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mCoupon\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mmarker\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m't'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mcolor\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'red'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", 453 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\matplotlib\\pyplot.py\u001b[0m in \u001b[0;36mscatter\u001b[1;34m(x, y, s, c, marker, cmap, norm, vmin, vmax, alpha, linewidths, verts, edgecolors, plotnonfinite, data, **kwargs)\u001b[0m\n\u001b[0;32m 2845\u001b[0m \u001b[0mverts\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mverts\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0medgecolors\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0medgecolors\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2846\u001b[0m plotnonfinite=plotnonfinite, **({\"data\": data} if data is not\n\u001b[1;32m-> 2847\u001b[1;33m None else {}), **kwargs)\n\u001b[0m\u001b[0;32m 2848\u001b[0m \u001b[0msci\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0m__ret\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2849\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0m__ret\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 454 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\matplotlib\\__init__.py\u001b[0m in \u001b[0;36minner\u001b[1;34m(ax, data, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1599\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0minner\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0max\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1600\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mdata\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1601\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0max\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0mmap\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msanitize_sequence\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0margs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1602\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1603\u001b[0m \u001b[0mbound\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnew_sig\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbind\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0max\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 455 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\matplotlib\\axes\\_axes.py\u001b[0m in \u001b[0;36mscatter\u001b[1;34m(self, x, y, s, c, marker, cmap, norm, vmin, vmax, alpha, linewidths, verts, edgecolors, plotnonfinite, **kwargs)\u001b[0m\n\u001b[0;32m 4479\u001b[0m \u001b[0mmarker_obj\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmarker\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4480\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 4481\u001b[1;33m \u001b[0mmarker_obj\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmmarkers\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mMarkerStyle\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmarker\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4482\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4483\u001b[0m path = marker_obj.get_path().transformed(\n", 456 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\matplotlib\\markers.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, marker, fillstyle)\u001b[0m\n\u001b[0;32m 241\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_marker_function\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 242\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mset_fillstyle\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfillstyle\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 243\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mset_marker\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmarker\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 244\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 245\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_recache\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 457 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\matplotlib\\markers.py\u001b[0m in \u001b[0;36mset_marker\u001b[1;34m(self, marker)\u001b[0m\n\u001b[0;32m 311\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 312\u001b[0m raise ValueError('Unrecognized marker style {!r}'\n\u001b[1;32m--> 313\u001b[1;33m .format(marker))\n\u001b[0m\u001b[0;32m 314\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 315\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_marker\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmarker\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 458 | "\u001b[1;31mValueError\u001b[0m: Unrecognized marker style 't'" 459 | ] 460 | }, 461 | { 462 | "data": { 463 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAD8CAYAAAB0IB+mAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAANgElEQVR4nO3ccYjfd33H8efLxE6mtY7lBEmi7Vi6Gsqg7ug6hFnRjbR/JP8USaC4SmnArQ5mETocKvWvKUMQsmm2iVPQWv1DD4nkD1fpECO50lmalMAtOnNE6Fm7/lO0Znvvj99P77hcct/e/e4u3vv5gMDv+/t9fr9758PdM798f/f7paqQJG1/r9rqASRJm8PgS1ITBl+SmjD4ktSEwZekJgy+JDWxavCTfC7Jc0meucLtSfLpJHNJnk7ytsmPKUlaryHP8D8PHLjK7XcB+8Z/jgL/tP6xJEmTtmrwq+oJ4GdXWXII+EKNnALekORNkxpQkjQZOyfwGLuBC0uO58fX/WT5wiRHGf0vgNe+9rV/dMstt0zgy0tSH08++eRPq2pqLfedRPCzwnUrfl5DVR0HjgNMT0/X7OzsBL68JPWR5L/Xet9J/JbOPLB3yfEe4OIEHleSNEGTCP4M8N7xb+vcAbxYVZedzpEkba1VT+kk+TJwJ7AryTzwUeDVAFX1GeAEcDcwB7wEvG+jhpUkrd2qwa+qI6vcXsBfTWwiSdKG8J22ktSEwZekJgy+JDVh8CWpCYMvSU0YfElqwuBLUhMGX5KaMPiS1ITBl6QmDL4kNWHwJakJgy9JTRh8SWrC4EtSEwZfkpow+JLUhMGXpCYMviQ1YfAlqQmDL0lNGHxJasLgS1ITBl+SmjD4ktSEwZekJgy+JDVh8CWpCYMvSU0YfElqwuBLUhMGX5KaMPiS1ITBl6QmDL4kNTEo+EkOJDmXZC7Jwyvc/uYkjyd5KsnTSe6e/KiSpPVYNfhJdgDHgLuA/cCRJPuXLfs74LGqug04DPzjpAeVJK3PkGf4twNzVXW+ql4GHgUOLVtTwOvHl28ALk5uREnSJAwJ/m7gwpLj+fF1S30MuDfJPHAC+MBKD5TkaJLZJLMLCwtrGFeStFZDgp8Vrqtlx0eAz1fVHuBu4ItJLnvsqjpeVdNVNT01NfXKp5UkrdmQ4M8De5cc7+HyUzb3A48BVNX3gNcAuyYxoCRpMoYE/zSwL8lNSa5j9KLszLI1PwbeBZDkrYyC7zkbSbqGrBr8qroEPAicBJ5l9Ns4Z5I8kuTgeNlDwANJfgB8Gbivqpaf9pEkbaGdQxZV1QlGL8Yuve4jSy6fBd4+2dEkSZPkO20lqQmDL0lNGHxJasLgS1ITBl+SmjD4ktSEwZekJgy+JDVh8CWpCYMvSU0YfElqwuBLUhMGX5KaMPiS1ITBl6QmDL4kNWHwJakJgy9JTRh8SWrC4EtSEwZfkpow+JLUhMGXpCYMviQ1YfAlqQmDL0lNGHxJasLgS1ITBl+SmjD4ktSEwZekJgy+JDVh8CWpCYMvSU0MCn6SA0nOJZlL8vAV1rwnydkkZ5J8abJjSpLWa+dqC5LsAI4BfwbMA6eTzFTV2SVr9gF/C7y9ql5I8saNGliStDZDnuHfDsxV1fmqehl4FDi0bM0DwLGqegGgqp6b7JiSpPUaEvzdwIUlx/Pj65a6Gbg5yXeTnEpyYKUHSnI0yWyS2YWFhbVNLElakyHBzwrX1bLjncA+4E7gCPAvSd5w2Z2qjlfVdFVNT01NvdJZJUnrMCT488DeJcd7gIsrrPlGVf2yqn4InGP0D4Ak6RoxJPingX1JbkpyHXAYmFm25uvAOwGS7GJ0iuf8JAeVJK3PqsGvqkvAg8BJ4Fngsao6k+SRJAfHy04Czyc5CzwOfKiqnt+ooSVJr1yqlp+O3xzT09M1Ozu7JV9bkn5TJXmyqqbXcl/faStJTRh8SWrC4EtSEwZfkpow+JLUhMGXpCYMviQ1YfAlqQmDL0lNGHxJasLgS1ITBl+SmjD4ktSEwZekJgy+JDVh8CWpCYMvSU0YfElqwuBLUhMGX5KaMPiS1ITBl6QmDL4kNWHwJakJgy9JTRh8SWrC4EtSEwZfkpow+JLUhMGXpCYMviQ1YfAlqQmDL0lNGHxJasLgS1ITg4Kf5ECSc0nmkjx8lXX3JKkk05MbUZI0CasGP8kO4BhwF7AfOJJk/wrrrgf+Gvj+pIeUJK3fkGf4twNzVXW+ql4GHgUOrbDu48AngJ9PcD5J0oQMCf5u4MKS4/nxdb+W5DZgb1V982oPlORoktkkswsLC694WEnS2g0Jfla4rn59Y/Iq4FPAQ6s9UFUdr6rpqpqempoaPqUkad2GBH8e2LvkeA9wccnx9cCtwHeS/Ai4A5jxhVtJurYMCf5pYF+Sm5JcBxwGZn51Y1W9WFW7qurGqroROAUcrKrZDZlYkrQmqwa/qi4BDwIngWeBx6rqTJJHkhzc6AElSZOxc8iiqjoBnFh23UeusPbO9Y8lSZo032krSU0YfElqwuBLUhMGX5KaMPiS1ITBl6QmDL4kNWHwJakJgy9JTRh8SWrC4EtSEwZfkpow+JLUhMGXpCYMviQ1YfAlqQmDL0lNGHxJasLgS1ITBl+SmjD4ktSEwZekJgy+JDVh8CWpCYMvSU0YfElqwuBLUhMGX5KaMPiS1ITBl6QmDL4kNWHwJakJgy9JTRh8SWpiUPCTHEhyLslckodXuP2DSc4meTrJt5O8ZfKjSpLWY9XgJ9kBHAPuAvYDR5LsX7bsKWC6qv4Q+BrwiUkPKklanyHP8G8H5qrqfFW9DDwKHFq6oKoer6qXxoengD2THVOStF5Dgr8buLDkeH583ZXcD3xrpRuSHE0ym2R2YWFh+JSSpHUbEvyscF2tuDC5F5gGPrnS7VV1vKqmq2p6ampq+JSSpHXbOWDNPLB3yfEe4OLyRUneDXwYeEdV/WIy40mSJmXIM/zTwL4kNyW5DjgMzCxdkOQ24LPAwap6bvJjSpLWa9XgV9Ul4EHgJPAs8FhVnUnySJKD42WfBF4HfDXJfyaZucLDSZK2yJBTOlTVCeDEsus+suTyuyc8lyRpwnynrSQ1YfAlqQmDL0lNGHxJasLgS1ITBl+SmjD4ktSEwZekJgy+JDVh8CWpCYMvSU0YfElqwuBLUhMGX5KaMPiS1ITBl6QmDL4kNWHwJakJgy9JTRh8SWrC4EtSEwZfkpow+JLUhMGXpCYMviQ1YfAlqQmDL0lNGHxJasLgS1ITBl+SmjD4ktSEwZekJgy+JDVh8CWpCYMvSU0MCn6SA0nOJZlL8vAKt/9Wkq+Mb/9+khsnPagkaX1WDX6SHcAx4C5gP3Akyf5ly+4HXqiq3wc+Bfz9pAeVJK3PkGf4twNzVXW+ql4GHgUOLVtzCPi38eWvAe9KksmNKUlar50D1uwGLiw5ngf++EprqupSkheB3wV+unRRkqPA0fHhL5I8s5aht6FdLNurxtyLRe7FIvdi0R+s9Y5Dgr/SM/Vawxqq6jhwHCDJbFVND/j62557sci9WOReLHIvFiWZXet9h5zSmQf2LjneA1y80pokO4EbgJ+tdShJ0uQNCf5pYF+Sm5JcBxwGZpatmQH+Ynz5HuDfq+qyZ/iSpK2z6imd8Tn5B4GTwA7gc1V1JskjwGxVzQD/CnwxyRyjZ/aHB3zt4+uYe7txLxa5F4vci0XuxaI170V8Ii5JPfhOW0lqwuBLUhMbHnw/lmHRgL34YJKzSZ5O8u0kb9mKOTfDanuxZN09SSrJtv2VvCF7keQ94++NM0m+tNkzbpYBPyNvTvJ4kqfGPyd3b8WcGy3J55I8d6X3KmXk0+N9ejrJ2wY9cFVt2B9GL/L+F/B7wHXAD4D9y9b8JfCZ8eXDwFc2cqat+jNwL94J/Pb48vs778V43fXAE8ApYHqr597C74t9wFPA74yP37jVc2/hXhwH3j++vB/40VbPvUF78afA24BnrnD73cC3GL0H6g7g+0Med6Of4fuxDItW3YuqeryqXhofnmL0noftaMj3BcDHgU8AP9/M4TbZkL14ADhWVS8AVNVzmzzjZhmyFwW8fnz5Bi5/T9C2UFVPcPX3Mh0CvlAjp4A3JHnTao+70cFf6WMZdl9pTVVdAn71sQzbzZC9WOp+Rv+Cb0er7kWS24C9VfXNzRxsCwz5vrgZuDnJd5OcSnJg06bbXEP24mPAvUnmgRPABzZntGvOK+0JMOyjFdZjYh/LsA0M/nsmuReYBt6xoRNtnavuRZJXMfrU1fs2a6AtNOT7Yiej0zp3Mvpf338kubWq/meDZ9tsQ/biCPD5qvqHJH/C6P0/t1bV/238eNeUNXVzo5/h+7EMi4bsBUneDXwYOFhVv9ik2TbbantxPXAr8J0kP2J0jnJmm75wO/Rn5BtV9cuq+iFwjtE/ANvNkL24H3gMoKq+B7yG0QerdTOoJ8ttdPD9WIZFq+7F+DTGZxnFfruep4VV9qKqXqyqXVV1Y1XdyOj1jINVteYPjbqGDfkZ+TqjF/RJsovRKZ7zmzrl5hiyFz8G3gWQ5K2Mgr+wqVNeG2aA945/W+cO4MWq+slqd9rQUzq1cR/L8Btn4F58Engd8NXx69Y/rqqDWzb0Bhm4Fy0M3IuTwJ8nOQv8L/Chqnp+66beGAP34iHgn5P8DaNTGPdtxyeISb7M6BTervHrFR8FXg1QVZ9h9PrF3cAc8BLwvkGPuw33SpK0At9pK0lNGHxJasLgS1ITBl+SmjD4ktSEwZekJgy+JDXx/4aZaro1YsjCAAAAAElFTkSuQmCC\n", 464 | "text/plain": [ 465 | "
" 466 | ] 467 | }, 468 | "metadata": { 469 | "needs_background": "light" 470 | }, 471 | "output_type": "display_data" 472 | } 473 | ], 474 | "source": [ 475 | "#graph between continuous and categorical dep var\n", 476 | "from sklearn import linear_model\n", 477 | "from sklearn.metrics import mean_squared_error\n", 478 | "plt.scatter(tb1.Spending,tb1.Coupon,marker='t',color='red')" 479 | ] 480 | }, 481 | { 482 | "cell_type": "code", 483 | "execution_count": 34, 484 | "metadata": {}, 485 | "outputs": [ 486 | { 487 | "name": "stdout", 488 | "output_type": "stream", 489 | "text": [ 490 | "Optimization terminated successfully.\n", 491 | " Current function value: 0.604869\n", 492 | " Iterations 5\n" 493 | ] 494 | }, 495 | { 496 | "data": { 497 | "text/html": [ 498 | "\n", 499 | "\n", 500 | "\n", 501 | " \n", 502 | "\n", 503 | "\n", 504 | " \n", 505 | "\n", 506 | "\n", 507 | " \n", 508 | "\n", 509 | "\n", 510 | " \n", 511 | "\n", 512 | "\n", 513 | " \n", 514 | "\n", 515 | "\n", 516 | " \n", 517 | "\n", 518 | "\n", 519 | " \n", 520 | "\n", 521 | "
Logit Regression Results
Dep. Variable: Coupon No. Observations: 100
Model: Logit Df Residuals: 97
Method: MLE Df Model: 2
Date: Sun, 29 Mar 2020 Pseudo R-squ.: 0.1012
Time: 16:37:44 Log-Likelihood: -60.487
converged: True LL-Null: -67.301
Covariance Type: nonrobust LLR p-value: 0.001098
\n", 522 | "\n", 523 | "\n", 524 | " \n", 525 | "\n", 526 | "\n", 527 | " \n", 528 | "\n", 529 | "\n", 530 | " \n", 531 | "\n", 532 | "\n", 533 | " \n", 534 | "\n", 535 | "
coef std err z P>|z| [0.025 0.975]
const -2.1464 0.577 -3.718 0.000 -3.278 -1.015
Card 1.0987 0.445 2.471 0.013 0.227 1.970
Spending 0.3416 0.129 2.655 0.008 0.089 0.594
" 536 | ], 537 | "text/plain": [ 538 | "\n", 539 | "\"\"\"\n", 540 | " Logit Regression Results \n", 541 | "==============================================================================\n", 542 | "Dep. Variable: Coupon No. Observations: 100\n", 543 | "Model: Logit Df Residuals: 97\n", 544 | "Method: MLE Df Model: 2\n", 545 | "Date: Sun, 29 Mar 2020 Pseudo R-squ.: 0.1012\n", 546 | "Time: 16:37:44 Log-Likelihood: -60.487\n", 547 | "converged: True LL-Null: -67.301\n", 548 | "Covariance Type: nonrobust LLR p-value: 0.001098\n", 549 | "==============================================================================\n", 550 | " coef std err z P>|z| [0.025 0.975]\n", 551 | "------------------------------------------------------------------------------\n", 552 | "const -2.1464 0.577 -3.718 0.000 -3.278 -1.015\n", 553 | "Card 1.0987 0.445 2.471 0.013 0.227 1.970\n", 554 | "Spending 0.3416 0.129 2.655 0.008 0.089 0.594\n", 555 | "==============================================================================\n", 556 | "\"\"\"" 557 | ] 558 | }, 559 | "execution_count": 34, 560 | "metadata": {}, 561 | "output_type": "execute_result" 562 | } 563 | ], 564 | "source": [ 565 | "\n", 566 | "x=tb1[['Card','Spending']]\n", 567 | "y=tb1['Coupon']\n", 568 | "import statsmodels.api as sm\n", 569 | "x1=sm.add_constant(x)\n", 570 | "logit_model=sm.Logit(y,x1)\n", 571 | "result=logit_model.fit()\n", 572 | "result.summary()" 573 | ] 574 | }, 575 | { 576 | "cell_type": "code", 577 | "execution_count": 35, 578 | "metadata": {}, 579 | "outputs": [ 580 | { 581 | "data": { 582 | "text/plain": [ 583 | "0.000549145469075383" 584 | ] 585 | }, 586 | "execution_count": 35, 587 | "metadata": {}, 588 | "output_type": "execute_result" 589 | } 590 | ], 591 | "source": [ 592 | "#chi sq value of g statistic\n", 593 | "from scipy.stats import chi2\n", 594 | "chi2.pdf(13.628,2) #g value, dof\n", 595 | "#gives p value" 596 | ] 597 | }, 598 | { 599 | "cell_type": "code", 600 | "execution_count": null, 601 | "metadata": {}, 602 | "outputs": [], 603 | "source": [] 604 | } 605 | ], 606 | "metadata": { 607 | "kernelspec": { 608 | "display_name": "Python 3", 609 | "language": "python", 610 | "name": "python3" 611 | }, 612 | "language_info": { 613 | "codemirror_mode": { 614 | "name": "ipython", 615 | "version": 3 616 | }, 617 | "file_extension": ".py", 618 | "mimetype": "text/x-python", 619 | "name": "python", 620 | "nbconvert_exporter": "python", 621 | "pygments_lexer": "ipython3", 622 | "version": "3.7.4" 623 | } 624 | }, 625 | "nbformat": 4, 626 | "nbformat_minor": 2 627 | } 628 | -------------------------------------------------------------------------------- /Week10.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 3, 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "data": { 20 | "text/html": [ 21 | "
\n", 22 | "\n", 35 | "\n", 36 | " \n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | "
Rsp Noaapesmaergc
01991912001
12461200000
23571511000
34941822111
45821321111
56591200200
67611212000
7829900110
89361311000
910911622110
1011551000100
1112581101000
1213671411011
1314771412210
1415711200210
1516831622101
1617961522201
1718871211001
1819621100000
192052901210
2021461010010
2122912022100
2223851221111
2324481111200
2425811711111
2526741621210
2627681221111
2728631210001
2829721402000
2930991911100
3031641311000
3132771310111
3233881622010
333454901100
3435861712101
3536731511010
3637791521001
3738851421211
3839961601101
3940591210010
4041841410101
4142711521100
4243891501011
4344381210110
4445621111201
4546931610101
4647711321100
4748551101000
4849741512010
4950762001101
\n", 602 | "
" 603 | ], 604 | "text/plain": [ 605 | " Rsp No aa pe sm ae r g c\n", 606 | "0 1 99 19 1 2 0 0 1\n", 607 | "1 2 46 12 0 0 0 0 0\n", 608 | "2 3 57 15 1 1 0 0 0\n", 609 | "3 4 94 18 2 2 1 1 1\n", 610 | "4 5 82 13 2 1 1 1 1\n", 611 | "5 6 59 12 0 0 2 0 0\n", 612 | "6 7 61 12 1 2 0 0 0\n", 613 | "7 8 29 9 0 0 1 1 0\n", 614 | "8 9 36 13 1 1 0 0 0\n", 615 | "9 10 91 16 2 2 1 1 0\n", 616 | "10 11 55 10 0 0 1 0 0\n", 617 | "11 12 58 11 0 1 0 0 0\n", 618 | "12 13 67 14 1 1 0 1 1\n", 619 | "13 14 77 14 1 2 2 1 0\n", 620 | "14 15 71 12 0 0 2 1 0\n", 621 | "15 16 83 16 2 2 1 0 1\n", 622 | "16 17 96 15 2 2 2 0 1\n", 623 | "17 18 87 12 1 1 0 0 1\n", 624 | "18 19 62 11 0 0 0 0 0\n", 625 | "19 20 52 9 0 1 2 1 0\n", 626 | "20 21 46 10 1 0 0 1 0\n", 627 | "21 22 91 20 2 2 1 0 0\n", 628 | "22 23 85 12 2 1 1 1 1\n", 629 | "23 24 48 11 1 1 2 0 0\n", 630 | "24 25 81 17 1 1 1 1 1\n", 631 | "25 26 74 16 2 1 2 1 0\n", 632 | "26 27 68 12 2 1 1 1 1\n", 633 | "27 28 63 12 1 0 0 0 1\n", 634 | "28 29 72 14 0 2 0 0 0\n", 635 | "29 30 99 19 1 1 1 0 0\n", 636 | "30 31 64 13 1 1 0 0 0\n", 637 | "31 32 77 13 1 0 1 1 1\n", 638 | "32 33 88 16 2 2 0 1 0\n", 639 | "33 34 54 9 0 1 1 0 0\n", 640 | "34 35 86 17 1 2 1 0 1\n", 641 | "35 36 73 15 1 1 0 1 0\n", 642 | "36 37 79 15 2 1 0 0 1\n", 643 | "37 38 85 14 2 1 2 1 1\n", 644 | "38 39 96 16 0 1 1 0 1\n", 645 | "39 40 59 12 1 0 0 1 0\n", 646 | "40 41 84 14 1 0 1 0 1\n", 647 | "41 42 71 15 2 1 1 0 0\n", 648 | "42 43 89 15 0 1 0 1 1\n", 649 | "43 44 38 12 1 0 1 1 0\n", 650 | "44 45 62 11 1 1 2 0 1\n", 651 | "45 46 93 16 1 0 1 0 1\n", 652 | "46 47 71 13 2 1 1 0 0\n", 653 | "47 48 55 11 0 1 0 0 0\n", 654 | "48 49 74 15 1 2 0 1 0\n", 655 | "49 50 76 20 0 1 1 0 1" 656 | ] 657 | }, 658 | "execution_count": 3, 659 | "metadata": {}, 660 | "output_type": "execute_result" 661 | } 662 | ], 663 | "source": [ 664 | "acad=pd.read_excel('C:/Users/Garima Singh/Desktop/mooc data analysis/acad.xlsx')\n", 665 | "acad" 666 | ] 667 | }, 668 | { 669 | "cell_type": "code", 670 | "execution_count": 4, 671 | "metadata": {}, 672 | "outputs": [], 673 | "source": [ 674 | "#poisson gof test\n", 675 | "obs=pd.pivot_table(acad[['g','sm']],index='g',columns='sm',aggfunc=len)" 676 | ] 677 | }, 678 | { 679 | "cell_type": "code", 680 | "execution_count": 6, 681 | "metadata": {}, 682 | "outputs": [ 683 | { 684 | "data": { 685 | "text/html": [ 686 | "
\n", 687 | "\n", 700 | "\n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | "
sm012
g
010136
1498
\n", 730 | "
" 731 | ], 732 | "text/plain": [ 733 | "sm 0 1 2\n", 734 | "g \n", 735 | "0 10 13 6\n", 736 | "1 4 9 8" 737 | ] 738 | }, 739 | "execution_count": 6, 740 | "metadata": {}, 741 | "output_type": "execute_result" 742 | } 743 | ], 744 | "source": [ 745 | "obs #contingency table with obs f" 746 | ] 747 | }, 748 | { 749 | "cell_type": "code", 750 | "execution_count": 13, 751 | "metadata": {}, 752 | "outputs": [ 753 | { 754 | "data": { 755 | "text/plain": [ 756 | "2.3649585225939904" 757 | ] 758 | }, 759 | "execution_count": 13, 760 | "metadata": {}, 761 | "output_type": "execute_result" 762 | } 763 | ], 764 | "source": [ 765 | "from scipy.stats import chi2_contingency\n", 766 | "chi2,p,dof,tb1=chi2_contingency(obs)\n", 767 | "chi2" 768 | ] 769 | }, 770 | { 771 | "cell_type": "code", 772 | "execution_count": 14, 773 | "metadata": {}, 774 | "outputs": [ 775 | { 776 | "data": { 777 | "text/plain": [ 778 | "0.3065178579178871" 779 | ] 780 | }, 781 | "execution_count": 14, 782 | "metadata": {}, 783 | "output_type": "execute_result" 784 | } 785 | ], 786 | "source": [ 787 | "p" 788 | ] 789 | }, 790 | { 791 | "cell_type": "code", 792 | "execution_count": 15, 793 | "metadata": {}, 794 | "outputs": [ 795 | { 796 | "data": { 797 | "text/plain": [ 798 | "2" 799 | ] 800 | }, 801 | "execution_count": 15, 802 | "metadata": {}, 803 | "output_type": "execute_result" 804 | } 805 | ], 806 | "source": [ 807 | "dof" 808 | ] 809 | }, 810 | { 811 | "cell_type": "code", 812 | "execution_count": 16, 813 | "metadata": {}, 814 | "outputs": [ 815 | { 816 | "data": { 817 | "text/plain": [ 818 | "array([[ 8.12, 12.76, 8.12],\n", 819 | " [ 5.88, 9.24, 5.88]])" 820 | ] 821 | }, 822 | "execution_count": 16, 823 | "metadata": {}, 824 | "output_type": "execute_result" 825 | } 826 | ], 827 | "source": [ 828 | "tb1 #contingency table of exp f" 829 | ] 830 | }, 831 | { 832 | "cell_type": "code", 833 | "execution_count": 37, 834 | "metadata": {}, 835 | "outputs": [ 836 | { 837 | "data": { 838 | "text/html": [ 839 | "
\n", 840 | "\n", 853 | "\n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | "
ArrivalsFrequency
000
111
224
3310
4414
5520
6612
7712
889
998
10106
11113
12121
\n", 929 | "
" 930 | ], 931 | "text/plain": [ 932 | " Arrivals Frequency\n", 933 | "0 0 0\n", 934 | "1 1 1\n", 935 | "2 2 4\n", 936 | "3 3 10\n", 937 | "4 4 14\n", 938 | "5 5 20\n", 939 | "6 6 12\n", 940 | "7 7 12\n", 941 | "8 8 9\n", 942 | "9 9 8\n", 943 | "10 10 6\n", 944 | "11 11 3\n", 945 | "12 12 1" 946 | ] 947 | }, 948 | "execution_count": 37, 949 | "metadata": {}, 950 | "output_type": "execute_result" 951 | } 952 | ], 953 | "source": [ 954 | "import matplotlib as mp\n", 955 | "import numpy as np\n", 956 | "from scipy.optimize import minimize\n", 957 | "import scipy.stats as stats\n", 958 | "from scipy import stats\n", 959 | "import statsmodels.api as sm\n", 960 | "from statsmodels.formula.api import ols\n", 961 | "from matplotlib import pyplot as plt\n", 962 | "import pandas as pd\n", 963 | "import numpy as np\n", 964 | "import math\n", 965 | "import scipy\n", 966 | "from scipy import stats\n", 967 | "import pandas as pd\n", 968 | "import numpy as np\n", 969 | "import math\n", 970 | "import scipy\n", 971 | "from scipy import stats\n", 972 | "from scipy import stats\n", 973 | "import statsmodels.api as sm\n", 974 | "\n", 975 | "import statsmodels.formula.api \n", 976 | "import statsmodels.formula.api as smf\n", 977 | "from statsmodels.formula.api import ols\n", 978 | "from matplotlib import pyplot as plt\n", 979 | "from scipy.stats import chi2\n", 980 | "from scipy.stats import poisson\n", 981 | "data=pd.read_excel('C:/Users/Garima Singh/Desktop/mooc data analysis/P_distribution.xlsx')\n", 982 | "data" 983 | ] 984 | }, 985 | { 986 | "cell_type": "code", 987 | "execution_count": 38, 988 | "metadata": {}, 989 | "outputs": [ 990 | { 991 | "data": { 992 | "text/plain": [ 993 | "0 0\n", 994 | "1 1\n", 995 | "2 4\n", 996 | "3 10\n", 997 | "4 14\n", 998 | "5 20\n", 999 | "6 12\n", 1000 | "7 12\n", 1001 | "8 9\n", 1002 | "9 8\n", 1003 | "10 6\n", 1004 | "11 3\n", 1005 | "12 1\n", 1006 | "Name: Frequency, dtype: int64" 1007 | ] 1008 | }, 1009 | "execution_count": 38, 1010 | "metadata": {}, 1011 | "output_type": "execute_result" 1012 | } 1013 | ], 1014 | "source": [ 1015 | "obs_freq=data['Frequency']\n", 1016 | "obs_freq" 1017 | ] 1018 | }, 1019 | { 1020 | "cell_type": "code", 1021 | "execution_count": 39, 1022 | "metadata": {}, 1023 | "outputs": [ 1024 | { 1025 | "data": { 1026 | "text/plain": [ 1027 | "6.0" 1028 | ] 1029 | }, 1030 | "execution_count": 39, 1031 | "metadata": {}, 1032 | "output_type": "execute_result" 1033 | } 1034 | ], 1035 | "source": [ 1036 | "total_arrival=600\n", 1037 | "total_time=100\n", 1038 | "mu=total_arrival/total_time\n", 1039 | "mu" 1040 | ] 1041 | }, 1042 | { 1043 | "cell_type": "code", 1044 | "execution_count": 40, 1045 | "metadata": {}, 1046 | "outputs": [], 1047 | "source": [ 1048 | "#finding expected f\n", 1049 | "exp_freq=[]\n", 1050 | "for i in range(len(obs_freq)):\n", 1051 | " e_freq=100*poisson.pmf(i,mu)\n", 1052 | " exp_freq.append(e_freq)" 1053 | ] 1054 | }, 1055 | { 1056 | "cell_type": "code", 1057 | "execution_count": 41, 1058 | "metadata": {}, 1059 | "outputs": [ 1060 | { 1061 | "data": { 1062 | "text/plain": [ 1063 | "[0.24787521766663584,\n", 1064 | " 1.4872513059998145,\n", 1065 | " 4.461753917999444,\n", 1066 | " 8.923507835998894,\n", 1067 | " 13.385261753998332,\n", 1068 | " 16.062314104797995,\n", 1069 | " 16.06231410479801,\n", 1070 | " 13.767697804112569,\n", 1071 | " 10.32577335308442,\n", 1072 | " 6.883848902056284,\n", 1073 | " 4.130309341233764,\n", 1074 | " 2.2528960043093247,\n", 1075 | " 1.1264480021546681]" 1076 | ] 1077 | }, 1078 | "execution_count": 41, 1079 | "metadata": {}, 1080 | "output_type": "execute_result" 1081 | } 1082 | ], 1083 | "source": [ 1084 | "exp_freq" 1085 | ] 1086 | }, 1087 | { 1088 | "cell_type": "code", 1089 | "execution_count": 42, 1090 | "metadata": {}, 1091 | "outputs": [ 1092 | { 1093 | "data": { 1094 | "text/plain": [ 1095 | "[0.25,\n", 1096 | " 1.49,\n", 1097 | " 4.46,\n", 1098 | " 8.92,\n", 1099 | " 13.39,\n", 1100 | " 16.06,\n", 1101 | " 16.06,\n", 1102 | " 13.77,\n", 1103 | " 10.33,\n", 1104 | " 6.88,\n", 1105 | " 4.13,\n", 1106 | " 2.25,\n", 1107 | " 1.13]" 1108 | ] 1109 | }, 1110 | "execution_count": 42, 1111 | "metadata": {}, 1112 | "output_type": "execute_result" 1113 | } 1114 | ], 1115 | "source": [ 1116 | "exp_freq_round=[round(elem,2) for elem in exp_freq]\n", 1117 | "exp_freq_round #rounding nos" 1118 | ] 1119 | }, 1120 | { 1121 | "cell_type": "code", 1122 | "execution_count": 43, 1123 | "metadata": {}, 1124 | "outputs": [ 1125 | { 1126 | "data": { 1127 | "text/html": [ 1128 | "
\n", 1129 | "\n", 1142 | "\n", 1143 | " \n", 1144 | " \n", 1145 | " \n", 1146 | " \n", 1147 | " \n", 1148 | " \n", 1149 | " \n", 1150 | " \n", 1151 | " \n", 1152 | " \n", 1153 | " \n", 1154 | " \n", 1155 | " \n", 1156 | " \n", 1157 | " \n", 1158 | " \n", 1159 | " \n", 1160 | " \n", 1161 | " \n", 1162 | " \n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | " \n", 1186 | " \n", 1187 | " \n", 1188 | " \n", 1189 | " \n", 1190 | " \n", 1191 | " \n", 1192 | " \n", 1193 | " \n", 1194 | " \n", 1195 | " \n", 1196 | " \n", 1197 | " \n", 1198 | " \n", 1199 | " \n", 1200 | " \n", 1201 | " \n", 1202 | " \n", 1203 | " \n", 1204 | " \n", 1205 | " \n", 1206 | " \n", 1207 | " \n", 1208 | " \n", 1209 | " \n", 1210 | " \n", 1211 | " \n", 1212 | " \n", 1213 | " \n", 1214 | " \n", 1215 | " \n", 1216 | " \n", 1217 | "
Obs freqExp freq
000.25
111.49
244.46
3108.92
41413.39
52016.06
61216.06
71213.77
8910.33
986.88
1064.13
1132.25
1211.13
\n", 1218 | "
" 1219 | ], 1220 | "text/plain": [ 1221 | " Obs freq Exp freq\n", 1222 | "0 0 0.25\n", 1223 | "1 1 1.49\n", 1224 | "2 4 4.46\n", 1225 | "3 10 8.92\n", 1226 | "4 14 13.39\n", 1227 | "5 20 16.06\n", 1228 | "6 12 16.06\n", 1229 | "7 12 13.77\n", 1230 | "8 9 10.33\n", 1231 | "9 8 6.88\n", 1232 | "10 6 4.13\n", 1233 | "11 3 2.25\n", 1234 | "12 1 1.13" 1235 | ] 1236 | }, 1237 | "execution_count": 43, 1238 | "metadata": {}, 1239 | "output_type": "execute_result" 1240 | } 1241 | ], 1242 | "source": [ 1243 | "df=pd.DataFrame(list(zip(obs_freq,exp_freq_round)),columns=['Obs freq','Exp freq'])\n", 1244 | "df" 1245 | ] 1246 | }, 1247 | { 1248 | "cell_type": "code", 1249 | "execution_count": 44, 1250 | "metadata": {}, 1251 | "outputs": [], 1252 | "source": [ 1253 | "obs_freq=[5,10,14,20,12,12,9,8,10]\n", 1254 | "exp_freq=[6.2,5,6,7,8,9,7,6,8]\n" 1255 | ] 1256 | }, 1257 | { 1258 | "cell_type": "code", 1259 | "execution_count": 45, 1260 | "metadata": {}, 1261 | "outputs": [ 1262 | { 1263 | "data": { 1264 | "text/plain": [ 1265 | "Power_divergenceResult(statistic=44.77987711213518, pvalue=4.050901717224414e-07)" 1266 | ] 1267 | }, 1268 | "execution_count": 45, 1269 | "metadata": {}, 1270 | "output_type": "execute_result" 1271 | } 1272 | ], 1273 | "source": [ 1274 | " scipy.stats.chisquare(obs_freq,exp_freq) # gives chi square cal and p value" 1275 | ] 1276 | }, 1277 | { 1278 | "cell_type": "code", 1279 | "execution_count": 46, 1280 | "metadata": {}, 1281 | "outputs": [ 1282 | { 1283 | "data": { 1284 | "text/plain": [ 1285 | "14.067140449340169" 1286 | ] 1287 | }, 1288 | "execution_count": 46, 1289 | "metadata": {}, 1290 | "output_type": "execute_result" 1291 | } 1292 | ], 1293 | "source": [ 1294 | "from scipy.stats import chi2\n", 1295 | "chi2.ppf(0.95,7) #gives table chi square value" 1296 | ] 1297 | }, 1298 | { 1299 | "cell_type": "code", 1300 | "execution_count": 47, 1301 | "metadata": {}, 1302 | "outputs": [ 1303 | { 1304 | "data": { 1305 | "text/plain": [ 1306 | "5032.2" 1307 | ] 1308 | }, 1309 | "execution_count": 47, 1310 | "metadata": {}, 1311 | "output_type": "execute_result" 1312 | } 1313 | ], 1314 | "source": [ 1315 | "#uniform gof test\n", 1316 | "x=[13,343,3432,234,123,45345,23,233,342,234]\n", 1317 | "np.mean(x)" 1318 | ] 1319 | }, 1320 | { 1321 | "cell_type": "code", 1322 | "execution_count": 48, 1323 | "metadata": {}, 1324 | "outputs": [ 1325 | { 1326 | "data": { 1327 | "text/plain": [ 1328 | "Power_divergenceResult(statistic=360703.37458765553, pvalue=0.0)" 1329 | ] 1330 | }, 1331 | "execution_count": 48, 1332 | "metadata": {}, 1333 | "output_type": "execute_result" 1334 | } 1335 | ], 1336 | "source": [ 1337 | "exp_f=[5032.2,5032.2,5032.2,5032.2,5032.2,5032.2,5032.2,5032.2,5032.2,5032.2]\n", 1338 | "from scipy.stats import chisquare\n", 1339 | "chisquare(x,exp_f)" 1340 | ] 1341 | }, 1342 | { 1343 | "cell_type": "code", 1344 | "execution_count": 49, 1345 | "metadata": {}, 1346 | "outputs": [], 1347 | "source": [ 1348 | "#normal dis gof test\n", 1349 | "a=[1,2,3,4,5,6,7,8,9]\n", 1350 | "mean=np.mean(a)" 1351 | ] 1352 | }, 1353 | { 1354 | "cell_type": "code", 1355 | "execution_count": 50, 1356 | "metadata": {}, 1357 | "outputs": [ 1358 | { 1359 | "data": { 1360 | "text/plain": [ 1361 | "5.0" 1362 | ] 1363 | }, 1364 | "execution_count": 50, 1365 | "metadata": {}, 1366 | "output_type": "execute_result" 1367 | } 1368 | ], 1369 | "source": [ 1370 | "mean" 1371 | ] 1372 | }, 1373 | { 1374 | "cell_type": "code", 1375 | "execution_count": 51, 1376 | "metadata": {}, 1377 | "outputs": [], 1378 | "source": [ 1379 | "std=np.std(a)" 1380 | ] 1381 | }, 1382 | { 1383 | "cell_type": "code", 1384 | "execution_count": 52, 1385 | "metadata": {}, 1386 | "outputs": [ 1387 | { 1388 | "data": { 1389 | "text/plain": [ 1390 | "2.581988897471611" 1391 | ] 1392 | }, 1393 | "execution_count": 52, 1394 | "metadata": {}, 1395 | "output_type": "execute_result" 1396 | } 1397 | ], 1398 | "source": [ 1399 | "std" 1400 | ] 1401 | }, 1402 | { 1403 | "cell_type": "code", 1404 | "execution_count": 54, 1405 | "metadata": {}, 1406 | "outputs": [], 1407 | "source": [ 1408 | "x=1/6\n", 1409 | "for j in range(1,6):\n", 1410 | " prob_int=[scipy.stats.norm.ppf(j*x,mean,std)]\n", 1411 | " " 1412 | ] 1413 | }, 1414 | { 1415 | "cell_type": "code", 1416 | "execution_count": 55, 1417 | "metadata": {}, 1418 | "outputs": [ 1419 | { 1420 | "data": { 1421 | "text/plain": [ 1422 | "[7.49787174284919]" 1423 | ] 1424 | }, 1425 | "execution_count": 55, 1426 | "metadata": {}, 1427 | "output_type": "execute_result" 1428 | } 1429 | ], 1430 | "source": [ 1431 | "prob_int" 1432 | ] 1433 | }, 1434 | { 1435 | "cell_type": "code", 1436 | "execution_count": 56, 1437 | "metadata": {}, 1438 | "outputs": [], 1439 | "source": [ 1440 | "exp_freq=[5,5,5,5,5,5]\n", 1441 | "obs_freq=[6,3,6,5,4,6]" 1442 | ] 1443 | }, 1444 | { 1445 | "cell_type": "code", 1446 | "execution_count": 59, 1447 | "metadata": {}, 1448 | "outputs": [ 1449 | { 1450 | "data": { 1451 | "text/plain": [ 1452 | "Power_divergenceResult(statistic=1.5999999999999999, pvalue=0.9012493445012737)" 1453 | ] 1454 | }, 1455 | "execution_count": 59, 1456 | "metadata": {}, 1457 | "output_type": "execute_result" 1458 | } 1459 | ], 1460 | "source": [ 1461 | "scipy.stats.chisquare(obs_freq,exp_freq)" 1462 | ] 1463 | }, 1464 | { 1465 | "cell_type": "code", 1466 | "execution_count": 60, 1467 | "metadata": {}, 1468 | "outputs": [ 1469 | { 1470 | "data": { 1471 | "text/plain": [ 1472 | "7.814727903251179" 1473 | ] 1474 | }, 1475 | "execution_count": 60, 1476 | "metadata": {}, 1477 | "output_type": "execute_result" 1478 | } 1479 | ], 1480 | "source": [ 1481 | "chi2.ppf(0.95,3) #table chi square value" 1482 | ] 1483 | }, 1484 | { 1485 | "cell_type": "code", 1486 | "execution_count": null, 1487 | "metadata": {}, 1488 | "outputs": [], 1489 | "source": [] 1490 | } 1491 | ], 1492 | "metadata": { 1493 | "kernelspec": { 1494 | "display_name": "Python 3", 1495 | "language": "python", 1496 | "name": "python3" 1497 | }, 1498 | "language_info": { 1499 | "codemirror_mode": { 1500 | "name": "ipython", 1501 | "version": 3 1502 | }, 1503 | "file_extension": ".py", 1504 | "mimetype": "text/x-python", 1505 | "name": "python", 1506 | "nbconvert_exporter": "python", 1507 | "pygments_lexer": "ipython3", 1508 | "version": "3.7.4" 1509 | } 1510 | }, 1511 | "nbformat": 4, 1512 | "nbformat_minor": 2 1513 | } 1514 | -------------------------------------------------------------------------------- /Week5.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 86, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np\n", 11 | "import math\n", 12 | "import scipy\n", 13 | "from scipy import stats" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 87, 19 | "metadata": {}, 20 | "outputs": [ 21 | { 22 | "data": { 23 | "text/plain": [ 24 | "-3.3724679378582554" 25 | ] 26 | }, 27 | "execution_count": 87, 28 | "metadata": {}, 29 | "output_type": "execute_result" 30 | } 31 | ], 32 | "source": [ 33 | "stats.t.ppf(0.0025,13)" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 88, 39 | "metadata": {}, 40 | "outputs": [ 41 | { 42 | "data": { 43 | "text/plain": [ 44 | "Ttest_indResult(statistic=-6.550432296063072, pvalue=0.0004203925153912486)" 45 | ] 46 | }, 47 | "execution_count": 88, 48 | "metadata": {}, 49 | "output_type": "execute_result" 50 | } 51 | ], 52 | "source": [ 53 | "metro=[3,7,25,10,15]\n", 54 | "rural=[48,44,40,38,35]\n", 55 | "stats.ttest_ind(metro,rural,equal_var=False)\n", 56 | " " 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 89, 62 | "metadata": {}, 63 | "outputs": [ 64 | { 65 | "data": { 66 | "text/plain": [ 67 | "Ttest_relResult(statistic=-5.303497930706049, pvalue=0.0060728031285194545)" 68 | ] 69 | }, 70 | "execution_count": 89, 71 | "metadata": {}, 72 | "output_type": "execute_result" 73 | } 74 | ], 75 | "source": [ 76 | "metro=[3,7,25,10,15]\n", 77 | "rural=[48,44,40,38,35]\n", 78 | "stats.ttest_rel(metro,rural)\n", 79 | " " 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 90, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "import math\n", 89 | "def two_samp_proportion(p1,p2,n1,n2):\n", 90 | " p_pool=((p1*n1)+(p2*n2))/(n1+n2)\n", 91 | " x=(p_pool*(1-p_pool)*((1/n1)+(1/n2)))\n", 92 | " s=math.sqrt(x)\n", 93 | " z=(p1-p2)/s\n", 94 | " if(z<0):\n", 95 | " p_val=stats.norm.cdf(z)\n", 96 | " else:\n", 97 | " p_val=1-stats.norm.cdf(z)\n", 98 | " return z, p_val*2" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 91, 104 | "metadata": {}, 105 | "outputs": [ 106 | { 107 | "data": { 108 | "text/plain": [ 109 | "(1.3442056254198995, 0.17888190308175567)" 110 | ] 111 | }, 112 | "execution_count": 91, 113 | "metadata": {}, 114 | "output_type": "execute_result" 115 | } 116 | ], 117 | "source": [ 118 | "two_samp_proportion(0.27,0.19,100,100)" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 92, 124 | "metadata": {}, 125 | "outputs": [ 126 | { 127 | "data": { 128 | "text/plain": [ 129 | "0.910558947366134" 130 | ] 131 | }, 132 | "execution_count": 92, 133 | "metadata": {}, 134 | "output_type": "execute_result" 135 | } 136 | ], 137 | "source": [ 138 | "stats.norm.cdf(1.344205)" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 93, 144 | "metadata": {}, 145 | "outputs": [ 146 | { 147 | "data": { 148 | "text/plain": [ 149 | "2.8450165269958436" 150 | ] 151 | }, 152 | "execution_count": 93, 153 | "metadata": {}, 154 | "output_type": "execute_result" 155 | } 156 | ], 157 | "source": [ 158 | "\n", 159 | "from scipy import stats\n", 160 | "from scipy.stats import f\n", 161 | "from scipy.stats import poisson\n", 162 | "scipy.stats.f.ppf(q=1-0.05, dfn=15, dfd=10)" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 94, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "f1=scipy.stats.f.ppf(q=0.05, dfn=15, dfd=15)" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 95, 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [ 180 | "x=[3,9,3,4,5,6]\n", 181 | "y=[1,2,3,4,5,6]\n", 182 | "f=np.var(x)/np.var(y)" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 96, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "dfn=len(x)-1\n", 192 | "dfd=len(y)-1" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 97, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [ 201 | "p_value=scipy.stats.f.cdf(f,dfn,dfd)" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 98, 207 | "metadata": {}, 208 | "outputs": [ 209 | { 210 | "data": { 211 | "text/plain": [ 212 | "0.6627143533357427" 213 | ] 214 | }, 215 | "execution_count": 98, 216 | "metadata": {}, 217 | "output_type": "execute_result" 218 | } 219 | ], 220 | "source": [ 221 | "p_value\n" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 99, 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [ 230 | "def samplesize(alfa,beta,mu1,mu2,sigma):\n", 231 | " z1=-1*stats.norm.ppf(alfa)\n", 232 | " z2=-1*stats.norm.ppf(beta)\n", 233 | " n=((((z1+z2)**2)*(sigma**2))/(mu1-mu2)**2)\n", 234 | " print(n)" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 100, 240 | "metadata": {}, 241 | "outputs": [ 242 | { 243 | "name": "stdout", 244 | "output_type": "stream", 245 | "text": [ 246 | "9.302043647889692\n" 247 | ] 248 | } 249 | ], 250 | "source": [ 251 | "samplesize(0.05,0.08,5,7,2)" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 101, 257 | "metadata": {}, 258 | "outputs": [], 259 | "source": [ 260 | "a=[4,3,2]\n", 261 | "b=[2,4,6]\n", 262 | "c=[2,1,3]" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 102, 268 | "metadata": {}, 269 | "outputs": [ 270 | { 271 | "data": { 272 | "text/plain": [ 273 | "F_onewayResult(statistic=1.5, pvalue=0.2962962962962962)" 274 | ] 275 | }, 276 | "execution_count": 102, 277 | "metadata": {}, 278 | "output_type": "execute_result" 279 | } 280 | ], 281 | "source": [ 282 | "stats.f_oneway(a,b,c)" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": 103, 288 | "metadata": {}, 289 | "outputs": [], 290 | "source": [ 291 | "from scipy import stats\n", 292 | "import statsmodels.api as sm\n", 293 | "from statsmodels.formula.api import ols\n", 294 | "from matplotlib import pyplot as plt" 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": 104, 300 | "metadata": {}, 301 | "outputs": [], 302 | "source": [ 303 | "data=pd.read_excel('C:/Users/Garima Singh/Desktop/mooc data analysis/oneway.xlsx')" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": 105, 309 | "metadata": {}, 310 | "outputs": [ 311 | { 312 | "data": { 313 | "text/html": [ 314 | "
\n", 315 | "\n", 328 | "\n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | "
Black BoardCase PresentationPPT
0422
1341
2263
\n", 358 | "
" 359 | ], 360 | "text/plain": [ 361 | " Black Board Case Presentation PPT \n", 362 | "0 4 2 2\n", 363 | "1 3 4 1\n", 364 | "2 2 6 3" 365 | ] 366 | }, 367 | "execution_count": 105, 368 | "metadata": {}, 369 | "output_type": "execute_result" 370 | } 371 | ], 372 | "source": [ 373 | "data" 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": 130, 379 | "metadata": {}, 380 | "outputs": [ 381 | { 382 | "data": { 383 | "text/html": [ 384 | "
\n", 385 | "\n", 398 | "\n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | "
Black BoardCase PresentationPPT
0422
1341
2263
\n", 428 | "
" 429 | ], 430 | "text/plain": [ 431 | " Black Board Case Presentation PPT\n", 432 | "0 4 2 2\n", 433 | "1 3 4 1\n", 434 | "2 2 6 3" 435 | ] 436 | }, 437 | "execution_count": 130, 438 | "metadata": {}, 439 | "output_type": "execute_result" 440 | } 441 | ], 442 | "source": [ 443 | "data = pd.DataFrame({'Black Board': {0: 4, 1: 3, 2: 2}, \n", 444 | " 'Case Presentation': {0: 2, 1: 4, 2: 6}, \n", 445 | " 'PPT': {0: 2, 1: 1, 2: 3}}) \n", 446 | "data" 447 | ] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "execution_count": 131, 452 | "metadata": {}, 453 | "outputs": [ 454 | { 455 | "name": "stdout", 456 | "output_type": "stream", 457 | "text": [ 458 | " Black Board variable value\n", 459 | "0 4 Case Presentation 2\n", 460 | "1 3 Case Presentation 4\n", 461 | "2 2 Case Presentation 6\n", 462 | "3 4 PPT 2\n", 463 | "4 3 PPT 1\n", 464 | "5 2 PPT 3\n" 465 | ] 466 | } 467 | ], 468 | "source": [ 469 | "datanew=pd.melt(data, id_vars=['Black Board'], value_vars=['Case Presentation','PPT'])\n", 470 | "print(datanew)" 471 | ] 472 | }, 473 | { 474 | "cell_type": "code", 475 | "execution_count": 132, 476 | "metadata": {}, 477 | "outputs": [], 478 | "source": [ 479 | "datanew.columns=['index','treatments','value']" 480 | ] 481 | }, 482 | { 483 | "cell_type": "code", 484 | "execution_count": 133, 485 | "metadata": {}, 486 | "outputs": [ 487 | { 488 | "data": { 489 | "text/html": [ 490 | "
\n", 491 | "\n", 504 | "\n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | "
indextreatmentsvalue
04Case Presentation2
13Case Presentation4
22Case Presentation6
34PPT2
43PPT1
52PPT3
\n", 552 | "
" 553 | ], 554 | "text/plain": [ 555 | " index treatments value\n", 556 | "0 4 Case Presentation 2\n", 557 | "1 3 Case Presentation 4\n", 558 | "2 2 Case Presentation 6\n", 559 | "3 4 PPT 2\n", 560 | "4 3 PPT 1\n", 561 | "5 2 PPT 3" 562 | ] 563 | }, 564 | "execution_count": 133, 565 | "metadata": {}, 566 | "output_type": "execute_result" 567 | } 568 | ], 569 | "source": [ 570 | "datanew" 571 | ] 572 | }, 573 | { 574 | "cell_type": "code", 575 | "execution_count": 149, 576 | "metadata": {}, 577 | "outputs": [], 578 | "source": [ 579 | "model=ols('value~C(treatments)',data=datanew).fit()" 580 | ] 581 | }, 582 | { 583 | "cell_type": "code", 584 | "execution_count": 148, 585 | "metadata": {}, 586 | "outputs": [ 587 | { 588 | "name": "stderr", 589 | "output_type": "stream", 590 | "text": [ 591 | "C:\\Users\\Garima Singh\\Anaconda3\\lib\\site-packages\\statsmodels\\stats\\stattools.py:71: ValueWarning: omni_normtest is not valid with less than 8 observations; 6 samples were given.\n", 592 | " \"samples were given.\" % int(n), ValueWarning)\n" 593 | ] 594 | }, 595 | { 596 | "data": { 597 | "text/html": [ 598 | "\n", 599 | "\n", 600 | "\n", 601 | " \n", 602 | "\n", 603 | "\n", 604 | " \n", 605 | "\n", 606 | "\n", 607 | " \n", 608 | "\n", 609 | "\n", 610 | " \n", 611 | "\n", 612 | "\n", 613 | " \n", 614 | "\n", 615 | "\n", 616 | " \n", 617 | "\n", 618 | "\n", 619 | " \n", 620 | "\n", 621 | "\n", 622 | " \n", 623 | "\n", 624 | "\n", 625 | " \n", 626 | "\n", 627 | "
OLS Regression Results
Dep. Variable: value R-squared: 0.375
Model: OLS Adj. R-squared: 0.219
Method: Least Squares F-statistic: 2.400
Date: Sun, 29 Mar 2020 Prob (F-statistic): 0.196
Time: 03:24:15 Log-Likelihood: -10.046
No. Observations: 6 AIC: 24.09
Df Residuals: 4 BIC: 23.68
Df Model: 1
Covariance Type: nonrobust
\n", 628 | "\n", 629 | "\n", 630 | " \n", 631 | "\n", 632 | "\n", 633 | " \n", 634 | "\n", 635 | "\n", 636 | " \n", 637 | "\n", 638 | "
coef std err t P>|t| [0.025 0.975]
Intercept 4.0000 0.913 4.382 0.012 1.465 6.535
C(treatments)[T.PPT] -2.0000 1.291 -1.549 0.196 -5.584 1.584
\n", 639 | "\n", 640 | "\n", 641 | " \n", 642 | "\n", 643 | "\n", 644 | " \n", 645 | "\n", 646 | "\n", 647 | " \n", 648 | "\n", 649 | "\n", 650 | " \n", 651 | "\n", 652 | "
Omnibus: nan Durbin-Watson: 1.700
Prob(Omnibus): nan Jarque-Bera (JB): 0.230
Skew: -0.000 Prob(JB): 0.891
Kurtosis: 2.040 Cond. No. 2.62


Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified." 653 | ], 654 | "text/plain": [ 655 | "\n", 656 | "\"\"\"\n", 657 | " OLS Regression Results \n", 658 | "==============================================================================\n", 659 | "Dep. Variable: value R-squared: 0.375\n", 660 | "Model: OLS Adj. R-squared: 0.219\n", 661 | "Method: Least Squares F-statistic: 2.400\n", 662 | "Date: Sun, 29 Mar 2020 Prob (F-statistic): 0.196\n", 663 | "Time: 03:24:15 Log-Likelihood: -10.046\n", 664 | "No. Observations: 6 AIC: 24.09\n", 665 | "Df Residuals: 4 BIC: 23.68\n", 666 | "Df Model: 1 \n", 667 | "Covariance Type: nonrobust \n", 668 | "========================================================================================\n", 669 | " coef std err t P>|t| [0.025 0.975]\n", 670 | "----------------------------------------------------------------------------------------\n", 671 | "Intercept 4.0000 0.913 4.382 0.012 1.465 6.535\n", 672 | "C(treatments)[T.PPT] -2.0000 1.291 -1.549 0.196 -5.584 1.584\n", 673 | "==============================================================================\n", 674 | "Omnibus: nan Durbin-Watson: 1.700\n", 675 | "Prob(Omnibus): nan Jarque-Bera (JB): 0.230\n", 676 | "Skew: -0.000 Prob(JB): 0.891\n", 677 | "Kurtosis: 2.040 Cond. No. 2.62\n", 678 | "==============================================================================\n", 679 | "\n", 680 | "Warnings:\n", 681 | "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", 682 | "\"\"\"" 683 | ] 684 | }, 685 | "execution_count": 148, 686 | "metadata": {}, 687 | "output_type": "execute_result" 688 | } 689 | ], 690 | "source": [ 691 | "model.summary()" 692 | ] 693 | }, 694 | { 695 | "cell_type": "markdown", 696 | "metadata": {}, 697 | "source": [ 698 | "model.summary()" 699 | ] 700 | }, 701 | { 702 | "cell_type": "code", 703 | "execution_count": 136, 704 | "metadata": {}, 705 | "outputs": [ 706 | { 707 | "data": { 708 | "text/html": [ 709 | "
\n", 710 | "\n", 723 | "\n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | "
NameCourseAge
0JohnMasters27
1BobGraduate23
2ShielaGraduate21
\n", 753 | "
" 754 | ], 755 | "text/plain": [ 756 | " Name Course Age\n", 757 | "0 John Masters 27\n", 758 | "1 Bob Graduate 23\n", 759 | "2 Shiela Graduate 21" 760 | ] 761 | }, 762 | "execution_count": 136, 763 | "metadata": {}, 764 | "output_type": "execute_result" 765 | } 766 | ], 767 | "source": [ 768 | "\n", 769 | "# Create a simple dataframe \n", 770 | " \n", 771 | "# importing pandas as pd \n", 772 | "import pandas as pd \n", 773 | " \n", 774 | "# creating a dataframe \n", 775 | "df = pd.DataFrame({'Name': {0: 'John', 1: 'Bob', 2: 'Shiela'}, \n", 776 | " 'Course': {0: 'Masters', 1: 'Graduate', 2: 'Graduate'}, \n", 777 | " 'Age': {0: 27, 1: 23, 2: 21}}) \n", 778 | "df " 779 | ] 780 | }, 781 | { 782 | "cell_type": "code", 783 | "execution_count": 112, 784 | "metadata": {}, 785 | "outputs": [ 786 | { 787 | "data": { 788 | "text/html": [ 789 | "
\n", 790 | "\n", 803 | "\n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | "
Namevariablevalue
0JohnCourseMasters
1BobCourseGraduate
2ShielaCourseGraduate
\n", 833 | "
" 834 | ], 835 | "text/plain": [ 836 | " Name variable value\n", 837 | "0 John Course Masters\n", 838 | "1 Bob Course Graduate\n", 839 | "2 Shiela Course Graduate" 840 | ] 841 | }, 842 | "execution_count": 112, 843 | "metadata": {}, 844 | "output_type": "execute_result" 845 | } 846 | ], 847 | "source": [ 848 | "# Name is id_vars and Course is value_vars \n", 849 | "pd.melt(df, id_vars =['Name'], value_vars =['Course']) " 850 | ] 851 | }, 852 | { 853 | "cell_type": "code", 854 | "execution_count": 113, 855 | "metadata": {}, 856 | "outputs": [], 857 | "source": [ 858 | "anova_table=sm.stats.anova_lm(model,type=1)" 859 | ] 860 | }, 861 | { 862 | "cell_type": "code", 863 | "execution_count": 114, 864 | "metadata": {}, 865 | "outputs": [ 866 | { 867 | "data": { 868 | "text/html": [ 869 | "
\n", 870 | "\n", 883 | "\n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | "
dfsum_sqmean_sqFPR(>F)
C(treatments)1.06.06.02.40.196261
Residual4.010.02.5NaNNaN
\n", 913 | "
" 914 | ], 915 | "text/plain": [ 916 | " df sum_sq mean_sq F PR(>F)\n", 917 | "C(treatments) 1.0 6.0 6.0 2.4 0.196261\n", 918 | "Residual 4.0 10.0 2.5 NaN NaN" 919 | ] 920 | }, 921 | "execution_count": 114, 922 | "metadata": {}, 923 | "output_type": "execute_result" 924 | } 925 | ], 926 | "source": [ 927 | "anova_table" 928 | ] 929 | }, 930 | { 931 | "cell_type": "code", 932 | "execution_count": 115, 933 | "metadata": {}, 934 | "outputs": [], 935 | "source": [ 936 | "fivep=[7,8,95,11,9,10]\n", 937 | "tenp=[74,8,95,141,0,30]\n", 938 | "fifteenp=[27,48,95,161,98,10]\n", 939 | "twentyp=[74,84,9,11,95,160]\n", 940 | "box_plot_data=[fivep,tenp,fifteenp,twentyp]" 941 | ] 942 | }, 943 | { 944 | "cell_type": "code", 945 | "execution_count": 116, 946 | "metadata": {}, 947 | "outputs": [ 948 | { 949 | "data": { 950 | "text/plain": [ 951 | "{'whiskers': [,\n", 952 | " ,\n", 953 | " ,\n", 954 | " ,\n", 955 | " ,\n", 956 | " ,\n", 957 | " ,\n", 958 | " ],\n", 959 | " 'caps': [,\n", 960 | " ,\n", 961 | " ,\n", 962 | " ,\n", 963 | " ,\n", 964 | " ,\n", 965 | " ,\n", 966 | " ],\n", 967 | " 'boxes': [,\n", 968 | " ,\n", 969 | " ,\n", 970 | " ],\n", 971 | " 'medians': [,\n", 972 | " ,\n", 973 | " ,\n", 974 | " ],\n", 975 | " 'fliers': [,\n", 976 | " ,\n", 977 | " ,\n", 978 | " ],\n", 979 | " 'means': []}" 980 | ] 981 | }, 982 | "execution_count": 116, 983 | "metadata": {}, 984 | "output_type": "execute_result" 985 | }, 986 | { 987 | "data": { 988 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAARWUlEQVR4nO3dbYylZX3H8e+vuyhag+y6g6G70KXNahc3PmVKabWNYhtBjcsLTSA+bOwmm7YUtdqqdJMuviDRtlGrbU22QsHUrFK1hRj7QHEt2UTAwUdwtWywwgi6YwCxNSLgvy/OjR1nz+7MeZg9M9d+P8nJnPu6r3Pu/94785trrnM/pKqQJLXl5yZdgCRp/Ax3SWqQ4S5JDTLcJalBhrskNWjtpAsA2LBhQ23evHnSZUjSqnLbbbd9r6qm+q1bEeG+efNmZmZmJl2GJK0qSb51tHVOy0hSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGLRruSa5KcjjJ7QvaL03yjSR3JPnzee2XJTnUrXvpchQtSTq2pRznfjXw18CHH29I8mJgO/Dsqno4yWld+9nARcCzgF8A/iPJM6rqsXEXLkk6ukVH7lV1E3D/gubfB95VVQ93fQ537duBj1bVw1X1TeAQcM4Y65VOOEnG+tCJYdg592cAv5nkliT/meRXu/aNwD3z+s12bUdIsivJTJKZubm5IcuQ2ldViz6W2s+b85w4vyyHDfe1wDrgXOBPgGvT+1f2+5f2/W6qqr1VNV1V01NTfS+NIEljd6L8shw23GeBT1bPrcBPgA1d+xnz+m0C7h2tREnSoIYN938GzgNI8gzgCcD3gOuBi5I8MclZwBbg1nEUKklaukWPlkmyD3gRsCHJLLAHuAq4qjs88sfAjur9fXJHkmuBrwGPApd4pIwkHX+LhntVXXyUVa89Sv8rgCtGKUqSNBrPUJWkBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNWjRcE9yVZLD3V2XFq774ySVZEO3nCTvT3IoyVeSPH85ipYkHdtSRu5XA+cvbExyBvA7wN3zmi+gd9/ULcAu4IOjlyhJGtSi4V5VNwH391n1XuBtQM1r2w58uHpuBk5NcvpYKtWqkWSsD0mDW/Qeqv0keSXw7ar68oIfvo3APfOWZ7u2+/q8xy56o3vOPPPMYcrQCtW7V/rikiy5r6TBDPyBapInA7uBP+u3uk9b35/eqtpbVdNVNT01NTVoGZKkYxhm5P7LwFnA46P2TcAXkpxDb6R+xry+m4B7Ry1SkjSYgUfuVfXVqjqtqjZX1WZ6gf78qvoOcD3w+u6omXOB71fVEVMykqTltZRDIfcBnwOemWQ2yc5jdP80cBdwCPg74A/GUqUkaSCLTstU1cWLrN8873kBl4xeliRpFJ6hKkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoOWcrOOq5IcTnL7vLa/SPL1JF9J8k9JTp237rIkh5J8I8lLl6twSdLRLWXkfjVw/oK2G4BtVfVs4L+AywCSnA1cBDyre83fJlkztmolSUuyaLhX1U3A/Qva/r2qHu0Wb6Z3I2yA7cBHq+rhqvomvdvtnTPGeiVJSzCOOfffBf6le74RuGfeutmuTZJ0HI0U7kl2A48CH3m8qU+3OsprdyWZSTIzNzc3ShmSpAWGDvckO4BXAK/pbowNvZH6GfO6bQLu7ff6qtpbVdNVNT01NTVsGZKkPoYK9yTnA28HXllVP5y36nrgoiRPTHIWsAW4dfQyJUmDWLtYhyT7gBcBG5LMAnvoHR3zROCGJAA3V9XvVdUdSa4FvkZvuuaSqnpsuYqXJPW3aLhX1cV9mq88Rv8rgCtGKUqSNBrPUJWkBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNWjRcE9yVZLDSW6f17Y+yQ1J7uy+ruvak+T9SQ4l+UqS5y9n8ZKk/pYycr8aOH9B2zuAG6tqC3BjtwxwAb37pm4BdgEfHE+ZkqRBLBruVXUTcP+C5u3ANd3za4AL57V/uHpuBk5Ncvq4ipUkLc2wc+5Pr6r7ALqvp3XtG4F75vWb7dqOkGRXkpkkM3Nzc0OWIUnqZ9wfqKZPW/XrWFV7q2q6qqanpqbGXIYkndiGDffvPj7d0n093LXPAmfM67cJuHf48iRJwxg23K8HdnTPdwDXzWt/fXfUzLnA9x+fvlnN9u3bx7Zt21izZg3btm1j3759ky5Jko5p7WIdkuwDXgRsSDIL7AHeBVybZCdwN/DqrvungZcBh4AfAm9YhpqPq3379rF7926uvPJKXvjCF3LgwAF27twJwMUXXzzh6rTarV+/ngceeGAs75X0mxUd3Lp167j//oXHUGi1SVXfKfHjanp6umZmZiZdRl/btm3jAx/4AC9+8Yt/2rZ//34uvfRSbr/99mO8UotJwkr4/puklbgPVmJNx9tq2QdJbquq6b7rVsI/YCWH+5o1a/jRj37ESSed9NO2Rx55hJNPPpnHHntsgpWtfqvlB2g5rcR9sBJrOt5Wyz44Vrh7+YFFbN26lQMHDvxM24EDB9i6deuEKpKkxRnui9i9ezc7d+5k//79PPLII+zfv5+dO3eye/fuSZcmSUe16AeqJ7rHPzS99NJLOXjwIFu3buWKK67ww1RJK5pz7pqY1TKvuZxW4j5YiTUtxTiPPBqn5Tz66Fhz7o7cJTXhgQceWJG/lMZ1iOqgnHOXpAY5ctdAxv2nryfeSMvDcNdA/NNXWh2clpGkBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUEjhXuSP0pyR5Lbk+xLcnKSs5LckuTOJB9L8oRxFStJWpqhwz3JRuCNwHRVbQPWABcB7wbeW1VbgAeAneMoVJK0dKNOy6wFnpRkLfBk4D7gPODj3fprgAtH3IYkaUBDh3tVfRv4S3o3yL4P+D5wG/BgVT3adZsFNvZ7fZJdSWaSzMzNzQ1bhiSpj6GvLZNkHbAdOAt4EPhH4II+XfteiKSq9gJ7oXc992HrkCSA2nMKXP7USZdxhNpzykS2O8qFw34b+GZVzQEk+STwG8CpSdZ2o/dNwL2jlylJx5Z3PrRiL2pXlx//7Y4y5343cG6SJ6d3Sb6XAF8D9gOv6vrsAK4brURJ0qBGmXO/hd4Hp18Avtq9117g7cBbkhwCngZcOYY6JUkDGOl67lW1B9izoPku4JxR3leSNBrPUJWkBhnuktQgw12SGmS4S1KDvEG2NEEr8cSbSZ10o/Ey3KUJWokn3kzqpBuNl9MyktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUoJHCPcmpST6e5OtJDib59STrk9yQ5M7u67pxFStJWppRR+5/BfxrVf0K8BzgIPAO4Maq2gLc2C1Lko6jocM9ySnAb9HdRq+qflxVDwLbgWu6btcAF45apCRpMKOM3H8JmAP+PskXk3woyc8DT6+q+wC6r6f1e3GSXUlmkszMzc2NUIYkaaFRwn0t8Hzgg1X1POB/GWAKpqr2VtV0VU1PTU2NUIYkaaFRwn0WmK2qW7rlj9ML++8mOR2g+3p4tBIlSYMaOtyr6jvAPUme2TW9BPgacD2wo2vbAVw3UoWSpIGNerOOS4GPJHkCcBfwBnq/MK5NshO4G3j1iNuQJA1opHCvqi8B031WvWSU95UkjcYzVCWpQYa7JDXIcJekBhnuktSgUY+W0Qmm9pwClz910mUcofacMukSpBXFcNdA8s6HqKpJl3GEJNTlk65CWjmclpGkBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUoJHDPcmaJF9M8qlu+awktyS5M8nHurs0SZKOo3GM3N8EHJy3/G7gvVW1BXgA2DmGbUiSBjBSuCfZBLwc+FC3HOA84ONdl2uAC0fZhiRpcKOO3N8HvA34Sbf8NODBqnq0W54FNvZ7YZJdSWaSzMzNzY1YhiRpvqHDPckrgMNVddv85j5d+14ftqr2VtV0VU1PTU0NW4YkqY9Rruf+AuCVSV4GnAycQm8kf2qStd3ofRNw7+hlSpIGMfTIvaouq6pNVbUZuAj4TFW9BtgPvKrrtgO4buQqJUkDWY7j3N8OvCXJIXpz8FcuwzYkSccwltvsVdVngc92z+8CzhnH+0qShuMZqpLUIMNdkhpkuEtSg8Yy5y5peL0Tu1eOdevWTboEjYHhLk1QVd9z/AaWZGzvpTY4LSNJDTLcJalBhrskNchwl6QGGe6S1CCPlpHUjJV2WClM7tBSw11SE8Z5KGgLh5Y6LSNJDTLcJalBhrskNWiUe6iekWR/koNJ7kjypq59fZIbktzZffVCFZJ0nI0ycn8UeGtVbQXOBS5JcjbwDuDGqtoC3NgtS5KOo1HuoXpfVX2he/4D4CCwEdgOXNN1uwa4cNQiJUmDGcuce5LNwPOAW4CnV9V90PsFAJx2lNfsSjKTZGZubm4cZUiSOiOHe5KnAJ8A3lxVDy31dVW1t6qmq2p6ampq1DIkSfOMFO5JTqIX7B+pqk92zd9Ncnq3/nTg8GglSpIGNcrRMgGuBA5W1Xvmrboe2NE93wFcN3x5kqRhjHL5gRcArwO+muRLXdufAu8Crk2yE7gbePVoJUqSBjV0uFfVAeBoV+l5ybDvK0kanWeoSlKDDHdJapCX/NXAvGa2tPIZ7hqI18yWVgenZSSpQY7cO+OcanA0KmnSmh+5r1+/niSLPsZpKdtbv379WLcpSfM1P3K//42PAadMuow+Hpt0AVolljr4WGo//7I8MTQf7nnnQyvymzkJdfmkq9BqsBK/f1ezE+WXZfPhDh66J+n/rdQwHrfmw32p/5F+oCqpJc2H+1IZyJJa0vzRMpJ0IjLcJalBhrskNchwl6QGLVu4Jzk/yTeSHEryjuXajiTpSMsS7knWAH8DXACcDVyc5Ozl2JYk6UjLNXI/BzhUVXdV1Y+BjwLbl2lbkqQFlus4943APfOWZ4Ffm98hyS5gF8CZZ565TGVoEgY5IWwpfT0HQRrcco3c+/3E/sxPaFXtrarpqpqemppapjI0CVU11oekwS1XuM8CZ8xb3gTcu0zbkiQtsFzh/nlgS5KzkjwBuAi4fpm2JUlaYFnm3Kvq0SR/CPwbsAa4qqruWI5tSZKOtGwXDquqTwOfXq73lyQdnWeoSlKDDHdJapDhLkkNMtwlqUFZCSeJJJkDvjXpOpZgA/C9SRfREPfn+Lgvx2u17M9frKq+Z4GuiHBfLZLMVNX0pOtohftzfNyX49XC/nRaRpIaZLhLUoMM98HsnXQBjXF/jo/7crxW/f50zl2SGuTIXZIaZLhLUoMM9yVIclWSw0lun3Qtq12SM5LsT3IwyR1J3jTpmlazJCcnuTXJl7v9+c5J17TaJVmT5ItJPjXpWkZhuC/N1cD5ky6iEY8Cb62qrcC5wCXePH0kDwPnVdVzgOcC5yc5d8I1rXZvAg5OuohRGe5LUFU3AfdPuo4WVNV9VfWF7vkP6P0QbZxsVatX9fxPt3hS9/AoiSEl2QS8HPjQpGsZleGuiUmyGXgecMtkK1ndummELwGHgRuqyv05vPcBbwN+MulCRmW4ayKSPAX4BPDmqnpo0vWsZlX1WFU9l969is9Jsm3SNa1GSV4BHK6q2yZdyzgY7jrukpxEL9g/UlWfnHQ9raiqB4HP4udDw3oB8Mok/w18FDgvyT9MtqThGe46rpIEuBI4WFXvmXQ9q12SqSSnds+fBPw28PXJVrU6VdVlVbWpqjYDFwGfqarXTrisoRnuS5BkH/A54JlJZpPsnHRNq9gLgNfRGxV9qXu8bNJFrWKnA/uTfAX4PL0591V9CJ/Gw8sPSFKDHLlLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktSg/wMRdz+k+y/ZTQAAAABJRU5ErkJggg==\n", 989 | "text/plain": [ 990 | "
" 991 | ] 992 | }, 993 | "metadata": { 994 | "needs_background": "light" 995 | }, 996 | "output_type": "display_data" 997 | } 998 | ], 999 | "source": [ 1000 | "plt.boxplot(box_plot_data)" 1001 | ] 1002 | }, 1003 | { 1004 | "cell_type": "code", 1005 | "execution_count": 117, 1006 | "metadata": {}, 1007 | "outputs": [], 1008 | "source": [ 1009 | "plt.show()" 1010 | ] 1011 | }, 1012 | { 1013 | "cell_type": "code", 1014 | "execution_count": 118, 1015 | "metadata": {}, 1016 | "outputs": [ 1017 | { 1018 | "data": { 1019 | "text/plain": [ 1020 | "0.6795458900175544" 1021 | ] 1022 | }, 1023 | "execution_count": 118, 1024 | "metadata": {}, 1025 | "output_type": "execute_result" 1026 | } 1027 | ], 1028 | "source": [ 1029 | "#p value\n", 1030 | "1-scipy.stats.f.cdf(0.6,4,5)" 1031 | ] 1032 | }, 1033 | { 1034 | "cell_type": "code", 1035 | "execution_count": 119, 1036 | "metadata": {}, 1037 | "outputs": [ 1038 | { 1039 | "data": { 1040 | "text/plain": [ 1041 | "11.39192807134976" 1042 | ] 1043 | }, 1044 | "execution_count": 119, 1045 | "metadata": {}, 1046 | "output_type": "execute_result" 1047 | } 1048 | ], 1049 | "source": [ 1050 | "#f value\n", 1051 | "scipy.stats.f.ppf(1-0.01,4,5)" 1052 | ] 1053 | }, 1054 | { 1055 | "cell_type": "code", 1056 | "execution_count": 120, 1057 | "metadata": {}, 1058 | "outputs": [ 1059 | { 1060 | "data": { 1061 | "text/plain": [ 1062 | "F_onewayResult(statistic=1.2265003217482984, pvalue=0.32610743788671676)" 1063 | ] 1064 | }, 1065 | "execution_count": 120, 1066 | "metadata": {}, 1067 | "output_type": "execute_result" 1068 | } 1069 | ], 1070 | "source": [ 1071 | "scipy.stats.f_oneway(fivep,tenp,fifteenp,twentyp)\n", 1072 | "#gives f and p value" 1073 | ] 1074 | }, 1075 | { 1076 | "cell_type": "code", 1077 | "execution_count": 121, 1078 | "metadata": {}, 1079 | "outputs": [ 1080 | { 1081 | "data": { 1082 | "text/plain": [ 1083 | "3.599599239012541e-06" 1084 | ] 1085 | }, 1086 | "execution_count": 121, 1087 | "metadata": {}, 1088 | "output_type": "execute_result" 1089 | } 1090 | ], 1091 | "source": [ 1092 | "1-scipy.stats.f.cdf(19.6,3,20)" 1093 | ] 1094 | }, 1095 | { 1096 | "cell_type": "code", 1097 | "execution_count": 123, 1098 | "metadata": {}, 1099 | "outputs": [ 1100 | { 1101 | "data": { 1102 | "text/plain": [ 1103 | "4.938193382310539" 1104 | ] 1105 | }, 1106 | "execution_count": 123, 1107 | "metadata": {}, 1108 | "output_type": "execute_result" 1109 | } 1110 | ], 1111 | "source": [ 1112 | "scipy.stats.f.ppf(1-0.01,dfn=3,dfd=20)" 1113 | ] 1114 | }, 1115 | { 1116 | "cell_type": "code", 1117 | "execution_count": 124, 1118 | "metadata": {}, 1119 | "outputs": [], 1120 | "source": [ 1121 | "data=pd.read_excel('C:/Users/Garima Singh/Desktop/mooc data analysis/Tensile strength of paper.xlsx')" 1122 | ] 1123 | }, 1124 | { 1125 | "cell_type": "code", 1126 | "execution_count": 129, 1127 | "metadata": {}, 1128 | "outputs": [ 1129 | { 1130 | "data": { 1131 | "text/html": [ 1132 | "
\n", 1133 | "\n", 1146 | "\n", 1147 | " \n", 1148 | " \n", 1149 | " \n", 1150 | " \n", 1151 | " \n", 1152 | " \n", 1153 | " \n", 1154 | " \n", 1155 | " \n", 1156 | " \n", 1157 | " \n", 1158 | " \n", 1159 | " \n", 1160 | " \n", 1161 | " \n", 1162 | " \n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | " \n", 1186 | " \n", 1187 | " \n", 1188 | " \n", 1189 | " \n", 1190 | " \n", 1191 | " \n", 1192 | " \n", 1193 | " \n", 1194 | " \n", 1195 | " \n", 1196 | " \n", 1197 | " \n", 1198 | " \n", 1199 | " \n", 1200 | "
hardwood concentration 5%hardwood concentration 10%hardwood concentration 15%hardwood concentration 20%
07121419
18171825
215131922
311181723
49191618
510151820
\n", 1201 | "
" 1202 | ], 1203 | "text/plain": [ 1204 | " hardwood concentration 5% hardwood concentration 10% \\\n", 1205 | "0 7 12 \n", 1206 | "1 8 17 \n", 1207 | "2 15 13 \n", 1208 | "3 11 18 \n", 1209 | "4 9 19 \n", 1210 | "5 10 15 \n", 1211 | "\n", 1212 | " hardwood concentration 15% hardwood concentration 20% \n", 1213 | "0 14 19 \n", 1214 | "1 18 25 \n", 1215 | "2 19 22 \n", 1216 | "3 17 23 \n", 1217 | "4 16 18 \n", 1218 | "5 18 20 " 1219 | ] 1220 | }, 1221 | "execution_count": 129, 1222 | "metadata": {}, 1223 | "output_type": "execute_result" 1224 | } 1225 | ], 1226 | "source": [ 1227 | "data" 1228 | ] 1229 | }, 1230 | { 1231 | "cell_type": "code", 1232 | "execution_count": 126, 1233 | "metadata": {}, 1234 | "outputs": [ 1235 | { 1236 | "ename": "KeyError", 1237 | "evalue": "\"The following 'id_vars' are not present in the DataFrame: ['index']\"", 1238 | "output_type": "error", 1239 | "traceback": [ 1240 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 1241 | "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", 1242 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdatanew\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmelt\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mid_vars\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'index'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue_vars\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'Case Presentation'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'PPT'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[0mdatanew\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 1243 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\reshape\\melt.py\u001b[0m in \u001b[0;36mmelt\u001b[1;34m(frame, id_vars, value_vars, var_name, value_name, col_level)\u001b[0m\n\u001b[0;32m 50\u001b[0m \u001b[1;34m\"The following 'id_vars' are not present\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 51\u001b[0m \u001b[1;34m\" in the DataFrame: {missing}\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 52\u001b[1;33m \u001b[1;34m\"\"\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmissing\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mlist\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmissing\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 53\u001b[0m )\n\u001b[0;32m 54\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 1244 | "\u001b[1;31mKeyError\u001b[0m: \"The following 'id_vars' are not present in the DataFrame: ['index']\"" 1245 | ] 1246 | } 1247 | ], 1248 | "source": [ 1249 | "datanew=pd.melt(data, id_vars=['index'], value_vars=['Case Presentation','PPT'])\n", 1250 | "datanew" 1251 | ] 1252 | }, 1253 | { 1254 | "cell_type": "code", 1255 | "execution_count": 138, 1256 | "metadata": {}, 1257 | "outputs": [], 1258 | "source": [ 1259 | "#fishers lsd test\n", 1260 | "t=scipy.stats.t.ppf(0.05,20)" 1261 | ] 1262 | }, 1263 | { 1264 | "cell_type": "code", 1265 | "execution_count": 139, 1266 | "metadata": {}, 1267 | "outputs": [ 1268 | { 1269 | "data": { 1270 | "text/plain": [ 1271 | "-1.7247182429207863" 1272 | ] 1273 | }, 1274 | "execution_count": 139, 1275 | "metadata": {}, 1276 | "output_type": "execute_result" 1277 | } 1278 | ], 1279 | "source": [ 1280 | "t" 1281 | ] 1282 | }, 1283 | { 1284 | "cell_type": "code", 1285 | "execution_count": 140, 1286 | "metadata": {}, 1287 | "outputs": [], 1288 | "source": [ 1289 | "t=t*-1" 1290 | ] 1291 | }, 1292 | { 1293 | "cell_type": "code", 1294 | "execution_count": 141, 1295 | "metadata": {}, 1296 | "outputs": [ 1297 | { 1298 | "data": { 1299 | "text/plain": [ 1300 | "1.7247182429207863" 1301 | ] 1302 | }, 1303 | "execution_count": 141, 1304 | "metadata": {}, 1305 | "output_type": "execute_result" 1306 | } 1307 | ], 1308 | "source": [ 1309 | "t\n" 1310 | ] 1311 | }, 1312 | { 1313 | "cell_type": "code", 1314 | "execution_count": 142, 1315 | "metadata": {}, 1316 | "outputs": [], 1317 | "source": [ 1318 | "n=6" 1319 | ] 1320 | }, 1321 | { 1322 | "cell_type": "code", 1323 | "execution_count": 143, 1324 | "metadata": {}, 1325 | "outputs": [], 1326 | "source": [ 1327 | "MSE=6.50833" 1328 | ] 1329 | }, 1330 | { 1331 | "cell_type": "code", 1332 | "execution_count": 144, 1333 | "metadata": {}, 1334 | "outputs": [ 1335 | { 1336 | "data": { 1337 | "text/plain": [ 1338 | "2.540342724459959" 1339 | ] 1340 | }, 1341 | "execution_count": 144, 1342 | "metadata": {}, 1343 | "output_type": "execute_result" 1344 | } 1345 | ], 1346 | "source": [ 1347 | "lsd=t*math.sqrt(2*MSE/n)\n", 1348 | "lsd" 1349 | ] 1350 | }, 1351 | { 1352 | "cell_type": "code", 1353 | "execution_count": 146, 1354 | "metadata": {}, 1355 | "outputs": [ 1356 | { 1357 | "data": { 1358 | "text/html": [ 1359 | "\n", 1360 | "\n", 1361 | "\n", 1362 | " \n", 1363 | "\n", 1364 | "\n", 1365 | " \n", 1366 | "\n", 1367 | "
Multiple Comparison of Means - Tukey HSD, FWER=0.05
group1 group2 meandiff p-adj lower upper reject
Case Presentation PPT -2.0 0.1963 -5.5844 1.5844 False
" 1368 | ], 1369 | "text/plain": [ 1370 | "" 1371 | ] 1372 | }, 1373 | "execution_count": 146, 1374 | "metadata": {}, 1375 | "output_type": "execute_result" 1376 | } 1377 | ], 1378 | "source": [ 1379 | "#tukey krammer test\n", 1380 | "from statsmodels.stats.multicomp import pairwise_tukeyhsd\n", 1381 | "from statsmodels.stats.multicomp import MultiComparison\n", 1382 | "mc=MultiComparison(datanew['value'],datanew['treatments'])\n", 1383 | "mcresult=mc.tukeyhsd(0.05)\n", 1384 | "mcresult.summary()\n" 1385 | ] 1386 | }, 1387 | { 1388 | "cell_type": "code", 1389 | "execution_count": null, 1390 | "metadata": {}, 1391 | "outputs": [], 1392 | "source": [] 1393 | } 1394 | ], 1395 | "metadata": { 1396 | "kernelspec": { 1397 | "display_name": "Python 3", 1398 | "language": "python", 1399 | "name": "python3" 1400 | }, 1401 | "language_info": { 1402 | "codemirror_mode": { 1403 | "name": "ipython", 1404 | "version": 3 1405 | }, 1406 | "file_extension": ".py", 1407 | "mimetype": "text/x-python", 1408 | "name": "python", 1409 | "nbconvert_exporter": "python", 1410 | "pygments_lexer": "ipython3", 1411 | "version": "3.7.4" 1412 | } 1413 | }, 1414 | "nbformat": 4, 1415 | "nbformat_minor": 4 1416 | } 1417 | --------------------------------------------------------------------------------