├── mooc data analysis
├── MLE.xlsx
├── MPG.xlsx
├── RBD.xlsx
├── lrm.xlsx
├── Tyler.xlsx
├── _RBD.xlsx
├── acad.xlsx
├── anova.xlsx
├── binpdf.gif
├── dummy.xlsx
├── rbd2.xlsx
├── reg2.xlsx
├── regr.xlsx
├── HARDNESS.xls
├── Reynolds.xlsx
├── Simmons.xls
├── TRUCKING.xlsx
├── Twoway.xlsx
├── dummy2.xlsx
├── icecream.xlsx
├── oneway.xlsx
├── regcar.xlsx
├── dataLRnew.xlsx
├── IBM-313 Marks.xlsx
├── cotton weight.xlsx
├── P_distribution.xlsx
├── icecream sale data.xlsx
└── Tensile strength of paper.xlsx
├── README.md
├── Week2.ipynb
├── Week3.ipynb
├── Week4.ipynb
├── Week11.ipynb
├── Week8.ipynb
├── Week10.ipynb
└── Week5.ipynb
/mooc data analysis/MLE.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/MLE.xlsx
--------------------------------------------------------------------------------
/mooc data analysis/MPG.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/MPG.xlsx
--------------------------------------------------------------------------------
/mooc data analysis/RBD.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/RBD.xlsx
--------------------------------------------------------------------------------
/mooc data analysis/lrm.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/lrm.xlsx
--------------------------------------------------------------------------------
/mooc data analysis/Tyler.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/Tyler.xlsx
--------------------------------------------------------------------------------
/mooc data analysis/_RBD.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/_RBD.xlsx
--------------------------------------------------------------------------------
/mooc data analysis/acad.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/acad.xlsx
--------------------------------------------------------------------------------
/mooc data analysis/anova.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/anova.xlsx
--------------------------------------------------------------------------------
/mooc data analysis/binpdf.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/binpdf.gif
--------------------------------------------------------------------------------
/mooc data analysis/dummy.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/dummy.xlsx
--------------------------------------------------------------------------------
/mooc data analysis/rbd2.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/rbd2.xlsx
--------------------------------------------------------------------------------
/mooc data analysis/reg2.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/reg2.xlsx
--------------------------------------------------------------------------------
/mooc data analysis/regr.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/regr.xlsx
--------------------------------------------------------------------------------
/mooc data analysis/HARDNESS.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/HARDNESS.xls
--------------------------------------------------------------------------------
/mooc data analysis/Reynolds.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/Reynolds.xlsx
--------------------------------------------------------------------------------
/mooc data analysis/Simmons.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/Simmons.xls
--------------------------------------------------------------------------------
/mooc data analysis/TRUCKING.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/TRUCKING.xlsx
--------------------------------------------------------------------------------
/mooc data analysis/Twoway.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/Twoway.xlsx
--------------------------------------------------------------------------------
/mooc data analysis/dummy2.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/dummy2.xlsx
--------------------------------------------------------------------------------
/mooc data analysis/icecream.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/icecream.xlsx
--------------------------------------------------------------------------------
/mooc data analysis/oneway.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/oneway.xlsx
--------------------------------------------------------------------------------
/mooc data analysis/regcar.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/regcar.xlsx
--------------------------------------------------------------------------------
/mooc data analysis/dataLRnew.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/dataLRnew.xlsx
--------------------------------------------------------------------------------
/mooc data analysis/IBM-313 Marks.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/IBM-313 Marks.xlsx
--------------------------------------------------------------------------------
/mooc data analysis/cotton weight.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/cotton weight.xlsx
--------------------------------------------------------------------------------
/mooc data analysis/P_distribution.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/P_distribution.xlsx
--------------------------------------------------------------------------------
/mooc data analysis/icecream sale data.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/icecream sale data.xlsx
--------------------------------------------------------------------------------
/mooc data analysis/Tensile strength of paper.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/garimasingh128/Data-Analytics-with-Python-NPTEL-MOOCS/HEAD/mooc data analysis/Tensile strength of paper.xlsx
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Data-Analysis-with-Python
2 |
3 | ## CODES WRITTEN FOR NPTEL COURSE OF DATA ANALYSIS WITH PYTHON
4 | # COURSE TYPE
5 | Elective
6 | # COURSE LEVEL
7 | Undergraduate/Postgraduate
8 | # COURSE LAYOUT
9 | ## Week 1 : Introduction to data analytics and Python fundamentals
10 | ## Week 2 : Introduction to probability
11 | ## Week 3 : Sampling and sampling distributions
12 | ## Week 4 : Hypothesis testing
13 | ## Week 5 : Two sample testing and introduction to ANOVA
14 | ## Week 6 : Two way ANOVA and linear regression
15 | ## Week 7 : Linear regression and multiple regression
16 | ## Week 8 : Concepts of MLE and Logistic regression
17 | ## Week 9 : ROC and Regression Analysis Model Building
18 | ## Week 10 : c2 Test and introduction to cluster analysis
19 | ## Week 11 : Clustering analysis
20 | ## Week 12 : Classification and Regression Trees (CART)
21 |
22 |
--------------------------------------------------------------------------------
/Week2.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import scipy\n",
10 | "import numpy as np\n"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 2,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "from scipy.stats import binom"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 3,
25 | "metadata": {},
26 | "outputs": [
27 | {
28 | "name": "stderr",
29 | "output_type": "stream",
30 | "text": [
31 | "'[SegmentLocal]' is not recognized as an internal or external command,\n",
32 | "operable program or batch file.\n"
33 | ]
34 | }
35 | ],
36 | "source": [
37 | ""
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": 5,
43 | "metadata": {},
44 | "outputs": [
45 | {
46 | "name": "stderr",
47 | "output_type": "stream",
48 | "text": [
49 | "'[]' is not recognized as an internal or external command,\n",
50 | "operable program or batch file.\n"
51 | ]
52 | }
53 | ],
54 | "source": [
55 | ""
56 | ]
57 | },
58 | {
59 | "cell_type": "code",
60 | "execution_count": 6,
61 | "metadata": {},
62 | "outputs": [
63 | {
64 | "name": "stderr",
65 | "output_type": "stream",
66 | "text": [
67 | "'[ChessUrl]' is not recognized as an internal or external command,\n",
68 | "operable program or batch file.\n"
69 | ]
70 | }
71 | ],
72 | "source": [
73 | ""
74 | ]
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": 7,
79 | "metadata": {},
80 | "outputs": [
81 | {
82 | "name": "stdout",
83 | "output_type": "stream",
84 | "text": [
85 | "0.09077799859322791\n"
86 | ]
87 | }
88 | ],
89 | "source": [
90 | "print(binom.pmf(k=19,n=25,p=0.65))"
91 | ]
92 | },
93 | {
94 | "cell_type": "code",
95 | "execution_count": 11,
96 | "metadata": {},
97 | "outputs": [
98 | {
99 | "name": "stdout",
100 | "output_type": "stream",
101 | "text": [
102 | "0.8850275957378545\n"
103 | ]
104 | }
105 | ],
106 | "source": [
107 | "print(binom.cdf(k=2,n=20,p=0.06))"
108 | ]
109 | },
110 | {
111 | "cell_type": "code",
112 | "execution_count": null,
113 | "metadata": {},
114 | "outputs": [],
115 | "source": []
116 | }
117 | ],
118 | "metadata": {
119 | "kernelspec": {
120 | "display_name": "Python 3",
121 | "language": "python",
122 | "name": "python3"
123 | },
124 | "language_info": {
125 | "codemirror_mode": {
126 | "name": "ipython",
127 | "version": 3
128 | },
129 | "file_extension": ".py",
130 | "mimetype": "text/x-python",
131 | "name": "python",
132 | "nbconvert_exporter": "python",
133 | "pygments_lexer": "ipython3",
134 | "version": "3.7.4"
135 | }
136 | },
137 | "nbformat": 4,
138 | "nbformat_minor": 2
139 | }
140 |
--------------------------------------------------------------------------------
/Week3.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 2,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "import numpy as np\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "from scipy.stats import binom"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 4,
18 | "metadata": {},
19 | "outputs": [
20 | {
21 | "name": "stdout",
22 | "output_type": "stream",
23 | "text": [
24 | "0.09077799859322791\n"
25 | ]
26 | }
27 | ],
28 | "source": [
29 | "print(binom.pmf(k=19,n=25,p=0.65))"
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 5,
35 | "metadata": {},
36 | "outputs": [
37 | {
38 | "name": "stdout",
39 | "output_type": "stream",
40 | "text": [
41 | "5.276857914295109e-07\n"
42 | ]
43 | }
44 | ],
45 | "source": [
46 | "print(binom.cdf(k=2,n=20,p=0.65))"
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": 6,
52 | "metadata": {},
53 | "outputs": [],
54 | "source": [
55 | "from scipy.stats import poisson"
56 | ]
57 | },
58 | {
59 | "cell_type": "code",
60 | "execution_count": 7,
61 | "metadata": {},
62 | "outputs": [
63 | {
64 | "data": {
65 | "text/plain": [
66 | "0.18044704431548356"
67 | ]
68 | },
69 | "execution_count": 7,
70 | "metadata": {},
71 | "output_type": "execute_result"
72 | }
73 | ],
74 | "source": [
75 | "poisson.pmf(3,2)"
76 | ]
77 | },
78 | {
79 | "cell_type": "code",
80 | "execution_count": 8,
81 | "metadata": {},
82 | "outputs": [
83 | {
84 | "data": {
85 | "text/plain": [
86 | "0.10081881344492458"
87 | ]
88 | },
89 | "execution_count": 8,
90 | "metadata": {},
91 | "output_type": "execute_result"
92 | }
93 | ],
94 | "source": [
95 | "poisson.pmf(7,3,2)"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": 9,
101 | "metadata": {},
102 | "outputs": [
103 | {
104 | "data": {
105 | "text/plain": [
106 | "0.9831701582510425"
107 | ]
108 | },
109 | "execution_count": 9,
110 | "metadata": {},
111 | "output_type": "execute_result"
112 | }
113 | ],
114 | "source": [
115 | "poisson.cdf(7,3.2)"
116 | ]
117 | },
118 | {
119 | "cell_type": "code",
120 | "execution_count": 10,
121 | "metadata": {},
122 | "outputs": [
123 | {
124 | "data": {
125 | "text/plain": [
126 | "array([27, 30, 33, 36, 39])"
127 | ]
128 | },
129 | "execution_count": 10,
130 | "metadata": {},
131 | "output_type": "execute_result"
132 | }
133 | ],
134 | "source": [
135 | "np.arange(27,40,3)"
136 | ]
137 | },
138 | {
139 | "cell_type": "code",
140 | "execution_count": 11,
141 | "metadata": {},
142 | "outputs": [],
143 | "source": [
144 | "from scipy.stats import uniform"
145 | ]
146 | },
147 | {
148 | "cell_type": "code",
149 | "execution_count": 12,
150 | "metadata": {},
151 | "outputs": [
152 | {
153 | "data": {
154 | "text/plain": [
155 | "33.0"
156 | ]
157 | },
158 | "execution_count": 12,
159 | "metadata": {},
160 | "output_type": "execute_result"
161 | }
162 | ],
163 | "source": [
164 | "uniform.mean(loc=27,scale=12)"
165 | ]
166 | },
167 | {
168 | "cell_type": "code",
169 | "execution_count": 15,
170 | "metadata": {},
171 | "outputs": [
172 | {
173 | "data": {
174 | "text/plain": [
175 | "array([0.25 , 0.33333333, 0.41666667, 0.5 , 0.58333333,\n",
176 | " 0.66666667])"
177 | ]
178 | },
179 | "execution_count": 15,
180 | "metadata": {},
181 | "output_type": "execute_result"
182 | }
183 | ],
184 | "source": [
185 | "uniform.cdf(np.arange(30,36,1),loc=27,scale=12)"
186 | ]
187 | },
188 | {
189 | "cell_type": "code",
190 | "execution_count": 16,
191 | "metadata": {},
192 | "outputs": [
193 | {
194 | "data": {
195 | "text/plain": [
196 | "1.4433756729740643"
197 | ]
198 | },
199 | "execution_count": 16,
200 | "metadata": {},
201 | "output_type": "execute_result"
202 | }
203 | ],
204 | "source": [
205 | "uniform.std(loc=200,scale=5)"
206 | ]
207 | },
208 | {
209 | "cell_type": "code",
210 | "execution_count": 17,
211 | "metadata": {},
212 | "outputs": [],
213 | "source": [
214 | "from scipy.stats import norm"
215 | ]
216 | },
217 | {
218 | "cell_type": "code",
219 | "execution_count": 19,
220 | "metadata": {},
221 | "outputs": [
222 | {
223 | "data": {
224 | "text/plain": [
225 | "0.8413447460685429"
226 | ]
227 | },
228 | "execution_count": 19,
229 | "metadata": {},
230 | "output_type": "execute_result"
231 | }
232 | ],
233 | "source": [
234 | "val=68\n",
235 | "mean=65.5\n",
236 | "sd=2.5\n",
237 | "norm.cdf(val,mean,sd)"
238 | ]
239 | },
240 | {
241 | "cell_type": "markdown",
242 | "metadata": {},
243 | "source": [
244 | "norm.cdf(68,m,sd)-norm.cdf(63,m,sd)"
245 | ]
246 | },
247 | {
248 | "cell_type": "code",
249 | "execution_count": 21,
250 | "metadata": {},
251 | "outputs": [
252 | {
253 | "data": {
254 | "text/plain": [
255 | "0.6826894921370859"
256 | ]
257 | },
258 | "execution_count": 21,
259 | "metadata": {},
260 | "output_type": "execute_result"
261 | }
262 | ],
263 | "source": [
264 | "norm.cdf(68,mean,sd)-norm.cdf(63,mean,sd)"
265 | ]
266 | },
267 | {
268 | "cell_type": "code",
269 | "execution_count": 22,
270 | "metadata": {},
271 | "outputs": [
272 | {
273 | "data": {
274 | "text/plain": [
275 | "1.6448536269514722"
276 | ]
277 | },
278 | "execution_count": 22,
279 | "metadata": {},
280 | "output_type": "execute_result"
281 | }
282 | ],
283 | "source": [
284 | "norm.ppf(0.95)"
285 | ]
286 | },
287 | {
288 | "cell_type": "code",
289 | "execution_count": 23,
290 | "metadata": {},
291 | "outputs": [
292 | {
293 | "data": {
294 | "text/plain": [
295 | "-1.6448536269514722"
296 | ]
297 | },
298 | "execution_count": 23,
299 | "metadata": {},
300 | "output_type": "execute_result"
301 | }
302 | ],
303 | "source": [
304 | "norm.ppf(1-0.95)"
305 | ]
306 | },
307 | {
308 | "cell_type": "code",
309 | "execution_count": 24,
310 | "metadata": {},
311 | "outputs": [],
312 | "source": [
313 | "from scipy.stats import hypergeom"
314 | ]
315 | },
316 | {
317 | "cell_type": "code",
318 | "execution_count": 25,
319 | "metadata": {},
320 | "outputs": [],
321 | "source": [
322 | "pval=hypergeom.sf(0,18,3,12)"
323 | ]
324 | },
325 | {
326 | "cell_type": "code",
327 | "execution_count": 26,
328 | "metadata": {},
329 | "outputs": [
330 | {
331 | "data": {
332 | "text/plain": [
333 | "0.9754901960784306"
334 | ]
335 | },
336 | "execution_count": 26,
337 | "metadata": {},
338 | "output_type": "execute_result"
339 | }
340 | ],
341 | "source": [
342 | "pval"
343 | ]
344 | },
345 | {
346 | "cell_type": "code",
347 | "execution_count": 27,
348 | "metadata": {},
349 | "outputs": [],
350 | "source": [
351 | "pval=hypergeom.cdf(1,18,5,11)"
352 | ]
353 | },
354 | {
355 | "cell_type": "code",
356 | "execution_count": 28,
357 | "metadata": {},
358 | "outputs": [
359 | {
360 | "data": {
361 | "text/plain": [
362 | "0.04738562091503275"
363 | ]
364 | },
365 | "execution_count": 28,
366 | "metadata": {},
367 | "output_type": "execute_result"
368 | }
369 | ],
370 | "source": [
371 | "pval"
372 | ]
373 | },
374 | {
375 | "cell_type": "code",
376 | "execution_count": 31,
377 | "metadata": {},
378 | "outputs": [],
379 | "source": [
380 | "from scipy.stats import expon"
381 | ]
382 | },
383 | {
384 | "cell_type": "code",
385 | "execution_count": 32,
386 | "metadata": {},
387 | "outputs": [
388 | {
389 | "data": {
390 | "text/plain": [
391 | "0.8466450331550716"
392 | ]
393 | },
394 | "execution_count": 32,
395 | "metadata": {},
396 | "output_type": "execute_result"
397 | }
398 | ],
399 | "source": [
400 | "expon.cdf(0.75,0,0.4)"
401 | ]
402 | },
403 | {
404 | "cell_type": "code",
405 | "execution_count": null,
406 | "metadata": {},
407 | "outputs": [],
408 | "source": []
409 | }
410 | ],
411 | "metadata": {
412 | "kernelspec": {
413 | "display_name": "Python 3",
414 | "language": "python",
415 | "name": "python3"
416 | },
417 | "language_info": {
418 | "codemirror_mode": {
419 | "name": "ipython",
420 | "version": 3
421 | },
422 | "file_extension": ".py",
423 | "mimetype": "text/x-python",
424 | "name": "python",
425 | "nbconvert_exporter": "python",
426 | "pygments_lexer": "ipython3",
427 | "version": "3.7.4"
428 | }
429 | },
430 | "nbformat": 4,
431 | "nbformat_minor": 2
432 | }
433 |
--------------------------------------------------------------------------------
/Week4.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 3,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from scipy import stats"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 4,
15 | "metadata": {},
16 | "outputs": [
17 | {
18 | "data": {
19 | "text/plain": [
20 | "0.07214503696589378"
21 | ]
22 | },
23 | "execution_count": 4,
24 | "metadata": {},
25 | "output_type": "execute_result"
26 | }
27 | ],
28 | "source": [
29 | "stats.norm.cdf(-1.46)"
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 5,
35 | "metadata": {},
36 | "outputs": [
37 | {
38 | "data": {
39 | "text/plain": [
40 | "0.9750021048517795"
41 | ]
42 | },
43 | "execution_count": 5,
44 | "metadata": {},
45 | "output_type": "execute_result"
46 | }
47 | ],
48 | "source": [
49 | "stats.norm.cdf(1.96)"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": 6,
55 | "metadata": {},
56 | "outputs": [
57 | {
58 | "data": {
59 | "text/plain": [
60 | "-1.2815515655446004"
61 | ]
62 | },
63 | "execution_count": 6,
64 | "metadata": {},
65 | "output_type": "execute_result"
66 | }
67 | ],
68 | "source": [
69 | "stats.norm.ppf(0.1)"
70 | ]
71 | },
72 | {
73 | "cell_type": "code",
74 | "execution_count": 7,
75 | "metadata": {},
76 | "outputs": [
77 | {
78 | "data": {
79 | "text/plain": [
80 | "0.011010658324411393"
81 | ]
82 | },
83 | "execution_count": 7,
84 | "metadata": {},
85 | "output_type": "execute_result"
86 | }
87 | ],
88 | "source": [
89 | "1-stats.norm.cdf(2.29)"
90 | ]
91 | },
92 | {
93 | "cell_type": "code",
94 | "execution_count": 8,
95 | "metadata": {},
96 | "outputs": [
97 | {
98 | "data": {
99 | "text/plain": [
100 | "0.040059156863817114"
101 | ]
102 | },
103 | "execution_count": 8,
104 | "metadata": {},
105 | "output_type": "execute_result"
106 | }
107 | ],
108 | "source": [
109 | "1-stats.norm.cdf(1.75)"
110 | ]
111 | },
112 | {
113 | "cell_type": "code",
114 | "execution_count": 9,
115 | "metadata": {},
116 | "outputs": [
117 | {
118 | "data": {
119 | "text/plain": [
120 | "-1.2815515655446004"
121 | ]
122 | },
123 | "execution_count": 9,
124 | "metadata": {},
125 | "output_type": "execute_result"
126 | }
127 | ],
128 | "source": [
129 | "stats.norm.ppf(0.1)"
130 | ]
131 | },
132 | {
133 | "cell_type": "code",
134 | "execution_count": 10,
135 | "metadata": {},
136 | "outputs": [
137 | {
138 | "data": {
139 | "text/plain": [
140 | "1.2815515655446004"
141 | ]
142 | },
143 | "execution_count": 10,
144 | "metadata": {},
145 | "output_type": "execute_result"
146 | }
147 | ],
148 | "source": [
149 | "stats.norm.ppf(1-0.1)"
150 | ]
151 | },
152 | {
153 | "cell_type": "code",
154 | "execution_count": 11,
155 | "metadata": {},
156 | "outputs": [],
157 | "source": [
158 | "#t test\n",
159 | "import numpy as np"
160 | ]
161 | },
162 | {
163 | "cell_type": "code",
164 | "execution_count": 12,
165 | "metadata": {},
166 | "outputs": [],
167 | "source": [
168 | "x=[10,12,20,21,22,24,18,15]"
169 | ]
170 | },
171 | {
172 | "cell_type": "code",
173 | "execution_count": 13,
174 | "metadata": {},
175 | "outputs": [
176 | {
177 | "data": {
178 | "text/plain": [
179 | "Ttest_1sampResult(statistic=1.5623450931857947, pvalue=0.1621787560592894)"
180 | ]
181 | },
182 | "execution_count": 13,
183 | "metadata": {},
184 | "output_type": "execute_result"
185 | }
186 | ],
187 | "source": [
188 | "stats.ttest_1samp(x,15)"
189 | ]
190 | },
191 | {
192 | "cell_type": "code",
193 | "execution_count": 15,
194 | "metadata": {},
195 | "outputs": [
196 | {
197 | "data": {
198 | "text/plain": [
199 | "0.90311273450826"
200 | ]
201 | },
202 | "execution_count": 15,
203 | "metadata": {},
204 | "output_type": "execute_result"
205 | }
206 | ],
207 | "source": [
208 | "stats.t.cdf(1.56,4) #4 is dof"
209 | ]
210 | },
211 | {
212 | "cell_type": "code",
213 | "execution_count": 16,
214 | "metadata": {},
215 | "outputs": [],
216 | "source": [
217 | "from statsmodels.stats.proportion import proportions_ztest"
218 | ]
219 | },
220 | {
221 | "cell_type": "code",
222 | "execution_count": 19,
223 | "metadata": {},
224 | "outputs": [
225 | {
226 | "data": {
227 | "text/plain": [
228 | "(1.286806739751111, 0.1981616572238455)"
229 | ]
230 | },
231 | "execution_count": 19,
232 | "metadata": {},
233 | "output_type": "execute_result"
234 | }
235 | ],
236 | "source": [
237 | "proportions_ztest(67,120,0.5) #o/p is z value and p value"
238 | ]
239 | },
240 | {
241 | "cell_type": "code",
242 | "execution_count": 20,
243 | "metadata": {},
244 | "outputs": [],
245 | "source": [
246 | "def z_value(x,mu,sem):\n",
247 | " z=(x-mu)/sem\n",
248 | " if(z<0):\n",
249 | " alfa=stats.norm.cdf(z)\n",
250 | " else:\n",
251 | " alfa=1-stats.norm.cdf(z)\n",
252 | " print(alfa)"
253 | ]
254 | },
255 | {
256 | "cell_type": "code",
257 | "execution_count": 21,
258 | "metadata": {},
259 | "outputs": [],
260 | "source": [
261 | "x=48.5\n",
262 | "mu=50\n",
263 | "sem=0.79"
264 | ]
265 | },
266 | {
267 | "cell_type": "code",
268 | "execution_count": 22,
269 | "metadata": {},
270 | "outputs": [
271 | {
272 | "name": "stdout",
273 | "output_type": "stream",
274 | "text": [
275 | "0.02879971774715278\n"
276 | ]
277 | }
278 | ],
279 | "source": [
280 | "z_value(x,mu,sem) #type 1 error or alfa/2"
281 | ]
282 | },
283 | {
284 | "cell_type": "code",
285 | "execution_count": 23,
286 | "metadata": {},
287 | "outputs": [
288 | {
289 | "data": {
290 | "text/plain": [
291 | "0.26339575390741593"
292 | ]
293 | },
294 | "execution_count": 23,
295 | "metadata": {},
296 | "output_type": "execute_result"
297 | }
298 | ],
299 | "source": [
300 | "#type 2 error or beta\n",
301 | "beta=stats.norm.cdf((51.5-52)/0.79)\n",
302 | "beta"
303 | ]
304 | },
305 | {
306 | "cell_type": "code",
307 | "execution_count": 25,
308 | "metadata": {},
309 | "outputs": [
310 | {
311 | "data": {
312 | "text/plain": [
313 | "0.8972117321157791"
314 | ]
315 | },
316 | "execution_count": 25,
317 | "metadata": {},
318 | "output_type": "execute_result"
319 | }
320 | ],
321 | "source": [
322 | "beta=stats.norm.cdf((51.5-50.5)/0.79)\n",
323 | "beta\n",
324 | "# myu is 50.5"
325 | ]
326 | },
327 | {
328 | "cell_type": "code",
329 | "execution_count": 26,
330 | "metadata": {},
331 | "outputs": [],
332 | "source": [
333 | "b=[89.19,90,95,90.46,93.21,97.19,97.04,91.07,92.75]"
334 | ]
335 | },
336 | {
337 | "cell_type": "code",
338 | "execution_count": 27,
339 | "metadata": {},
340 | "outputs": [],
341 | "source": [
342 | "a=[91.5,94.18,92.18,95.39,91.79,89.07,94.72,89.21]"
343 | ]
344 | },
345 | {
346 | "cell_type": "code",
347 | "execution_count": 28,
348 | "metadata": {},
349 | "outputs": [
350 | {
351 | "data": {
352 | "text/plain": [
353 | "Ttest_indResult(statistic=-0.4712203461464123, pvalue=0.6442636980321892)"
354 | ]
355 | },
356 | "execution_count": 28,
357 | "metadata": {},
358 | "output_type": "execute_result"
359 | }
360 | ],
361 | "source": [
362 | "stats.ttest_ind(a,b, equal_var=True)"
363 | ]
364 | },
365 | {
366 | "cell_type": "code",
367 | "execution_count": 29,
368 | "metadata": {},
369 | "outputs": [
370 | {
371 | "data": {
372 | "text/plain": [
373 | "-2.1447866879169277"
374 | ]
375 | },
376 | "execution_count": 29,
377 | "metadata": {},
378 | "output_type": "execute_result"
379 | }
380 | ],
381 | "source": [
382 | "stats.t.ppf(0.025,14) #critical t value"
383 | ]
384 | },
385 | {
386 | "cell_type": "code",
387 | "execution_count": 30,
388 | "metadata": {},
389 | "outputs": [],
390 | "source": [
391 | "#sigma 1 and sigma 2 known\n",
392 | "import pandas as pd\n",
393 | "import numpy as np\n",
394 | "import math\n",
395 | "from scipy import stats"
396 | ]
397 | },
398 | {
399 | "cell_type": "code",
400 | "execution_count": null,
401 | "metadata": {},
402 | "outputs": [],
403 | "source": [
404 | "def zandp(x1,x2,sigma1,sigma2,n1,n2):\n",
405 | " z=(x1-x2)/(math.sqrt(((sigma**2)/n1)+((sigma2**2))))"
406 | ]
407 | }
408 | ],
409 | "metadata": {
410 | "kernelspec": {
411 | "display_name": "Python 3",
412 | "language": "python",
413 | "name": "python3"
414 | },
415 | "language_info": {
416 | "codemirror_mode": {
417 | "name": "ipython",
418 | "version": 3
419 | },
420 | "file_extension": ".py",
421 | "mimetype": "text/x-python",
422 | "name": "python",
423 | "nbconvert_exporter": "python",
424 | "pygments_lexer": "ipython3",
425 | "version": "3.7.4"
426 | }
427 | },
428 | "nbformat": 4,
429 | "nbformat_minor": 2
430 | }
431 |
--------------------------------------------------------------------------------
/Week11.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 4,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import scipy\n",
10 | "from scipy.spatial import distance"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 5,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "\n",
20 | "import numpy as np\n"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 6,
26 | "metadata": {},
27 | "outputs": [
28 | {
29 | "data": {
30 | "text/plain": [
31 | "5.196152422706632"
32 | ]
33 | },
34 | "execution_count": 6,
35 | "metadata": {},
36 | "output_type": "execute_result"
37 | }
38 | ],
39 | "source": [
40 | "\n",
41 | "a=[1,2,3]\n",
42 | "b=[4,5,6]\n",
43 | "dist=distance.euclidean(a,b)\n",
44 | "dist"
45 | ]
46 | },
47 | {
48 | "cell_type": "code",
49 | "execution_count": 7,
50 | "metadata": {},
51 | "outputs": [
52 | {
53 | "data": {
54 | "text/plain": [
55 | "2.0"
56 | ]
57 | },
58 | "execution_count": 7,
59 | "metadata": {},
60 | "output_type": "execute_result"
61 | }
62 | ],
63 | "source": [
64 | "distance.minkowski([1,0,0],[0,1,0],1) #manhattan"
65 | ]
66 | },
67 | {
68 | "cell_type": "code",
69 | "execution_count": 8,
70 | "metadata": {},
71 | "outputs": [
72 | {
73 | "data": {
74 | "text/plain": [
75 | "1.4142135623730951"
76 | ]
77 | },
78 | "execution_count": 8,
79 | "metadata": {},
80 | "output_type": "execute_result"
81 | }
82 | ],
83 | "source": [
84 | "distance.minkowski([1,0,0],[0,1,0],2) #euclidean"
85 | ]
86 | },
87 | {
88 | "cell_type": "code",
89 | "execution_count": 9,
90 | "metadata": {},
91 | "outputs": [
92 | {
93 | "data": {
94 | "text/plain": [
95 | "1.2599210498948732"
96 | ]
97 | },
98 | "execution_count": 9,
99 | "metadata": {},
100 | "output_type": "execute_result"
101 | }
102 | ],
103 | "source": [
104 | "distance.minkowski([1,0,0],[0,1,0],3) #minkowski"
105 | ]
106 | },
107 | {
108 | "cell_type": "code",
109 | "execution_count": 10,
110 | "metadata": {},
111 | "outputs": [],
112 | "source": [
113 | "import pandas as pd\n",
114 | "from scipy.spatial import distance_matrix"
115 | ]
116 | },
117 | {
118 | "cell_type": "code",
119 | "execution_count": 11,
120 | "metadata": {},
121 | "outputs": [
122 | {
123 | "data": {
124 | "text/html": [
125 | "
\n",
126 | "\n",
139 | "
\n",
140 | " \n",
141 | " \n",
142 | " | \n",
143 | " a | \n",
144 | " b | \n",
145 | "
\n",
146 | " \n",
147 | " \n",
148 | " \n",
149 | " | 0 | \n",
150 | " 1 | \n",
151 | " 4 | \n",
152 | "
\n",
153 | " \n",
154 | " | 1 | \n",
155 | " 2 | \n",
156 | " 5 | \n",
157 | "
\n",
158 | " \n",
159 | " | 2 | \n",
160 | " 3 | \n",
161 | " 6 | \n",
162 | "
\n",
163 | " \n",
164 | "
\n",
165 | "
"
166 | ],
167 | "text/plain": [
168 | " a b\n",
169 | "0 1 4\n",
170 | "1 2 5\n",
171 | "2 3 6"
172 | ]
173 | },
174 | "execution_count": 11,
175 | "metadata": {},
176 | "output_type": "execute_result"
177 | }
178 | ],
179 | "source": [
180 | "data=[[1,4],[2,5],[3,6]]\n",
181 | "df=pd.DataFrame(data,columns=['a','b'])\n",
182 | "df"
183 | ]
184 | },
185 | {
186 | "cell_type": "code",
187 | "execution_count": 12,
188 | "metadata": {},
189 | "outputs": [
190 | {
191 | "data": {
192 | "text/html": [
193 | "\n",
194 | "\n",
207 | "
\n",
208 | " \n",
209 | " \n",
210 | " | \n",
211 | " 0 | \n",
212 | " 1 | \n",
213 | " 2 | \n",
214 | "
\n",
215 | " \n",
216 | " \n",
217 | " \n",
218 | " | 0 | \n",
219 | " 0.000000 | \n",
220 | " 1.414214 | \n",
221 | " 2.828427 | \n",
222 | "
\n",
223 | " \n",
224 | " | 1 | \n",
225 | " 1.414214 | \n",
226 | " 0.000000 | \n",
227 | " 1.414214 | \n",
228 | "
\n",
229 | " \n",
230 | " | 2 | \n",
231 | " 2.828427 | \n",
232 | " 1.414214 | \n",
233 | " 0.000000 | \n",
234 | "
\n",
235 | " \n",
236 | "
\n",
237 | "
"
238 | ],
239 | "text/plain": [
240 | " 0 1 2\n",
241 | "0 0.000000 1.414214 2.828427\n",
242 | "1 1.414214 0.000000 1.414214\n",
243 | "2 2.828427 1.414214 0.000000"
244 | ]
245 | },
246 | "execution_count": 12,
247 | "metadata": {},
248 | "output_type": "execute_result"
249 | }
250 | ],
251 | "source": [
252 | "pd.DataFrame(distance_matrix(df.values,df.values))"
253 | ]
254 | },
255 | {
256 | "cell_type": "code",
257 | "execution_count": 13,
258 | "metadata": {},
259 | "outputs": [
260 | {
261 | "data": {
262 | "text/html": [
263 | "\n",
264 | "\n",
277 | "
\n",
278 | " \n",
279 | " \n",
280 | " | \n",
281 | " wt | \n",
282 | " ht | \n",
283 | "
\n",
284 | " \n",
285 | " \n",
286 | " \n",
287 | " | a | \n",
288 | " 1 | \n",
289 | " 4 | \n",
290 | "
\n",
291 | " \n",
292 | " | b | \n",
293 | " 2 | \n",
294 | " 5 | \n",
295 | "
\n",
296 | " \n",
297 | " | c | \n",
298 | " 3 | \n",
299 | " 6 | \n",
300 | "
\n",
301 | " \n",
302 | "
\n",
303 | "
"
304 | ],
305 | "text/plain": [
306 | " wt ht\n",
307 | "a 1 4\n",
308 | "b 2 5\n",
309 | "c 3 6"
310 | ]
311 | },
312 | "execution_count": 13,
313 | "metadata": {},
314 | "output_type": "execute_result"
315 | }
316 | ],
317 | "source": [
318 | "data=[[1,4],[2,5],[3,6]]\n",
319 | "ctys=['a','b','c']\n",
320 | "df=pd.DataFrame(data,columns=['wt','ht'],index=ctys)\n",
321 | "df"
322 | ]
323 | },
324 | {
325 | "cell_type": "code",
326 | "execution_count": 14,
327 | "metadata": {},
328 | "outputs": [],
329 | "source": [
330 | "dist_mat=pd.DataFrame(distance_matrix(df.values,df.values),index=df.index,columns=df.index)"
331 | ]
332 | },
333 | {
334 | "cell_type": "code",
335 | "execution_count": 15,
336 | "metadata": {},
337 | "outputs": [
338 | {
339 | "data": {
340 | "text/html": [
341 | "\n",
342 | "\n",
355 | "
\n",
356 | " \n",
357 | " \n",
358 | " | \n",
359 | " a | \n",
360 | " b | \n",
361 | " c | \n",
362 | "
\n",
363 | " \n",
364 | " \n",
365 | " \n",
366 | " | a | \n",
367 | " 0.000000 | \n",
368 | " 1.414214 | \n",
369 | " 2.828427 | \n",
370 | "
\n",
371 | " \n",
372 | " | b | \n",
373 | " 1.414214 | \n",
374 | " 0.000000 | \n",
375 | " 1.414214 | \n",
376 | "
\n",
377 | " \n",
378 | " | c | \n",
379 | " 2.828427 | \n",
380 | " 1.414214 | \n",
381 | " 0.000000 | \n",
382 | "
\n",
383 | " \n",
384 | "
\n",
385 | "
"
386 | ],
387 | "text/plain": [
388 | " a b c\n",
389 | "a 0.000000 1.414214 2.828427\n",
390 | "b 1.414214 0.000000 1.414214\n",
391 | "c 2.828427 1.414214 0.000000"
392 | ]
393 | },
394 | "execution_count": 15,
395 | "metadata": {},
396 | "output_type": "execute_result"
397 | }
398 | ],
399 | "source": [
400 | "dist_mat"
401 | ]
402 | },
403 | {
404 | "cell_type": "code",
405 | "execution_count": 16,
406 | "metadata": {},
407 | "outputs": [
408 | {
409 | "data": {
410 | "text/html": [
411 | "\n",
412 | "\n",
425 | "
\n",
426 | " \n",
427 | " \n",
428 | " | \n",
429 | " a | \n",
430 | " b | \n",
431 | " c | \n",
432 | "
\n",
433 | " \n",
434 | " \n",
435 | " \n",
436 | " | a | \n",
437 | " 0.00 | \n",
438 | " 1.41 | \n",
439 | " 2.83 | \n",
440 | "
\n",
441 | " \n",
442 | " | b | \n",
443 | " 1.41 | \n",
444 | " 0.00 | \n",
445 | " 1.41 | \n",
446 | "
\n",
447 | " \n",
448 | " | c | \n",
449 | " 2.83 | \n",
450 | " 1.41 | \n",
451 | " 0.00 | \n",
452 | "
\n",
453 | " \n",
454 | "
\n",
455 | "
"
456 | ],
457 | "text/plain": [
458 | " a b c\n",
459 | "a 0.00 1.41 2.83\n",
460 | "b 1.41 0.00 1.41\n",
461 | "c 2.83 1.41 0.00"
462 | ]
463 | },
464 | "execution_count": 16,
465 | "metadata": {},
466 | "output_type": "execute_result"
467 | }
468 | ],
469 | "source": [
470 | "dist_mat.round(decimals=2,out=None)"
471 | ]
472 | },
473 | {
474 | "cell_type": "code",
475 | "execution_count": 17,
476 | "metadata": {},
477 | "outputs": [
478 | {
479 | "data": {
480 | "text/plain": [
481 | "KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,\n",
482 | " n_clusters=2, n_init=10, n_jobs=None, precompute_distances='auto',\n",
483 | " random_state=None, tol=0.0001, verbose=0)"
484 | ]
485 | },
486 | "execution_count": 17,
487 | "metadata": {},
488 | "output_type": "execute_result"
489 | }
490 | ],
491 | "source": [
492 | "#k means clustering\n",
493 | "#plot scatter plot\n",
494 | "from sklearn.cluster import KMeans\n",
495 | "kmeans=KMeans(n_clusters=2) #no of clusters 2\n",
496 | "kmeans.fit(data)"
497 | ]
498 | },
499 | {
500 | "cell_type": "code",
501 | "execution_count": 18,
502 | "metadata": {},
503 | "outputs": [
504 | {
505 | "data": {
506 | "text/plain": [
507 | "array([[3. , 6. ],\n",
508 | " [1.5, 4.5]])"
509 | ]
510 | },
511 | "execution_count": 18,
512 | "metadata": {},
513 | "output_type": "execute_result"
514 | }
515 | ],
516 | "source": [
517 | "labels=kmeans.predict(data)\n",
518 | "centroids=kmeans.cluster_centers_\n",
519 | "centroids"
520 | ]
521 | },
522 | {
523 | "cell_type": "code",
524 | "execution_count": 19,
525 | "metadata": {},
526 | "outputs": [
527 | {
528 | "ename": "NameError",
529 | "evalue": "name 'centroid' is not defined",
530 | "output_type": "error",
531 | "traceback": [
532 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
533 | "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
534 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[0mfig\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0max\u001b[0m\u001b[1;33m=\u001b[0m \u001b[0mplt\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msubplots\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 8\u001b[0m \u001b[0max\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mscatter\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mb\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mcolor\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mcolors1\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0malpha\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0.5\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0medgecolor\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'k'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 9\u001b[1;33m \u001b[1;32mfor\u001b[0m \u001b[0midx\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mcentroid\u001b[0m \u001b[1;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcentroid\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 10\u001b[0m \u001b[0mplt\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mscatter\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0mcentroid\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcolor\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mcolmap\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0midx\u001b[0m\u001b[1;33m+\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mtxt\u001b[0m \u001b[1;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mn\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
535 | "\u001b[1;31mNameError\u001b[0m: name 'centroid' is not defined"
536 | ]
537 | }
538 | ],
539 | "source": [
540 | "#diagram of clusters\n",
541 | "import matplotlib.pyplot as plt\n",
542 | "fig=plt.figure(figsize= (5,5))\n",
543 | "colmap={1:'r',2:'b'}\n",
544 | "colors=map(lambda x: colmap[x+1], labels)\n",
545 | "colors1=list(colors)\n",
546 | "fig,ax= plt.subplots()\n",
547 | "ax.scatter(a,b,color=colors1,alpha=0.5,edgecolor='k')\n",
548 | "for idx,centroid in enumerate(centroid):\n",
549 | " plt.scatter(*centroid, color=colmap[idx+1])\n",
550 | "for i,txt in enumerate(n):\n",
551 | " ax.annotate(txt,(x[i],y[i]))\n",
552 | "plt.grid()\n",
553 | "plt.xlim(0,5)\n",
554 | "plt.ylim(0,5)\n",
555 | "plt.show()"
556 | ]
557 | },
558 | {
559 | "cell_type": "code",
560 | "execution_count": null,
561 | "metadata": {},
562 | "outputs": [],
563 | "source": []
564 | }
565 | ],
566 | "metadata": {
567 | "kernelspec": {
568 | "display_name": "Python 3",
569 | "language": "python",
570 | "name": "python3"
571 | },
572 | "language_info": {
573 | "codemirror_mode": {
574 | "name": "ipython",
575 | "version": 3
576 | },
577 | "file_extension": ".py",
578 | "mimetype": "text/x-python",
579 | "name": "python",
580 | "nbconvert_exporter": "python",
581 | "pygments_lexer": "ipython3",
582 | "version": "3.7.4"
583 | }
584 | },
585 | "nbformat": 4,
586 | "nbformat_minor": 2
587 | }
588 |
--------------------------------------------------------------------------------
/Week8.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 3,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 14,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "import matplotlib as mp\n",
19 | "import numpy as np\n",
20 | "from scipy.optimize import minimize\n",
21 | "import scipy.stats as stats\n",
22 | "from scipy import stats\n",
23 | "import statsmodels.api as sm\n",
24 | "from statsmodels.formula.api import ols\n",
25 | "from matplotlib import pyplot as plt\n",
26 | "import pandas as pd\n",
27 | "import numpy as np\n",
28 | "import math\n",
29 | "import scipy\n",
30 | "from scipy import stats\n",
31 | "import pandas as pd\n",
32 | "import numpy as np\n",
33 | "import math\n",
34 | "import scipy\n",
35 | "from scipy import stats\n",
36 | "from scipy import stats\n",
37 | "import statsmodels.api as sm\n",
38 | "\n",
39 | "import statsmodels.formula.api \n",
40 | "import statsmodels.formula.api as smf\n",
41 | "from statsmodels.formula.api import ols\n",
42 | "from matplotlib import pyplot as plt"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": 15,
48 | "metadata": {},
49 | "outputs": [
50 | {
51 | "data": {
52 | "text/html": [
53 | "\n",
54 | "\n",
67 | "
\n",
68 | " \n",
69 | " \n",
70 | " | \n",
71 | " Id | \n",
72 | " Y | \n",
73 | " X | \n",
74 | "
\n",
75 | " \n",
76 | " \n",
77 | " \n",
78 | " | 0 | \n",
79 | " 1 | \n",
80 | " 2 | \n",
81 | " 1 | \n",
82 | "
\n",
83 | " \n",
84 | " | 1 | \n",
85 | " 2 | \n",
86 | " 6 | \n",
87 | " 4 | \n",
88 | "
\n",
89 | " \n",
90 | " | 2 | \n",
91 | " 3 | \n",
92 | " 7 | \n",
93 | " 5 | \n",
94 | "
\n",
95 | " \n",
96 | " | 3 | \n",
97 | " 4 | \n",
98 | " 9 | \n",
99 | " 6 | \n",
100 | "
\n",
101 | " \n",
102 | " | 4 | \n",
103 | " 5 | \n",
104 | " 15 | \n",
105 | " 9 | \n",
106 | "
\n",
107 | " \n",
108 | "
\n",
109 | "
"
110 | ],
111 | "text/plain": [
112 | " Id Y X\n",
113 | "0 1 2 1\n",
114 | "1 2 6 4\n",
115 | "2 3 7 5\n",
116 | "3 4 9 6\n",
117 | "4 5 15 9"
118 | ]
119 | },
120 | "execution_count": 15,
121 | "metadata": {},
122 | "output_type": "execute_result"
123 | }
124 | ],
125 | "source": [
126 | "tb1=pd.read_excel('C:/Users/Garima Singh/Desktop/mooc data analysis/MLE.xlsx')\n",
127 | "tb1"
128 | ]
129 | },
130 | {
131 | "cell_type": "code",
132 | "execution_count": 17,
133 | "metadata": {},
134 | "outputs": [
135 | {
136 | "name": "stdout",
137 | "output_type": "stream",
138 | "text": [
139 | " OLS Regression Results \n",
140 | "==============================================================================\n",
141 | "Dep. Variable: Y R-squared: 0.980\n",
142 | "Model: OLS Adj. R-squared: 0.973\n",
143 | "Method: Least Squares F-statistic: 145.9\n",
144 | "Date: Sun, 29 Mar 2020 Prob (F-statistic): 0.00122\n",
145 | "Time: 16:15:58 Log-Likelihood: -4.5811\n",
146 | "No. Observations: 5 AIC: 13.16\n",
147 | "Df Residuals: 3 BIC: 12.38\n",
148 | "Df Model: 1 \n",
149 | "Covariance Type: nonrobust \n",
150 | "==============================================================================\n",
151 | " coef std err t P>|t| [0.025 0.975]\n",
152 | "------------------------------------------------------------------------------\n",
153 | "const -0.2882 0.755 -0.382 0.728 -2.692 2.115\n",
154 | "X 1.6176 0.134 12.079 0.001 1.191 2.044\n",
155 | "==============================================================================\n",
156 | "Omnibus: nan Durbin-Watson: 1.405\n",
157 | "Prob(Omnibus): nan Jarque-Bera (JB): 0.551\n",
158 | "Skew: 0.089 Prob(JB): 0.759\n",
159 | "Kurtosis: 1.384 Cond. No. 12.5\n",
160 | "==============================================================================\n",
161 | "\n",
162 | "Warnings:\n",
163 | "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
164 | ]
165 | },
166 | {
167 | "name": "stderr",
168 | "output_type": "stream",
169 | "text": [
170 | "C:\\Users\\Garima Singh\\Anaconda3\\lib\\site-packages\\statsmodels\\stats\\stattools.py:71: ValueWarning: omni_normtest is not valid with less than 8 observations; 5 samples were given.\n",
171 | " \"samples were given.\" % int(n), ValueWarning)\n"
172 | ]
173 | }
174 | ],
175 | "source": [
176 | "#lse to get reg eqn\n",
177 | "import statsmodels.api as sm\n",
178 | "\n",
179 | "from statsmodels.formula.api import ols\n",
180 | "x=tb1['X']\n",
181 | "y=tb1['Y']\n",
182 | "x2=sm.add_constant(x)\n",
183 | "mod1=sm.OLS(y,x2)\n",
184 | "mod12=mod1.fit()\n",
185 | "print(mod12.summary())"
186 | ]
187 | },
188 | {
189 | "cell_type": "code",
190 | "execution_count": 18,
191 | "metadata": {},
192 | "outputs": [],
193 | "source": [
194 | "#mle to get reg eqn\n",
195 | "import matplotlib.pyplot as plt\n",
196 | "def like(parameters):\n",
197 | " m=parameters[0] #slope\n",
198 | " b=parameters[1] # y-intercept\n",
199 | " sigma=parameters[2] #sd of error term\n",
200 | " for i in np.arange(0,len(x)):\n",
201 | " y_exp=m*x+b\n",
202 | " L=(len(x)/2*np.log(2*np.pi)+len(x)/2*np.log(sigma**2)+1/(2*sigma**2)*sum((y-y_exp)**2))\n",
203 | " return L\n",
204 | " "
205 | ]
206 | },
207 | {
208 | "cell_type": "code",
209 | "execution_count": 21,
210 | "metadata": {},
211 | "outputs": [
212 | {
213 | "data": {
214 | "text/plain": [
215 | " fun: 4.581084072762135\n",
216 | " hess_inv: <3x3 LbfgsInvHessProduct with dtype=float64>\n",
217 | " jac: array([1.24344979e-06, 2.84217094e-06, 1.33226763e-06])\n",
218 | " message: b'CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL'\n",
219 | " nfev: 108\n",
220 | " nit: 17\n",
221 | " status: 0\n",
222 | " success: True\n",
223 | " x: array([ 1.61764689, -0.28823426, 0.60488214])"
224 | ]
225 | },
226 | "execution_count": 21,
227 | "metadata": {},
228 | "output_type": "execute_result"
229 | }
230 | ],
231 | "source": [
232 | "x=np.array([1,4,5,6,9])\n",
233 | "y=np.array([2,6,7,9,15])\n",
234 | "lik_model=minimize(like,np.array([2,2,2]),method=\"L-BFGS-B\")\n",
235 | "lik_model"
236 | ]
237 | },
238 | {
239 | "cell_type": "code",
240 | "execution_count": 24,
241 | "metadata": {},
242 | "outputs": [
243 | {
244 | "ename": "NameError",
245 | "evalue": "name 'lik_mode' is not defined",
246 | "output_type": "error",
247 | "traceback": [
248 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
249 | "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
250 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mlik_mode\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
251 | "\u001b[1;31mNameError\u001b[0m: name 'lik_mode' is not defined"
252 | ]
253 | }
254 | ],
255 | "source": [
256 | "lik_mode.x"
257 | ]
258 | },
259 | {
260 | "cell_type": "code",
261 | "execution_count": 25,
262 | "metadata": {},
263 | "outputs": [
264 | {
265 | "data": {
266 | "text/plain": [
267 | " final_simplex: (array([[ 1.61765326, -0.28825593, 0.60488098],\n",
268 | " [ 1.61765283, -0.28828724, 0.60488296],\n",
269 | " [ 1.61764444, -0.2882312 , 0.60486363],\n",
270 | " [ 1.61763731, -0.28820599, 0.60489994]]), array([4.58108408, 4.58108408, 4.58108408, 4.58108408]))\n",
271 | " fun: 4.581084075293504\n",
272 | " message: 'Optimization terminated successfully.'\n",
273 | " nfev: 182\n",
274 | " nit: 100\n",
275 | " status: 0\n",
276 | " success: True\n",
277 | " x: array([ 1.61765326, -0.28825593, 0.60488098])"
278 | ]
279 | },
280 | "execution_count": 25,
281 | "metadata": {},
282 | "output_type": "execute_result"
283 | }
284 | ],
285 | "source": [
286 | "\n",
287 | "lik_model=minimize(like,np.array([2,2,2]),method=\"Nelder-Mead\")\n",
288 | "lik_model"
289 | ]
290 | },
291 | {
292 | "cell_type": "code",
293 | "execution_count": 29,
294 | "metadata": {},
295 | "outputs": [
296 | {
297 | "data": {
298 | "text/html": [
299 | "\n",
300 | "\n",
313 | "
\n",
314 | " \n",
315 | " \n",
316 | " | \n",
317 | " Customer | \n",
318 | " Spending | \n",
319 | " Card | \n",
320 | " Coupon | \n",
321 | "
\n",
322 | " \n",
323 | " \n",
324 | " \n",
325 | " | 0 | \n",
326 | " 1 | \n",
327 | " 2.291 | \n",
328 | " 1 | \n",
329 | " 0 | \n",
330 | "
\n",
331 | " \n",
332 | " | 1 | \n",
333 | " 2 | \n",
334 | " 3.215 | \n",
335 | " 1 | \n",
336 | " 0 | \n",
337 | "
\n",
338 | " \n",
339 | " | 2 | \n",
340 | " 3 | \n",
341 | " 2.135 | \n",
342 | " 1 | \n",
343 | " 0 | \n",
344 | "
\n",
345 | " \n",
346 | " | 3 | \n",
347 | " 4 | \n",
348 | " 3.924 | \n",
349 | " 0 | \n",
350 | " 0 | \n",
351 | "
\n",
352 | " \n",
353 | " | 4 | \n",
354 | " 5 | \n",
355 | " 2.528 | \n",
356 | " 1 | \n",
357 | " 0 | \n",
358 | "
\n",
359 | " \n",
360 | " | ... | \n",
361 | " ... | \n",
362 | " ... | \n",
363 | " ... | \n",
364 | " ... | \n",
365 | "
\n",
366 | " \n",
367 | " | 95 | \n",
368 | " 96 | \n",
369 | " 3.318 | \n",
370 | " 0 | \n",
371 | " 0 | \n",
372 | "
\n",
373 | " \n",
374 | " | 96 | \n",
375 | " 97 | \n",
376 | " 2.421 | \n",
377 | " 1 | \n",
378 | " 0 | \n",
379 | "
\n",
380 | " \n",
381 | " | 97 | \n",
382 | " 98 | \n",
383 | " 6.073 | \n",
384 | " 0 | \n",
385 | " 0 | \n",
386 | "
\n",
387 | " \n",
388 | " | 98 | \n",
389 | " 99 | \n",
390 | " 2.630 | \n",
391 | " 1 | \n",
392 | " 0 | \n",
393 | "
\n",
394 | " \n",
395 | " | 99 | \n",
396 | " 100 | \n",
397 | " 3.411 | \n",
398 | " 0 | \n",
399 | " 1 | \n",
400 | "
\n",
401 | " \n",
402 | "
\n",
403 | "
100 rows × 4 columns
\n",
404 | "
"
405 | ],
406 | "text/plain": [
407 | " Customer Spending Card Coupon\n",
408 | "0 1 2.291 1 0\n",
409 | "1 2 3.215 1 0\n",
410 | "2 3 2.135 1 0\n",
411 | "3 4 3.924 0 0\n",
412 | "4 5 2.528 1 0\n",
413 | ".. ... ... ... ...\n",
414 | "95 96 3.318 0 0\n",
415 | "96 97 2.421 1 0\n",
416 | "97 98 6.073 0 0\n",
417 | "98 99 2.630 1 0\n",
418 | "99 100 3.411 0 1\n",
419 | "\n",
420 | "[100 rows x 4 columns]"
421 | ]
422 | },
423 | "execution_count": 29,
424 | "metadata": {},
425 | "output_type": "execute_result"
426 | }
427 | ],
428 | "source": [
429 | "tb1=pd.read_excel('C:/Users/Garima Singh/Desktop/mooc data analysis/Simmons.xls')\n",
430 | "tb1"
431 | ]
432 | },
433 | {
434 | "cell_type": "code",
435 | "execution_count": 31,
436 | "metadata": {},
437 | "outputs": [
438 | {
439 | "ename": "ValueError",
440 | "evalue": "Unrecognized marker style 't'",
441 | "output_type": "error",
442 | "traceback": [
443 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
444 | "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
445 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\matplotlib\\markers.py\u001b[0m in \u001b[0;36mset_marker\u001b[1;34m(self, marker)\u001b[0m\n\u001b[0;32m 308\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 309\u001b[1;33m \u001b[0mPath\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmarker\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 310\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_marker_function\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_set_vertices\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
446 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\matplotlib\\path.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, vertices, codes, _interpolation_steps, closed, readonly)\u001b[0m\n\u001b[0;32m 126\u001b[0m \"\"\"\n\u001b[1;32m--> 127\u001b[1;33m \u001b[0mvertices\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_to_unmasked_float_array\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvertices\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 128\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mvertices\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[1;33m!=\u001b[0m \u001b[1;36m2\u001b[0m \u001b[1;32mor\u001b[0m \u001b[0mvertices\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m!=\u001b[0m \u001b[1;36m2\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
447 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\matplotlib\\cbook\\__init__.py\u001b[0m in \u001b[0;36m_to_unmasked_float_array\u001b[1;34m(x)\u001b[0m\n\u001b[0;32m 1389\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1390\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0masarray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfloat\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1391\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
448 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\numpy\\core\\numeric.py\u001b[0m in \u001b[0;36masarray\u001b[1;34m(a, dtype, order)\u001b[0m\n\u001b[0;32m 537\u001b[0m \"\"\"\n\u001b[1;32m--> 538\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0marray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0morder\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0morder\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 539\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
449 | "\u001b[1;31mValueError\u001b[0m: could not convert string to float: 't'",
450 | "\nDuring handling of the above exception, another exception occurred:\n",
451 | "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
452 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mlinear_model\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmetrics\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mmean_squared_error\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m \u001b[0mplt\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mscatter\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtb1\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mSpending\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mtb1\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mCoupon\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mmarker\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m't'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mcolor\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'red'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
453 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\matplotlib\\pyplot.py\u001b[0m in \u001b[0;36mscatter\u001b[1;34m(x, y, s, c, marker, cmap, norm, vmin, vmax, alpha, linewidths, verts, edgecolors, plotnonfinite, data, **kwargs)\u001b[0m\n\u001b[0;32m 2845\u001b[0m \u001b[0mverts\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mverts\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0medgecolors\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0medgecolors\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2846\u001b[0m plotnonfinite=plotnonfinite, **({\"data\": data} if data is not\n\u001b[1;32m-> 2847\u001b[1;33m None else {}), **kwargs)\n\u001b[0m\u001b[0;32m 2848\u001b[0m \u001b[0msci\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0m__ret\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2849\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0m__ret\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
454 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\matplotlib\\__init__.py\u001b[0m in \u001b[0;36minner\u001b[1;34m(ax, data, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1599\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0minner\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0max\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1600\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mdata\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1601\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0max\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0mmap\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msanitize_sequence\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0margs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1602\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1603\u001b[0m \u001b[0mbound\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnew_sig\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbind\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0max\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
455 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\matplotlib\\axes\\_axes.py\u001b[0m in \u001b[0;36mscatter\u001b[1;34m(self, x, y, s, c, marker, cmap, norm, vmin, vmax, alpha, linewidths, verts, edgecolors, plotnonfinite, **kwargs)\u001b[0m\n\u001b[0;32m 4479\u001b[0m \u001b[0mmarker_obj\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmarker\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4480\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 4481\u001b[1;33m \u001b[0mmarker_obj\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmmarkers\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mMarkerStyle\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmarker\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4482\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4483\u001b[0m path = marker_obj.get_path().transformed(\n",
456 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\matplotlib\\markers.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, marker, fillstyle)\u001b[0m\n\u001b[0;32m 241\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_marker_function\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 242\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mset_fillstyle\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfillstyle\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 243\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mset_marker\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmarker\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 244\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 245\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_recache\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
457 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\matplotlib\\markers.py\u001b[0m in \u001b[0;36mset_marker\u001b[1;34m(self, marker)\u001b[0m\n\u001b[0;32m 311\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 312\u001b[0m raise ValueError('Unrecognized marker style {!r}'\n\u001b[1;32m--> 313\u001b[1;33m .format(marker))\n\u001b[0m\u001b[0;32m 314\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 315\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_marker\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmarker\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
458 | "\u001b[1;31mValueError\u001b[0m: Unrecognized marker style 't'"
459 | ]
460 | },
461 | {
462 | "data": {
463 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAD8CAYAAAB0IB+mAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAANgElEQVR4nO3ccYjfd33H8efLxE6mtY7lBEmi7Vi6Gsqg7ug6hFnRjbR/JP8USaC4SmnArQ5mETocKvWvKUMQsmm2iVPQWv1DD4nkD1fpECO50lmalMAtOnNE6Fm7/lO0Znvvj99P77hcct/e/e4u3vv5gMDv+/t9fr9758PdM798f/f7paqQJG1/r9rqASRJm8PgS1ITBl+SmjD4ktSEwZekJgy+JDWxavCTfC7Jc0meucLtSfLpJHNJnk7ytsmPKUlaryHP8D8PHLjK7XcB+8Z/jgL/tP6xJEmTtmrwq+oJ4GdXWXII+EKNnALekORNkxpQkjQZOyfwGLuBC0uO58fX/WT5wiRHGf0vgNe+9rV/dMstt0zgy0tSH08++eRPq2pqLfedRPCzwnUrfl5DVR0HjgNMT0/X7OzsBL68JPWR5L/Xet9J/JbOPLB3yfEe4OIEHleSNEGTCP4M8N7xb+vcAbxYVZedzpEkba1VT+kk+TJwJ7AryTzwUeDVAFX1GeAEcDcwB7wEvG+jhpUkrd2qwa+qI6vcXsBfTWwiSdKG8J22ktSEwZekJgy+JDVh8CWpCYMvSU0YfElqwuBLUhMGX5KaMPiS1ITBl6QmDL4kNWHwJakJgy9JTRh8SWrC4EtSEwZfkpow+JLUhMGXpCYMviQ1YfAlqQmDL0lNGHxJasLgS1ITBl+SmjD4ktSEwZekJgy+JDVh8CWpCYMvSU0YfElqwuBLUhMGX5KaMPiS1ITBl6QmDL4kNTEo+EkOJDmXZC7Jwyvc/uYkjyd5KsnTSe6e/KiSpPVYNfhJdgDHgLuA/cCRJPuXLfs74LGqug04DPzjpAeVJK3PkGf4twNzVXW+ql4GHgUOLVtTwOvHl28ALk5uREnSJAwJ/m7gwpLj+fF1S30MuDfJPHAC+MBKD5TkaJLZJLMLCwtrGFeStFZDgp8Vrqtlx0eAz1fVHuBu4ItJLnvsqjpeVdNVNT01NfXKp5UkrdmQ4M8De5cc7+HyUzb3A48BVNX3gNcAuyYxoCRpMoYE/zSwL8lNSa5j9KLszLI1PwbeBZDkrYyC7zkbSbqGrBr8qroEPAicBJ5l9Ns4Z5I8kuTgeNlDwANJfgB8Gbivqpaf9pEkbaGdQxZV1QlGL8Yuve4jSy6fBd4+2dEkSZPkO20lqQmDL0lNGHxJasLgS1ITBl+SmjD4ktSEwZekJgy+JDVh8CWpCYMvSU0YfElqwuBLUhMGX5KaMPiS1ITBl6QmDL4kNWHwJakJgy9JTRh8SWrC4EtSEwZfkpow+JLUhMGXpCYMviQ1YfAlqQmDL0lNGHxJasLgS1ITBl+SmjD4ktSEwZekJgy+JDVh8CWpCYMvSU0MCn6SA0nOJZlL8vAV1rwnydkkZ5J8abJjSpLWa+dqC5LsAI4BfwbMA6eTzFTV2SVr9gF/C7y9ql5I8saNGliStDZDnuHfDsxV1fmqehl4FDi0bM0DwLGqegGgqp6b7JiSpPUaEvzdwIUlx/Pj65a6Gbg5yXeTnEpyYKUHSnI0yWyS2YWFhbVNLElakyHBzwrX1bLjncA+4E7gCPAvSd5w2Z2qjlfVdFVNT01NvdJZJUnrMCT488DeJcd7gIsrrPlGVf2yqn4InGP0D4Ak6RoxJPingX1JbkpyHXAYmFm25uvAOwGS7GJ0iuf8JAeVJK3PqsGvqkvAg8BJ4Fngsao6k+SRJAfHy04Czyc5CzwOfKiqnt+ooSVJr1yqlp+O3xzT09M1Ozu7JV9bkn5TJXmyqqbXcl/faStJTRh8SWrC4EtSEwZfkpow+JLUhMGXpCYMviQ1YfAlqQmDL0lNGHxJasLgS1ITBl+SmjD4ktSEwZekJgy+JDVh8CWpCYMvSU0YfElqwuBLUhMGX5KaMPiS1ITBl6QmDL4kNWHwJakJgy9JTRh8SWrC4EtSEwZfkpow+JLUhMGXpCYMviQ1YfAlqQmDL0lNGHxJasLgS1ITg4Kf5ECSc0nmkjx8lXX3JKkk05MbUZI0CasGP8kO4BhwF7AfOJJk/wrrrgf+Gvj+pIeUJK3fkGf4twNzVXW+ql4GHgUOrbDu48AngJ9PcD5J0oQMCf5u4MKS4/nxdb+W5DZgb1V982oPlORoktkkswsLC694WEnS2g0Jfla4rn59Y/Iq4FPAQ6s9UFUdr6rpqpqempoaPqUkad2GBH8e2LvkeA9wccnx9cCtwHeS/Ai4A5jxhVtJurYMCf5pYF+Sm5JcBxwGZn51Y1W9WFW7qurGqroROAUcrKrZDZlYkrQmqwa/qi4BDwIngWeBx6rqTJJHkhzc6AElSZOxc8iiqjoBnFh23UeusPbO9Y8lSZo032krSU0YfElqwuBLUhMGX5KaMPiS1ITBl6QmDL4kNWHwJakJgy9JTRh8SWrC4EtSEwZfkpow+JLUhMGXpCYMviQ1YfAlqQmDL0lNGHxJasLgS1ITBl+SmjD4ktSEwZekJgy+JDVh8CWpCYMvSU0YfElqwuBLUhMGX5KaMPiS1ITBl6QmDL4kNWHwJakJgy9JTRh8SWpiUPCTHEhyLslckodXuP2DSc4meTrJt5O8ZfKjSpLWY9XgJ9kBHAPuAvYDR5LsX7bsKWC6qv4Q+BrwiUkPKklanyHP8G8H5qrqfFW9DDwKHFq6oKoer6qXxoengD2THVOStF5Dgr8buLDkeH583ZXcD3xrpRuSHE0ym2R2YWFh+JSSpHUbEvyscF2tuDC5F5gGPrnS7VV1vKqmq2p6ampq+JSSpHXbOWDNPLB3yfEe4OLyRUneDXwYeEdV/WIy40mSJmXIM/zTwL4kNyW5DjgMzCxdkOQ24LPAwap6bvJjSpLWa9XgV9Ul4EHgJPAs8FhVnUnySJKD42WfBF4HfDXJfyaZucLDSZK2yJBTOlTVCeDEsus+suTyuyc8lyRpwnynrSQ1YfAlqQmDL0lNGHxJasLgS1ITBl+SmjD4ktSEwZekJgy+JDVh8CWpCYMvSU0YfElqwuBLUhMGX5KaMPiS1ITBl6QmDL4kNWHwJakJgy9JTRh8SWrC4EtSEwZfkpow+JLUhMGXpCYMviQ1YfAlqQmDL0lNGHxJasLgS1ITBl+SmjD4ktSEwZekJgy+JDVh8CWpCYMvSU0MCn6SA0nOJZlL8vAKt/9Wkq+Mb/9+khsnPagkaX1WDX6SHcAx4C5gP3Akyf5ly+4HXqiq3wc+Bfz9pAeVJK3PkGf4twNzVXW+ql4GHgUOLVtzCPi38eWvAe9KksmNKUlar50D1uwGLiw5ngf++EprqupSkheB3wV+unRRkqPA0fHhL5I8s5aht6FdLNurxtyLRe7FIvdi0R+s9Y5Dgr/SM/Vawxqq6jhwHCDJbFVND/j62557sci9WOReLHIvFiWZXet9h5zSmQf2LjneA1y80pokO4EbgJ+tdShJ0uQNCf5pYF+Sm5JcBxwGZpatmQH+Ynz5HuDfq+qyZ/iSpK2z6imd8Tn5B4GTwA7gc1V1JskjwGxVzQD/CnwxyRyjZ/aHB3zt4+uYe7txLxa5F4vci0XuxaI170V8Ii5JPfhOW0lqwuBLUhMbHnw/lmHRgL34YJKzSZ5O8u0kb9mKOTfDanuxZN09SSrJtv2VvCF7keQ94++NM0m+tNkzbpYBPyNvTvJ4kqfGPyd3b8WcGy3J55I8d6X3KmXk0+N9ejrJ2wY9cFVt2B9GL/L+F/B7wHXAD4D9y9b8JfCZ8eXDwFc2cqat+jNwL94J/Pb48vs778V43fXAE8ApYHqr597C74t9wFPA74yP37jVc2/hXhwH3j++vB/40VbPvUF78afA24BnrnD73cC3GL0H6g7g+0Med6Of4fuxDItW3YuqeryqXhofnmL0noftaMj3BcDHgU8AP9/M4TbZkL14ADhWVS8AVNVzmzzjZhmyFwW8fnz5Bi5/T9C2UFVPcPX3Mh0CvlAjp4A3JHnTao+70cFf6WMZdl9pTVVdAn71sQzbzZC9WOp+Rv+Cb0er7kWS24C9VfXNzRxsCwz5vrgZuDnJd5OcSnJg06bbXEP24mPAvUnmgRPABzZntGvOK+0JMOyjFdZjYh/LsA0M/nsmuReYBt6xoRNtnavuRZJXMfrU1fs2a6AtNOT7Yiej0zp3Mvpf338kubWq/meDZ9tsQ/biCPD5qvqHJH/C6P0/t1bV/238eNeUNXVzo5/h+7EMi4bsBUneDXwYOFhVv9ik2TbbantxPXAr8J0kP2J0jnJmm75wO/Rn5BtV9cuq+iFwjtE/ANvNkL24H3gMoKq+B7yG0QerdTOoJ8ttdPD9WIZFq+7F+DTGZxnFfruep4VV9qKqXqyqXVV1Y1XdyOj1jINVteYPjbqGDfkZ+TqjF/RJsovRKZ7zmzrl5hiyFz8G3gWQ5K2Mgr+wqVNeG2aA945/W+cO4MWq+slqd9rQUzq1cR/L8Btn4F58Engd8NXx69Y/rqqDWzb0Bhm4Fy0M3IuTwJ8nOQv8L/Chqnp+66beGAP34iHgn5P8DaNTGPdtxyeISb7M6BTervHrFR8FXg1QVZ9h9PrF3cAc8BLwvkGPuw33SpK0At9pK0lNGHxJasLgS1ITBl+SmjD4ktSEwZekJgy+JDXx/4aZaro1YsjCAAAAAElFTkSuQmCC\n",
464 | "text/plain": [
465 | ""
466 | ]
467 | },
468 | "metadata": {
469 | "needs_background": "light"
470 | },
471 | "output_type": "display_data"
472 | }
473 | ],
474 | "source": [
475 | "#graph between continuous and categorical dep var\n",
476 | "from sklearn import linear_model\n",
477 | "from sklearn.metrics import mean_squared_error\n",
478 | "plt.scatter(tb1.Spending,tb1.Coupon,marker='t',color='red')"
479 | ]
480 | },
481 | {
482 | "cell_type": "code",
483 | "execution_count": 34,
484 | "metadata": {},
485 | "outputs": [
486 | {
487 | "name": "stdout",
488 | "output_type": "stream",
489 | "text": [
490 | "Optimization terminated successfully.\n",
491 | " Current function value: 0.604869\n",
492 | " Iterations 5\n"
493 | ]
494 | },
495 | {
496 | "data": {
497 | "text/html": [
498 | "\n",
499 | "Logit Regression Results\n",
500 | "\n",
501 | " | Dep. Variable: | Coupon | No. Observations: | 100 | \n",
502 | "
\n",
503 | "\n",
504 | " | Model: | Logit | Df Residuals: | 97 | \n",
505 | "
\n",
506 | "\n",
507 | " | Method: | MLE | Df Model: | 2 | \n",
508 | "
\n",
509 | "\n",
510 | " | Date: | Sun, 29 Mar 2020 | Pseudo R-squ.: | 0.1012 | \n",
511 | "
\n",
512 | "\n",
513 | " | Time: | 16:37:44 | Log-Likelihood: | -60.487 | \n",
514 | "
\n",
515 | "\n",
516 | " | converged: | True | LL-Null: | -67.301 | \n",
517 | "
\n",
518 | "\n",
519 | " | Covariance Type: | nonrobust | LLR p-value: | 0.001098 | \n",
520 | "
\n",
521 | "
\n",
522 | "\n",
523 | "\n",
524 | " | coef | std err | z | P>|z| | [0.025 | 0.975] | \n",
525 | "
\n",
526 | "\n",
527 | " | const | -2.1464 | 0.577 | -3.718 | 0.000 | -3.278 | -1.015 | \n",
528 | "
\n",
529 | "\n",
530 | " | Card | 1.0987 | 0.445 | 2.471 | 0.013 | 0.227 | 1.970 | \n",
531 | "
\n",
532 | "\n",
533 | " | Spending | 0.3416 | 0.129 | 2.655 | 0.008 | 0.089 | 0.594 | \n",
534 | "
\n",
535 | "
"
536 | ],
537 | "text/plain": [
538 | "\n",
539 | "\"\"\"\n",
540 | " Logit Regression Results \n",
541 | "==============================================================================\n",
542 | "Dep. Variable: Coupon No. Observations: 100\n",
543 | "Model: Logit Df Residuals: 97\n",
544 | "Method: MLE Df Model: 2\n",
545 | "Date: Sun, 29 Mar 2020 Pseudo R-squ.: 0.1012\n",
546 | "Time: 16:37:44 Log-Likelihood: -60.487\n",
547 | "converged: True LL-Null: -67.301\n",
548 | "Covariance Type: nonrobust LLR p-value: 0.001098\n",
549 | "==============================================================================\n",
550 | " coef std err z P>|z| [0.025 0.975]\n",
551 | "------------------------------------------------------------------------------\n",
552 | "const -2.1464 0.577 -3.718 0.000 -3.278 -1.015\n",
553 | "Card 1.0987 0.445 2.471 0.013 0.227 1.970\n",
554 | "Spending 0.3416 0.129 2.655 0.008 0.089 0.594\n",
555 | "==============================================================================\n",
556 | "\"\"\""
557 | ]
558 | },
559 | "execution_count": 34,
560 | "metadata": {},
561 | "output_type": "execute_result"
562 | }
563 | ],
564 | "source": [
565 | "\n",
566 | "x=tb1[['Card','Spending']]\n",
567 | "y=tb1['Coupon']\n",
568 | "import statsmodels.api as sm\n",
569 | "x1=sm.add_constant(x)\n",
570 | "logit_model=sm.Logit(y,x1)\n",
571 | "result=logit_model.fit()\n",
572 | "result.summary()"
573 | ]
574 | },
575 | {
576 | "cell_type": "code",
577 | "execution_count": 35,
578 | "metadata": {},
579 | "outputs": [
580 | {
581 | "data": {
582 | "text/plain": [
583 | "0.000549145469075383"
584 | ]
585 | },
586 | "execution_count": 35,
587 | "metadata": {},
588 | "output_type": "execute_result"
589 | }
590 | ],
591 | "source": [
592 | "#chi sq value of g statistic\n",
593 | "from scipy.stats import chi2\n",
594 | "chi2.pdf(13.628,2) #g value, dof\n",
595 | "#gives p value"
596 | ]
597 | },
598 | {
599 | "cell_type": "code",
600 | "execution_count": null,
601 | "metadata": {},
602 | "outputs": [],
603 | "source": []
604 | }
605 | ],
606 | "metadata": {
607 | "kernelspec": {
608 | "display_name": "Python 3",
609 | "language": "python",
610 | "name": "python3"
611 | },
612 | "language_info": {
613 | "codemirror_mode": {
614 | "name": "ipython",
615 | "version": 3
616 | },
617 | "file_extension": ".py",
618 | "mimetype": "text/x-python",
619 | "name": "python",
620 | "nbconvert_exporter": "python",
621 | "pygments_lexer": "ipython3",
622 | "version": "3.7.4"
623 | }
624 | },
625 | "nbformat": 4,
626 | "nbformat_minor": 2
627 | }
628 |
--------------------------------------------------------------------------------
/Week10.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "import numpy as np"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 3,
16 | "metadata": {},
17 | "outputs": [
18 | {
19 | "data": {
20 | "text/html": [
21 | "\n",
22 | "\n",
35 | "
\n",
36 | " \n",
37 | " \n",
38 | " | \n",
39 | " Rsp No | \n",
40 | " aa | \n",
41 | " pe | \n",
42 | " sm | \n",
43 | " ae | \n",
44 | " r | \n",
45 | " g | \n",
46 | " c | \n",
47 | "
\n",
48 | " \n",
49 | " \n",
50 | " \n",
51 | " | 0 | \n",
52 | " 1 | \n",
53 | " 99 | \n",
54 | " 19 | \n",
55 | " 1 | \n",
56 | " 2 | \n",
57 | " 0 | \n",
58 | " 0 | \n",
59 | " 1 | \n",
60 | "
\n",
61 | " \n",
62 | " | 1 | \n",
63 | " 2 | \n",
64 | " 46 | \n",
65 | " 12 | \n",
66 | " 0 | \n",
67 | " 0 | \n",
68 | " 0 | \n",
69 | " 0 | \n",
70 | " 0 | \n",
71 | "
\n",
72 | " \n",
73 | " | 2 | \n",
74 | " 3 | \n",
75 | " 57 | \n",
76 | " 15 | \n",
77 | " 1 | \n",
78 | " 1 | \n",
79 | " 0 | \n",
80 | " 0 | \n",
81 | " 0 | \n",
82 | "
\n",
83 | " \n",
84 | " | 3 | \n",
85 | " 4 | \n",
86 | " 94 | \n",
87 | " 18 | \n",
88 | " 2 | \n",
89 | " 2 | \n",
90 | " 1 | \n",
91 | " 1 | \n",
92 | " 1 | \n",
93 | "
\n",
94 | " \n",
95 | " | 4 | \n",
96 | " 5 | \n",
97 | " 82 | \n",
98 | " 13 | \n",
99 | " 2 | \n",
100 | " 1 | \n",
101 | " 1 | \n",
102 | " 1 | \n",
103 | " 1 | \n",
104 | "
\n",
105 | " \n",
106 | " | 5 | \n",
107 | " 6 | \n",
108 | " 59 | \n",
109 | " 12 | \n",
110 | " 0 | \n",
111 | " 0 | \n",
112 | " 2 | \n",
113 | " 0 | \n",
114 | " 0 | \n",
115 | "
\n",
116 | " \n",
117 | " | 6 | \n",
118 | " 7 | \n",
119 | " 61 | \n",
120 | " 12 | \n",
121 | " 1 | \n",
122 | " 2 | \n",
123 | " 0 | \n",
124 | " 0 | \n",
125 | " 0 | \n",
126 | "
\n",
127 | " \n",
128 | " | 7 | \n",
129 | " 8 | \n",
130 | " 29 | \n",
131 | " 9 | \n",
132 | " 0 | \n",
133 | " 0 | \n",
134 | " 1 | \n",
135 | " 1 | \n",
136 | " 0 | \n",
137 | "
\n",
138 | " \n",
139 | " | 8 | \n",
140 | " 9 | \n",
141 | " 36 | \n",
142 | " 13 | \n",
143 | " 1 | \n",
144 | " 1 | \n",
145 | " 0 | \n",
146 | " 0 | \n",
147 | " 0 | \n",
148 | "
\n",
149 | " \n",
150 | " | 9 | \n",
151 | " 10 | \n",
152 | " 91 | \n",
153 | " 16 | \n",
154 | " 2 | \n",
155 | " 2 | \n",
156 | " 1 | \n",
157 | " 1 | \n",
158 | " 0 | \n",
159 | "
\n",
160 | " \n",
161 | " | 10 | \n",
162 | " 11 | \n",
163 | " 55 | \n",
164 | " 10 | \n",
165 | " 0 | \n",
166 | " 0 | \n",
167 | " 1 | \n",
168 | " 0 | \n",
169 | " 0 | \n",
170 | "
\n",
171 | " \n",
172 | " | 11 | \n",
173 | " 12 | \n",
174 | " 58 | \n",
175 | " 11 | \n",
176 | " 0 | \n",
177 | " 1 | \n",
178 | " 0 | \n",
179 | " 0 | \n",
180 | " 0 | \n",
181 | "
\n",
182 | " \n",
183 | " | 12 | \n",
184 | " 13 | \n",
185 | " 67 | \n",
186 | " 14 | \n",
187 | " 1 | \n",
188 | " 1 | \n",
189 | " 0 | \n",
190 | " 1 | \n",
191 | " 1 | \n",
192 | "
\n",
193 | " \n",
194 | " | 13 | \n",
195 | " 14 | \n",
196 | " 77 | \n",
197 | " 14 | \n",
198 | " 1 | \n",
199 | " 2 | \n",
200 | " 2 | \n",
201 | " 1 | \n",
202 | " 0 | \n",
203 | "
\n",
204 | " \n",
205 | " | 14 | \n",
206 | " 15 | \n",
207 | " 71 | \n",
208 | " 12 | \n",
209 | " 0 | \n",
210 | " 0 | \n",
211 | " 2 | \n",
212 | " 1 | \n",
213 | " 0 | \n",
214 | "
\n",
215 | " \n",
216 | " | 15 | \n",
217 | " 16 | \n",
218 | " 83 | \n",
219 | " 16 | \n",
220 | " 2 | \n",
221 | " 2 | \n",
222 | " 1 | \n",
223 | " 0 | \n",
224 | " 1 | \n",
225 | "
\n",
226 | " \n",
227 | " | 16 | \n",
228 | " 17 | \n",
229 | " 96 | \n",
230 | " 15 | \n",
231 | " 2 | \n",
232 | " 2 | \n",
233 | " 2 | \n",
234 | " 0 | \n",
235 | " 1 | \n",
236 | "
\n",
237 | " \n",
238 | " | 17 | \n",
239 | " 18 | \n",
240 | " 87 | \n",
241 | " 12 | \n",
242 | " 1 | \n",
243 | " 1 | \n",
244 | " 0 | \n",
245 | " 0 | \n",
246 | " 1 | \n",
247 | "
\n",
248 | " \n",
249 | " | 18 | \n",
250 | " 19 | \n",
251 | " 62 | \n",
252 | " 11 | \n",
253 | " 0 | \n",
254 | " 0 | \n",
255 | " 0 | \n",
256 | " 0 | \n",
257 | " 0 | \n",
258 | "
\n",
259 | " \n",
260 | " | 19 | \n",
261 | " 20 | \n",
262 | " 52 | \n",
263 | " 9 | \n",
264 | " 0 | \n",
265 | " 1 | \n",
266 | " 2 | \n",
267 | " 1 | \n",
268 | " 0 | \n",
269 | "
\n",
270 | " \n",
271 | " | 20 | \n",
272 | " 21 | \n",
273 | " 46 | \n",
274 | " 10 | \n",
275 | " 1 | \n",
276 | " 0 | \n",
277 | " 0 | \n",
278 | " 1 | \n",
279 | " 0 | \n",
280 | "
\n",
281 | " \n",
282 | " | 21 | \n",
283 | " 22 | \n",
284 | " 91 | \n",
285 | " 20 | \n",
286 | " 2 | \n",
287 | " 2 | \n",
288 | " 1 | \n",
289 | " 0 | \n",
290 | " 0 | \n",
291 | "
\n",
292 | " \n",
293 | " | 22 | \n",
294 | " 23 | \n",
295 | " 85 | \n",
296 | " 12 | \n",
297 | " 2 | \n",
298 | " 1 | \n",
299 | " 1 | \n",
300 | " 1 | \n",
301 | " 1 | \n",
302 | "
\n",
303 | " \n",
304 | " | 23 | \n",
305 | " 24 | \n",
306 | " 48 | \n",
307 | " 11 | \n",
308 | " 1 | \n",
309 | " 1 | \n",
310 | " 2 | \n",
311 | " 0 | \n",
312 | " 0 | \n",
313 | "
\n",
314 | " \n",
315 | " | 24 | \n",
316 | " 25 | \n",
317 | " 81 | \n",
318 | " 17 | \n",
319 | " 1 | \n",
320 | " 1 | \n",
321 | " 1 | \n",
322 | " 1 | \n",
323 | " 1 | \n",
324 | "
\n",
325 | " \n",
326 | " | 25 | \n",
327 | " 26 | \n",
328 | " 74 | \n",
329 | " 16 | \n",
330 | " 2 | \n",
331 | " 1 | \n",
332 | " 2 | \n",
333 | " 1 | \n",
334 | " 0 | \n",
335 | "
\n",
336 | " \n",
337 | " | 26 | \n",
338 | " 27 | \n",
339 | " 68 | \n",
340 | " 12 | \n",
341 | " 2 | \n",
342 | " 1 | \n",
343 | " 1 | \n",
344 | " 1 | \n",
345 | " 1 | \n",
346 | "
\n",
347 | " \n",
348 | " | 27 | \n",
349 | " 28 | \n",
350 | " 63 | \n",
351 | " 12 | \n",
352 | " 1 | \n",
353 | " 0 | \n",
354 | " 0 | \n",
355 | " 0 | \n",
356 | " 1 | \n",
357 | "
\n",
358 | " \n",
359 | " | 28 | \n",
360 | " 29 | \n",
361 | " 72 | \n",
362 | " 14 | \n",
363 | " 0 | \n",
364 | " 2 | \n",
365 | " 0 | \n",
366 | " 0 | \n",
367 | " 0 | \n",
368 | "
\n",
369 | " \n",
370 | " | 29 | \n",
371 | " 30 | \n",
372 | " 99 | \n",
373 | " 19 | \n",
374 | " 1 | \n",
375 | " 1 | \n",
376 | " 1 | \n",
377 | " 0 | \n",
378 | " 0 | \n",
379 | "
\n",
380 | " \n",
381 | " | 30 | \n",
382 | " 31 | \n",
383 | " 64 | \n",
384 | " 13 | \n",
385 | " 1 | \n",
386 | " 1 | \n",
387 | " 0 | \n",
388 | " 0 | \n",
389 | " 0 | \n",
390 | "
\n",
391 | " \n",
392 | " | 31 | \n",
393 | " 32 | \n",
394 | " 77 | \n",
395 | " 13 | \n",
396 | " 1 | \n",
397 | " 0 | \n",
398 | " 1 | \n",
399 | " 1 | \n",
400 | " 1 | \n",
401 | "
\n",
402 | " \n",
403 | " | 32 | \n",
404 | " 33 | \n",
405 | " 88 | \n",
406 | " 16 | \n",
407 | " 2 | \n",
408 | " 2 | \n",
409 | " 0 | \n",
410 | " 1 | \n",
411 | " 0 | \n",
412 | "
\n",
413 | " \n",
414 | " | 33 | \n",
415 | " 34 | \n",
416 | " 54 | \n",
417 | " 9 | \n",
418 | " 0 | \n",
419 | " 1 | \n",
420 | " 1 | \n",
421 | " 0 | \n",
422 | " 0 | \n",
423 | "
\n",
424 | " \n",
425 | " | 34 | \n",
426 | " 35 | \n",
427 | " 86 | \n",
428 | " 17 | \n",
429 | " 1 | \n",
430 | " 2 | \n",
431 | " 1 | \n",
432 | " 0 | \n",
433 | " 1 | \n",
434 | "
\n",
435 | " \n",
436 | " | 35 | \n",
437 | " 36 | \n",
438 | " 73 | \n",
439 | " 15 | \n",
440 | " 1 | \n",
441 | " 1 | \n",
442 | " 0 | \n",
443 | " 1 | \n",
444 | " 0 | \n",
445 | "
\n",
446 | " \n",
447 | " | 36 | \n",
448 | " 37 | \n",
449 | " 79 | \n",
450 | " 15 | \n",
451 | " 2 | \n",
452 | " 1 | \n",
453 | " 0 | \n",
454 | " 0 | \n",
455 | " 1 | \n",
456 | "
\n",
457 | " \n",
458 | " | 37 | \n",
459 | " 38 | \n",
460 | " 85 | \n",
461 | " 14 | \n",
462 | " 2 | \n",
463 | " 1 | \n",
464 | " 2 | \n",
465 | " 1 | \n",
466 | " 1 | \n",
467 | "
\n",
468 | " \n",
469 | " | 38 | \n",
470 | " 39 | \n",
471 | " 96 | \n",
472 | " 16 | \n",
473 | " 0 | \n",
474 | " 1 | \n",
475 | " 1 | \n",
476 | " 0 | \n",
477 | " 1 | \n",
478 | "
\n",
479 | " \n",
480 | " | 39 | \n",
481 | " 40 | \n",
482 | " 59 | \n",
483 | " 12 | \n",
484 | " 1 | \n",
485 | " 0 | \n",
486 | " 0 | \n",
487 | " 1 | \n",
488 | " 0 | \n",
489 | "
\n",
490 | " \n",
491 | " | 40 | \n",
492 | " 41 | \n",
493 | " 84 | \n",
494 | " 14 | \n",
495 | " 1 | \n",
496 | " 0 | \n",
497 | " 1 | \n",
498 | " 0 | \n",
499 | " 1 | \n",
500 | "
\n",
501 | " \n",
502 | " | 41 | \n",
503 | " 42 | \n",
504 | " 71 | \n",
505 | " 15 | \n",
506 | " 2 | \n",
507 | " 1 | \n",
508 | " 1 | \n",
509 | " 0 | \n",
510 | " 0 | \n",
511 | "
\n",
512 | " \n",
513 | " | 42 | \n",
514 | " 43 | \n",
515 | " 89 | \n",
516 | " 15 | \n",
517 | " 0 | \n",
518 | " 1 | \n",
519 | " 0 | \n",
520 | " 1 | \n",
521 | " 1 | \n",
522 | "
\n",
523 | " \n",
524 | " | 43 | \n",
525 | " 44 | \n",
526 | " 38 | \n",
527 | " 12 | \n",
528 | " 1 | \n",
529 | " 0 | \n",
530 | " 1 | \n",
531 | " 1 | \n",
532 | " 0 | \n",
533 | "
\n",
534 | " \n",
535 | " | 44 | \n",
536 | " 45 | \n",
537 | " 62 | \n",
538 | " 11 | \n",
539 | " 1 | \n",
540 | " 1 | \n",
541 | " 2 | \n",
542 | " 0 | \n",
543 | " 1 | \n",
544 | "
\n",
545 | " \n",
546 | " | 45 | \n",
547 | " 46 | \n",
548 | " 93 | \n",
549 | " 16 | \n",
550 | " 1 | \n",
551 | " 0 | \n",
552 | " 1 | \n",
553 | " 0 | \n",
554 | " 1 | \n",
555 | "
\n",
556 | " \n",
557 | " | 46 | \n",
558 | " 47 | \n",
559 | " 71 | \n",
560 | " 13 | \n",
561 | " 2 | \n",
562 | " 1 | \n",
563 | " 1 | \n",
564 | " 0 | \n",
565 | " 0 | \n",
566 | "
\n",
567 | " \n",
568 | " | 47 | \n",
569 | " 48 | \n",
570 | " 55 | \n",
571 | " 11 | \n",
572 | " 0 | \n",
573 | " 1 | \n",
574 | " 0 | \n",
575 | " 0 | \n",
576 | " 0 | \n",
577 | "
\n",
578 | " \n",
579 | " | 48 | \n",
580 | " 49 | \n",
581 | " 74 | \n",
582 | " 15 | \n",
583 | " 1 | \n",
584 | " 2 | \n",
585 | " 0 | \n",
586 | " 1 | \n",
587 | " 0 | \n",
588 | "
\n",
589 | " \n",
590 | " | 49 | \n",
591 | " 50 | \n",
592 | " 76 | \n",
593 | " 20 | \n",
594 | " 0 | \n",
595 | " 1 | \n",
596 | " 1 | \n",
597 | " 0 | \n",
598 | " 1 | \n",
599 | "
\n",
600 | " \n",
601 | "
\n",
602 | "
"
603 | ],
604 | "text/plain": [
605 | " Rsp No aa pe sm ae r g c\n",
606 | "0 1 99 19 1 2 0 0 1\n",
607 | "1 2 46 12 0 0 0 0 0\n",
608 | "2 3 57 15 1 1 0 0 0\n",
609 | "3 4 94 18 2 2 1 1 1\n",
610 | "4 5 82 13 2 1 1 1 1\n",
611 | "5 6 59 12 0 0 2 0 0\n",
612 | "6 7 61 12 1 2 0 0 0\n",
613 | "7 8 29 9 0 0 1 1 0\n",
614 | "8 9 36 13 1 1 0 0 0\n",
615 | "9 10 91 16 2 2 1 1 0\n",
616 | "10 11 55 10 0 0 1 0 0\n",
617 | "11 12 58 11 0 1 0 0 0\n",
618 | "12 13 67 14 1 1 0 1 1\n",
619 | "13 14 77 14 1 2 2 1 0\n",
620 | "14 15 71 12 0 0 2 1 0\n",
621 | "15 16 83 16 2 2 1 0 1\n",
622 | "16 17 96 15 2 2 2 0 1\n",
623 | "17 18 87 12 1 1 0 0 1\n",
624 | "18 19 62 11 0 0 0 0 0\n",
625 | "19 20 52 9 0 1 2 1 0\n",
626 | "20 21 46 10 1 0 0 1 0\n",
627 | "21 22 91 20 2 2 1 0 0\n",
628 | "22 23 85 12 2 1 1 1 1\n",
629 | "23 24 48 11 1 1 2 0 0\n",
630 | "24 25 81 17 1 1 1 1 1\n",
631 | "25 26 74 16 2 1 2 1 0\n",
632 | "26 27 68 12 2 1 1 1 1\n",
633 | "27 28 63 12 1 0 0 0 1\n",
634 | "28 29 72 14 0 2 0 0 0\n",
635 | "29 30 99 19 1 1 1 0 0\n",
636 | "30 31 64 13 1 1 0 0 0\n",
637 | "31 32 77 13 1 0 1 1 1\n",
638 | "32 33 88 16 2 2 0 1 0\n",
639 | "33 34 54 9 0 1 1 0 0\n",
640 | "34 35 86 17 1 2 1 0 1\n",
641 | "35 36 73 15 1 1 0 1 0\n",
642 | "36 37 79 15 2 1 0 0 1\n",
643 | "37 38 85 14 2 1 2 1 1\n",
644 | "38 39 96 16 0 1 1 0 1\n",
645 | "39 40 59 12 1 0 0 1 0\n",
646 | "40 41 84 14 1 0 1 0 1\n",
647 | "41 42 71 15 2 1 1 0 0\n",
648 | "42 43 89 15 0 1 0 1 1\n",
649 | "43 44 38 12 1 0 1 1 0\n",
650 | "44 45 62 11 1 1 2 0 1\n",
651 | "45 46 93 16 1 0 1 0 1\n",
652 | "46 47 71 13 2 1 1 0 0\n",
653 | "47 48 55 11 0 1 0 0 0\n",
654 | "48 49 74 15 1 2 0 1 0\n",
655 | "49 50 76 20 0 1 1 0 1"
656 | ]
657 | },
658 | "execution_count": 3,
659 | "metadata": {},
660 | "output_type": "execute_result"
661 | }
662 | ],
663 | "source": [
664 | "acad=pd.read_excel('C:/Users/Garima Singh/Desktop/mooc data analysis/acad.xlsx')\n",
665 | "acad"
666 | ]
667 | },
668 | {
669 | "cell_type": "code",
670 | "execution_count": 4,
671 | "metadata": {},
672 | "outputs": [],
673 | "source": [
674 | "#poisson gof test\n",
675 | "obs=pd.pivot_table(acad[['g','sm']],index='g',columns='sm',aggfunc=len)"
676 | ]
677 | },
678 | {
679 | "cell_type": "code",
680 | "execution_count": 6,
681 | "metadata": {},
682 | "outputs": [
683 | {
684 | "data": {
685 | "text/html": [
686 | "\n",
687 | "\n",
700 | "
\n",
701 | " \n",
702 | " \n",
703 | " | sm | \n",
704 | " 0 | \n",
705 | " 1 | \n",
706 | " 2 | \n",
707 | "
\n",
708 | " \n",
709 | " | g | \n",
710 | " | \n",
711 | " | \n",
712 | " | \n",
713 | "
\n",
714 | " \n",
715 | " \n",
716 | " \n",
717 | " | 0 | \n",
718 | " 10 | \n",
719 | " 13 | \n",
720 | " 6 | \n",
721 | "
\n",
722 | " \n",
723 | " | 1 | \n",
724 | " 4 | \n",
725 | " 9 | \n",
726 | " 8 | \n",
727 | "
\n",
728 | " \n",
729 | "
\n",
730 | "
"
731 | ],
732 | "text/plain": [
733 | "sm 0 1 2\n",
734 | "g \n",
735 | "0 10 13 6\n",
736 | "1 4 9 8"
737 | ]
738 | },
739 | "execution_count": 6,
740 | "metadata": {},
741 | "output_type": "execute_result"
742 | }
743 | ],
744 | "source": [
745 | "obs #contingency table with obs f"
746 | ]
747 | },
748 | {
749 | "cell_type": "code",
750 | "execution_count": 13,
751 | "metadata": {},
752 | "outputs": [
753 | {
754 | "data": {
755 | "text/plain": [
756 | "2.3649585225939904"
757 | ]
758 | },
759 | "execution_count": 13,
760 | "metadata": {},
761 | "output_type": "execute_result"
762 | }
763 | ],
764 | "source": [
765 | "from scipy.stats import chi2_contingency\n",
766 | "chi2,p,dof,tb1=chi2_contingency(obs)\n",
767 | "chi2"
768 | ]
769 | },
770 | {
771 | "cell_type": "code",
772 | "execution_count": 14,
773 | "metadata": {},
774 | "outputs": [
775 | {
776 | "data": {
777 | "text/plain": [
778 | "0.3065178579178871"
779 | ]
780 | },
781 | "execution_count": 14,
782 | "metadata": {},
783 | "output_type": "execute_result"
784 | }
785 | ],
786 | "source": [
787 | "p"
788 | ]
789 | },
790 | {
791 | "cell_type": "code",
792 | "execution_count": 15,
793 | "metadata": {},
794 | "outputs": [
795 | {
796 | "data": {
797 | "text/plain": [
798 | "2"
799 | ]
800 | },
801 | "execution_count": 15,
802 | "metadata": {},
803 | "output_type": "execute_result"
804 | }
805 | ],
806 | "source": [
807 | "dof"
808 | ]
809 | },
810 | {
811 | "cell_type": "code",
812 | "execution_count": 16,
813 | "metadata": {},
814 | "outputs": [
815 | {
816 | "data": {
817 | "text/plain": [
818 | "array([[ 8.12, 12.76, 8.12],\n",
819 | " [ 5.88, 9.24, 5.88]])"
820 | ]
821 | },
822 | "execution_count": 16,
823 | "metadata": {},
824 | "output_type": "execute_result"
825 | }
826 | ],
827 | "source": [
828 | "tb1 #contingency table of exp f"
829 | ]
830 | },
831 | {
832 | "cell_type": "code",
833 | "execution_count": 37,
834 | "metadata": {},
835 | "outputs": [
836 | {
837 | "data": {
838 | "text/html": [
839 | "\n",
840 | "\n",
853 | "
\n",
854 | " \n",
855 | " \n",
856 | " | \n",
857 | " Arrivals | \n",
858 | " Frequency | \n",
859 | "
\n",
860 | " \n",
861 | " \n",
862 | " \n",
863 | " | 0 | \n",
864 | " 0 | \n",
865 | " 0 | \n",
866 | "
\n",
867 | " \n",
868 | " | 1 | \n",
869 | " 1 | \n",
870 | " 1 | \n",
871 | "
\n",
872 | " \n",
873 | " | 2 | \n",
874 | " 2 | \n",
875 | " 4 | \n",
876 | "
\n",
877 | " \n",
878 | " | 3 | \n",
879 | " 3 | \n",
880 | " 10 | \n",
881 | "
\n",
882 | " \n",
883 | " | 4 | \n",
884 | " 4 | \n",
885 | " 14 | \n",
886 | "
\n",
887 | " \n",
888 | " | 5 | \n",
889 | " 5 | \n",
890 | " 20 | \n",
891 | "
\n",
892 | " \n",
893 | " | 6 | \n",
894 | " 6 | \n",
895 | " 12 | \n",
896 | "
\n",
897 | " \n",
898 | " | 7 | \n",
899 | " 7 | \n",
900 | " 12 | \n",
901 | "
\n",
902 | " \n",
903 | " | 8 | \n",
904 | " 8 | \n",
905 | " 9 | \n",
906 | "
\n",
907 | " \n",
908 | " | 9 | \n",
909 | " 9 | \n",
910 | " 8 | \n",
911 | "
\n",
912 | " \n",
913 | " | 10 | \n",
914 | " 10 | \n",
915 | " 6 | \n",
916 | "
\n",
917 | " \n",
918 | " | 11 | \n",
919 | " 11 | \n",
920 | " 3 | \n",
921 | "
\n",
922 | " \n",
923 | " | 12 | \n",
924 | " 12 | \n",
925 | " 1 | \n",
926 | "
\n",
927 | " \n",
928 | "
\n",
929 | "
"
930 | ],
931 | "text/plain": [
932 | " Arrivals Frequency\n",
933 | "0 0 0\n",
934 | "1 1 1\n",
935 | "2 2 4\n",
936 | "3 3 10\n",
937 | "4 4 14\n",
938 | "5 5 20\n",
939 | "6 6 12\n",
940 | "7 7 12\n",
941 | "8 8 9\n",
942 | "9 9 8\n",
943 | "10 10 6\n",
944 | "11 11 3\n",
945 | "12 12 1"
946 | ]
947 | },
948 | "execution_count": 37,
949 | "metadata": {},
950 | "output_type": "execute_result"
951 | }
952 | ],
953 | "source": [
954 | "import matplotlib as mp\n",
955 | "import numpy as np\n",
956 | "from scipy.optimize import minimize\n",
957 | "import scipy.stats as stats\n",
958 | "from scipy import stats\n",
959 | "import statsmodels.api as sm\n",
960 | "from statsmodels.formula.api import ols\n",
961 | "from matplotlib import pyplot as plt\n",
962 | "import pandas as pd\n",
963 | "import numpy as np\n",
964 | "import math\n",
965 | "import scipy\n",
966 | "from scipy import stats\n",
967 | "import pandas as pd\n",
968 | "import numpy as np\n",
969 | "import math\n",
970 | "import scipy\n",
971 | "from scipy import stats\n",
972 | "from scipy import stats\n",
973 | "import statsmodels.api as sm\n",
974 | "\n",
975 | "import statsmodels.formula.api \n",
976 | "import statsmodels.formula.api as smf\n",
977 | "from statsmodels.formula.api import ols\n",
978 | "from matplotlib import pyplot as plt\n",
979 | "from scipy.stats import chi2\n",
980 | "from scipy.stats import poisson\n",
981 | "data=pd.read_excel('C:/Users/Garima Singh/Desktop/mooc data analysis/P_distribution.xlsx')\n",
982 | "data"
983 | ]
984 | },
985 | {
986 | "cell_type": "code",
987 | "execution_count": 38,
988 | "metadata": {},
989 | "outputs": [
990 | {
991 | "data": {
992 | "text/plain": [
993 | "0 0\n",
994 | "1 1\n",
995 | "2 4\n",
996 | "3 10\n",
997 | "4 14\n",
998 | "5 20\n",
999 | "6 12\n",
1000 | "7 12\n",
1001 | "8 9\n",
1002 | "9 8\n",
1003 | "10 6\n",
1004 | "11 3\n",
1005 | "12 1\n",
1006 | "Name: Frequency, dtype: int64"
1007 | ]
1008 | },
1009 | "execution_count": 38,
1010 | "metadata": {},
1011 | "output_type": "execute_result"
1012 | }
1013 | ],
1014 | "source": [
1015 | "obs_freq=data['Frequency']\n",
1016 | "obs_freq"
1017 | ]
1018 | },
1019 | {
1020 | "cell_type": "code",
1021 | "execution_count": 39,
1022 | "metadata": {},
1023 | "outputs": [
1024 | {
1025 | "data": {
1026 | "text/plain": [
1027 | "6.0"
1028 | ]
1029 | },
1030 | "execution_count": 39,
1031 | "metadata": {},
1032 | "output_type": "execute_result"
1033 | }
1034 | ],
1035 | "source": [
1036 | "total_arrival=600\n",
1037 | "total_time=100\n",
1038 | "mu=total_arrival/total_time\n",
1039 | "mu"
1040 | ]
1041 | },
1042 | {
1043 | "cell_type": "code",
1044 | "execution_count": 40,
1045 | "metadata": {},
1046 | "outputs": [],
1047 | "source": [
1048 | "#finding expected f\n",
1049 | "exp_freq=[]\n",
1050 | "for i in range(len(obs_freq)):\n",
1051 | " e_freq=100*poisson.pmf(i,mu)\n",
1052 | " exp_freq.append(e_freq)"
1053 | ]
1054 | },
1055 | {
1056 | "cell_type": "code",
1057 | "execution_count": 41,
1058 | "metadata": {},
1059 | "outputs": [
1060 | {
1061 | "data": {
1062 | "text/plain": [
1063 | "[0.24787521766663584,\n",
1064 | " 1.4872513059998145,\n",
1065 | " 4.461753917999444,\n",
1066 | " 8.923507835998894,\n",
1067 | " 13.385261753998332,\n",
1068 | " 16.062314104797995,\n",
1069 | " 16.06231410479801,\n",
1070 | " 13.767697804112569,\n",
1071 | " 10.32577335308442,\n",
1072 | " 6.883848902056284,\n",
1073 | " 4.130309341233764,\n",
1074 | " 2.2528960043093247,\n",
1075 | " 1.1264480021546681]"
1076 | ]
1077 | },
1078 | "execution_count": 41,
1079 | "metadata": {},
1080 | "output_type": "execute_result"
1081 | }
1082 | ],
1083 | "source": [
1084 | "exp_freq"
1085 | ]
1086 | },
1087 | {
1088 | "cell_type": "code",
1089 | "execution_count": 42,
1090 | "metadata": {},
1091 | "outputs": [
1092 | {
1093 | "data": {
1094 | "text/plain": [
1095 | "[0.25,\n",
1096 | " 1.49,\n",
1097 | " 4.46,\n",
1098 | " 8.92,\n",
1099 | " 13.39,\n",
1100 | " 16.06,\n",
1101 | " 16.06,\n",
1102 | " 13.77,\n",
1103 | " 10.33,\n",
1104 | " 6.88,\n",
1105 | " 4.13,\n",
1106 | " 2.25,\n",
1107 | " 1.13]"
1108 | ]
1109 | },
1110 | "execution_count": 42,
1111 | "metadata": {},
1112 | "output_type": "execute_result"
1113 | }
1114 | ],
1115 | "source": [
1116 | "exp_freq_round=[round(elem,2) for elem in exp_freq]\n",
1117 | "exp_freq_round #rounding nos"
1118 | ]
1119 | },
1120 | {
1121 | "cell_type": "code",
1122 | "execution_count": 43,
1123 | "metadata": {},
1124 | "outputs": [
1125 | {
1126 | "data": {
1127 | "text/html": [
1128 | "\n",
1129 | "\n",
1142 | "
\n",
1143 | " \n",
1144 | " \n",
1145 | " | \n",
1146 | " Obs freq | \n",
1147 | " Exp freq | \n",
1148 | "
\n",
1149 | " \n",
1150 | " \n",
1151 | " \n",
1152 | " | 0 | \n",
1153 | " 0 | \n",
1154 | " 0.25 | \n",
1155 | "
\n",
1156 | " \n",
1157 | " | 1 | \n",
1158 | " 1 | \n",
1159 | " 1.49 | \n",
1160 | "
\n",
1161 | " \n",
1162 | " | 2 | \n",
1163 | " 4 | \n",
1164 | " 4.46 | \n",
1165 | "
\n",
1166 | " \n",
1167 | " | 3 | \n",
1168 | " 10 | \n",
1169 | " 8.92 | \n",
1170 | "
\n",
1171 | " \n",
1172 | " | 4 | \n",
1173 | " 14 | \n",
1174 | " 13.39 | \n",
1175 | "
\n",
1176 | " \n",
1177 | " | 5 | \n",
1178 | " 20 | \n",
1179 | " 16.06 | \n",
1180 | "
\n",
1181 | " \n",
1182 | " | 6 | \n",
1183 | " 12 | \n",
1184 | " 16.06 | \n",
1185 | "
\n",
1186 | " \n",
1187 | " | 7 | \n",
1188 | " 12 | \n",
1189 | " 13.77 | \n",
1190 | "
\n",
1191 | " \n",
1192 | " | 8 | \n",
1193 | " 9 | \n",
1194 | " 10.33 | \n",
1195 | "
\n",
1196 | " \n",
1197 | " | 9 | \n",
1198 | " 8 | \n",
1199 | " 6.88 | \n",
1200 | "
\n",
1201 | " \n",
1202 | " | 10 | \n",
1203 | " 6 | \n",
1204 | " 4.13 | \n",
1205 | "
\n",
1206 | " \n",
1207 | " | 11 | \n",
1208 | " 3 | \n",
1209 | " 2.25 | \n",
1210 | "
\n",
1211 | " \n",
1212 | " | 12 | \n",
1213 | " 1 | \n",
1214 | " 1.13 | \n",
1215 | "
\n",
1216 | " \n",
1217 | "
\n",
1218 | "
"
1219 | ],
1220 | "text/plain": [
1221 | " Obs freq Exp freq\n",
1222 | "0 0 0.25\n",
1223 | "1 1 1.49\n",
1224 | "2 4 4.46\n",
1225 | "3 10 8.92\n",
1226 | "4 14 13.39\n",
1227 | "5 20 16.06\n",
1228 | "6 12 16.06\n",
1229 | "7 12 13.77\n",
1230 | "8 9 10.33\n",
1231 | "9 8 6.88\n",
1232 | "10 6 4.13\n",
1233 | "11 3 2.25\n",
1234 | "12 1 1.13"
1235 | ]
1236 | },
1237 | "execution_count": 43,
1238 | "metadata": {},
1239 | "output_type": "execute_result"
1240 | }
1241 | ],
1242 | "source": [
1243 | "df=pd.DataFrame(list(zip(obs_freq,exp_freq_round)),columns=['Obs freq','Exp freq'])\n",
1244 | "df"
1245 | ]
1246 | },
1247 | {
1248 | "cell_type": "code",
1249 | "execution_count": 44,
1250 | "metadata": {},
1251 | "outputs": [],
1252 | "source": [
1253 | "obs_freq=[5,10,14,20,12,12,9,8,10]\n",
1254 | "exp_freq=[6.2,5,6,7,8,9,7,6,8]\n"
1255 | ]
1256 | },
1257 | {
1258 | "cell_type": "code",
1259 | "execution_count": 45,
1260 | "metadata": {},
1261 | "outputs": [
1262 | {
1263 | "data": {
1264 | "text/plain": [
1265 | "Power_divergenceResult(statistic=44.77987711213518, pvalue=4.050901717224414e-07)"
1266 | ]
1267 | },
1268 | "execution_count": 45,
1269 | "metadata": {},
1270 | "output_type": "execute_result"
1271 | }
1272 | ],
1273 | "source": [
1274 | " scipy.stats.chisquare(obs_freq,exp_freq) # gives chi square cal and p value"
1275 | ]
1276 | },
1277 | {
1278 | "cell_type": "code",
1279 | "execution_count": 46,
1280 | "metadata": {},
1281 | "outputs": [
1282 | {
1283 | "data": {
1284 | "text/plain": [
1285 | "14.067140449340169"
1286 | ]
1287 | },
1288 | "execution_count": 46,
1289 | "metadata": {},
1290 | "output_type": "execute_result"
1291 | }
1292 | ],
1293 | "source": [
1294 | "from scipy.stats import chi2\n",
1295 | "chi2.ppf(0.95,7) #gives table chi square value"
1296 | ]
1297 | },
1298 | {
1299 | "cell_type": "code",
1300 | "execution_count": 47,
1301 | "metadata": {},
1302 | "outputs": [
1303 | {
1304 | "data": {
1305 | "text/plain": [
1306 | "5032.2"
1307 | ]
1308 | },
1309 | "execution_count": 47,
1310 | "metadata": {},
1311 | "output_type": "execute_result"
1312 | }
1313 | ],
1314 | "source": [
1315 | "#uniform gof test\n",
1316 | "x=[13,343,3432,234,123,45345,23,233,342,234]\n",
1317 | "np.mean(x)"
1318 | ]
1319 | },
1320 | {
1321 | "cell_type": "code",
1322 | "execution_count": 48,
1323 | "metadata": {},
1324 | "outputs": [
1325 | {
1326 | "data": {
1327 | "text/plain": [
1328 | "Power_divergenceResult(statistic=360703.37458765553, pvalue=0.0)"
1329 | ]
1330 | },
1331 | "execution_count": 48,
1332 | "metadata": {},
1333 | "output_type": "execute_result"
1334 | }
1335 | ],
1336 | "source": [
1337 | "exp_f=[5032.2,5032.2,5032.2,5032.2,5032.2,5032.2,5032.2,5032.2,5032.2,5032.2]\n",
1338 | "from scipy.stats import chisquare\n",
1339 | "chisquare(x,exp_f)"
1340 | ]
1341 | },
1342 | {
1343 | "cell_type": "code",
1344 | "execution_count": 49,
1345 | "metadata": {},
1346 | "outputs": [],
1347 | "source": [
1348 | "#normal dis gof test\n",
1349 | "a=[1,2,3,4,5,6,7,8,9]\n",
1350 | "mean=np.mean(a)"
1351 | ]
1352 | },
1353 | {
1354 | "cell_type": "code",
1355 | "execution_count": 50,
1356 | "metadata": {},
1357 | "outputs": [
1358 | {
1359 | "data": {
1360 | "text/plain": [
1361 | "5.0"
1362 | ]
1363 | },
1364 | "execution_count": 50,
1365 | "metadata": {},
1366 | "output_type": "execute_result"
1367 | }
1368 | ],
1369 | "source": [
1370 | "mean"
1371 | ]
1372 | },
1373 | {
1374 | "cell_type": "code",
1375 | "execution_count": 51,
1376 | "metadata": {},
1377 | "outputs": [],
1378 | "source": [
1379 | "std=np.std(a)"
1380 | ]
1381 | },
1382 | {
1383 | "cell_type": "code",
1384 | "execution_count": 52,
1385 | "metadata": {},
1386 | "outputs": [
1387 | {
1388 | "data": {
1389 | "text/plain": [
1390 | "2.581988897471611"
1391 | ]
1392 | },
1393 | "execution_count": 52,
1394 | "metadata": {},
1395 | "output_type": "execute_result"
1396 | }
1397 | ],
1398 | "source": [
1399 | "std"
1400 | ]
1401 | },
1402 | {
1403 | "cell_type": "code",
1404 | "execution_count": 54,
1405 | "metadata": {},
1406 | "outputs": [],
1407 | "source": [
1408 | "x=1/6\n",
1409 | "for j in range(1,6):\n",
1410 | " prob_int=[scipy.stats.norm.ppf(j*x,mean,std)]\n",
1411 | " "
1412 | ]
1413 | },
1414 | {
1415 | "cell_type": "code",
1416 | "execution_count": 55,
1417 | "metadata": {},
1418 | "outputs": [
1419 | {
1420 | "data": {
1421 | "text/plain": [
1422 | "[7.49787174284919]"
1423 | ]
1424 | },
1425 | "execution_count": 55,
1426 | "metadata": {},
1427 | "output_type": "execute_result"
1428 | }
1429 | ],
1430 | "source": [
1431 | "prob_int"
1432 | ]
1433 | },
1434 | {
1435 | "cell_type": "code",
1436 | "execution_count": 56,
1437 | "metadata": {},
1438 | "outputs": [],
1439 | "source": [
1440 | "exp_freq=[5,5,5,5,5,5]\n",
1441 | "obs_freq=[6,3,6,5,4,6]"
1442 | ]
1443 | },
1444 | {
1445 | "cell_type": "code",
1446 | "execution_count": 59,
1447 | "metadata": {},
1448 | "outputs": [
1449 | {
1450 | "data": {
1451 | "text/plain": [
1452 | "Power_divergenceResult(statistic=1.5999999999999999, pvalue=0.9012493445012737)"
1453 | ]
1454 | },
1455 | "execution_count": 59,
1456 | "metadata": {},
1457 | "output_type": "execute_result"
1458 | }
1459 | ],
1460 | "source": [
1461 | "scipy.stats.chisquare(obs_freq,exp_freq)"
1462 | ]
1463 | },
1464 | {
1465 | "cell_type": "code",
1466 | "execution_count": 60,
1467 | "metadata": {},
1468 | "outputs": [
1469 | {
1470 | "data": {
1471 | "text/plain": [
1472 | "7.814727903251179"
1473 | ]
1474 | },
1475 | "execution_count": 60,
1476 | "metadata": {},
1477 | "output_type": "execute_result"
1478 | }
1479 | ],
1480 | "source": [
1481 | "chi2.ppf(0.95,3) #table chi square value"
1482 | ]
1483 | },
1484 | {
1485 | "cell_type": "code",
1486 | "execution_count": null,
1487 | "metadata": {},
1488 | "outputs": [],
1489 | "source": []
1490 | }
1491 | ],
1492 | "metadata": {
1493 | "kernelspec": {
1494 | "display_name": "Python 3",
1495 | "language": "python",
1496 | "name": "python3"
1497 | },
1498 | "language_info": {
1499 | "codemirror_mode": {
1500 | "name": "ipython",
1501 | "version": 3
1502 | },
1503 | "file_extension": ".py",
1504 | "mimetype": "text/x-python",
1505 | "name": "python",
1506 | "nbconvert_exporter": "python",
1507 | "pygments_lexer": "ipython3",
1508 | "version": "3.7.4"
1509 | }
1510 | },
1511 | "nbformat": 4,
1512 | "nbformat_minor": 2
1513 | }
1514 |
--------------------------------------------------------------------------------
/Week5.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 86,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "import numpy as np\n",
11 | "import math\n",
12 | "import scipy\n",
13 | "from scipy import stats"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 87,
19 | "metadata": {},
20 | "outputs": [
21 | {
22 | "data": {
23 | "text/plain": [
24 | "-3.3724679378582554"
25 | ]
26 | },
27 | "execution_count": 87,
28 | "metadata": {},
29 | "output_type": "execute_result"
30 | }
31 | ],
32 | "source": [
33 | "stats.t.ppf(0.0025,13)"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": 88,
39 | "metadata": {},
40 | "outputs": [
41 | {
42 | "data": {
43 | "text/plain": [
44 | "Ttest_indResult(statistic=-6.550432296063072, pvalue=0.0004203925153912486)"
45 | ]
46 | },
47 | "execution_count": 88,
48 | "metadata": {},
49 | "output_type": "execute_result"
50 | }
51 | ],
52 | "source": [
53 | "metro=[3,7,25,10,15]\n",
54 | "rural=[48,44,40,38,35]\n",
55 | "stats.ttest_ind(metro,rural,equal_var=False)\n",
56 | " "
57 | ]
58 | },
59 | {
60 | "cell_type": "code",
61 | "execution_count": 89,
62 | "metadata": {},
63 | "outputs": [
64 | {
65 | "data": {
66 | "text/plain": [
67 | "Ttest_relResult(statistic=-5.303497930706049, pvalue=0.0060728031285194545)"
68 | ]
69 | },
70 | "execution_count": 89,
71 | "metadata": {},
72 | "output_type": "execute_result"
73 | }
74 | ],
75 | "source": [
76 | "metro=[3,7,25,10,15]\n",
77 | "rural=[48,44,40,38,35]\n",
78 | "stats.ttest_rel(metro,rural)\n",
79 | " "
80 | ]
81 | },
82 | {
83 | "cell_type": "code",
84 | "execution_count": 90,
85 | "metadata": {},
86 | "outputs": [],
87 | "source": [
88 | "import math\n",
89 | "def two_samp_proportion(p1,p2,n1,n2):\n",
90 | " p_pool=((p1*n1)+(p2*n2))/(n1+n2)\n",
91 | " x=(p_pool*(1-p_pool)*((1/n1)+(1/n2)))\n",
92 | " s=math.sqrt(x)\n",
93 | " z=(p1-p2)/s\n",
94 | " if(z<0):\n",
95 | " p_val=stats.norm.cdf(z)\n",
96 | " else:\n",
97 | " p_val=1-stats.norm.cdf(z)\n",
98 | " return z, p_val*2"
99 | ]
100 | },
101 | {
102 | "cell_type": "code",
103 | "execution_count": 91,
104 | "metadata": {},
105 | "outputs": [
106 | {
107 | "data": {
108 | "text/plain": [
109 | "(1.3442056254198995, 0.17888190308175567)"
110 | ]
111 | },
112 | "execution_count": 91,
113 | "metadata": {},
114 | "output_type": "execute_result"
115 | }
116 | ],
117 | "source": [
118 | "two_samp_proportion(0.27,0.19,100,100)"
119 | ]
120 | },
121 | {
122 | "cell_type": "code",
123 | "execution_count": 92,
124 | "metadata": {},
125 | "outputs": [
126 | {
127 | "data": {
128 | "text/plain": [
129 | "0.910558947366134"
130 | ]
131 | },
132 | "execution_count": 92,
133 | "metadata": {},
134 | "output_type": "execute_result"
135 | }
136 | ],
137 | "source": [
138 | "stats.norm.cdf(1.344205)"
139 | ]
140 | },
141 | {
142 | "cell_type": "code",
143 | "execution_count": 93,
144 | "metadata": {},
145 | "outputs": [
146 | {
147 | "data": {
148 | "text/plain": [
149 | "2.8450165269958436"
150 | ]
151 | },
152 | "execution_count": 93,
153 | "metadata": {},
154 | "output_type": "execute_result"
155 | }
156 | ],
157 | "source": [
158 | "\n",
159 | "from scipy import stats\n",
160 | "from scipy.stats import f\n",
161 | "from scipy.stats import poisson\n",
162 | "scipy.stats.f.ppf(q=1-0.05, dfn=15, dfd=10)"
163 | ]
164 | },
165 | {
166 | "cell_type": "code",
167 | "execution_count": 94,
168 | "metadata": {},
169 | "outputs": [],
170 | "source": [
171 | "f1=scipy.stats.f.ppf(q=0.05, dfn=15, dfd=15)"
172 | ]
173 | },
174 | {
175 | "cell_type": "code",
176 | "execution_count": 95,
177 | "metadata": {},
178 | "outputs": [],
179 | "source": [
180 | "x=[3,9,3,4,5,6]\n",
181 | "y=[1,2,3,4,5,6]\n",
182 | "f=np.var(x)/np.var(y)"
183 | ]
184 | },
185 | {
186 | "cell_type": "code",
187 | "execution_count": 96,
188 | "metadata": {},
189 | "outputs": [],
190 | "source": [
191 | "dfn=len(x)-1\n",
192 | "dfd=len(y)-1"
193 | ]
194 | },
195 | {
196 | "cell_type": "code",
197 | "execution_count": 97,
198 | "metadata": {},
199 | "outputs": [],
200 | "source": [
201 | "p_value=scipy.stats.f.cdf(f,dfn,dfd)"
202 | ]
203 | },
204 | {
205 | "cell_type": "code",
206 | "execution_count": 98,
207 | "metadata": {},
208 | "outputs": [
209 | {
210 | "data": {
211 | "text/plain": [
212 | "0.6627143533357427"
213 | ]
214 | },
215 | "execution_count": 98,
216 | "metadata": {},
217 | "output_type": "execute_result"
218 | }
219 | ],
220 | "source": [
221 | "p_value\n"
222 | ]
223 | },
224 | {
225 | "cell_type": "code",
226 | "execution_count": 99,
227 | "metadata": {},
228 | "outputs": [],
229 | "source": [
230 | "def samplesize(alfa,beta,mu1,mu2,sigma):\n",
231 | " z1=-1*stats.norm.ppf(alfa)\n",
232 | " z2=-1*stats.norm.ppf(beta)\n",
233 | " n=((((z1+z2)**2)*(sigma**2))/(mu1-mu2)**2)\n",
234 | " print(n)"
235 | ]
236 | },
237 | {
238 | "cell_type": "code",
239 | "execution_count": 100,
240 | "metadata": {},
241 | "outputs": [
242 | {
243 | "name": "stdout",
244 | "output_type": "stream",
245 | "text": [
246 | "9.302043647889692\n"
247 | ]
248 | }
249 | ],
250 | "source": [
251 | "samplesize(0.05,0.08,5,7,2)"
252 | ]
253 | },
254 | {
255 | "cell_type": "code",
256 | "execution_count": 101,
257 | "metadata": {},
258 | "outputs": [],
259 | "source": [
260 | "a=[4,3,2]\n",
261 | "b=[2,4,6]\n",
262 | "c=[2,1,3]"
263 | ]
264 | },
265 | {
266 | "cell_type": "code",
267 | "execution_count": 102,
268 | "metadata": {},
269 | "outputs": [
270 | {
271 | "data": {
272 | "text/plain": [
273 | "F_onewayResult(statistic=1.5, pvalue=0.2962962962962962)"
274 | ]
275 | },
276 | "execution_count": 102,
277 | "metadata": {},
278 | "output_type": "execute_result"
279 | }
280 | ],
281 | "source": [
282 | "stats.f_oneway(a,b,c)"
283 | ]
284 | },
285 | {
286 | "cell_type": "code",
287 | "execution_count": 103,
288 | "metadata": {},
289 | "outputs": [],
290 | "source": [
291 | "from scipy import stats\n",
292 | "import statsmodels.api as sm\n",
293 | "from statsmodels.formula.api import ols\n",
294 | "from matplotlib import pyplot as plt"
295 | ]
296 | },
297 | {
298 | "cell_type": "code",
299 | "execution_count": 104,
300 | "metadata": {},
301 | "outputs": [],
302 | "source": [
303 | "data=pd.read_excel('C:/Users/Garima Singh/Desktop/mooc data analysis/oneway.xlsx')"
304 | ]
305 | },
306 | {
307 | "cell_type": "code",
308 | "execution_count": 105,
309 | "metadata": {},
310 | "outputs": [
311 | {
312 | "data": {
313 | "text/html": [
314 | "\n",
315 | "\n",
328 | "
\n",
329 | " \n",
330 | " \n",
331 | " | \n",
332 | " Black Board | \n",
333 | " Case Presentation | \n",
334 | " PPT | \n",
335 | "
\n",
336 | " \n",
337 | " \n",
338 | " \n",
339 | " | 0 | \n",
340 | " 4 | \n",
341 | " 2 | \n",
342 | " 2 | \n",
343 | "
\n",
344 | " \n",
345 | " | 1 | \n",
346 | " 3 | \n",
347 | " 4 | \n",
348 | " 1 | \n",
349 | "
\n",
350 | " \n",
351 | " | 2 | \n",
352 | " 2 | \n",
353 | " 6 | \n",
354 | " 3 | \n",
355 | "
\n",
356 | " \n",
357 | "
\n",
358 | "
"
359 | ],
360 | "text/plain": [
361 | " Black Board Case Presentation PPT \n",
362 | "0 4 2 2\n",
363 | "1 3 4 1\n",
364 | "2 2 6 3"
365 | ]
366 | },
367 | "execution_count": 105,
368 | "metadata": {},
369 | "output_type": "execute_result"
370 | }
371 | ],
372 | "source": [
373 | "data"
374 | ]
375 | },
376 | {
377 | "cell_type": "code",
378 | "execution_count": 130,
379 | "metadata": {},
380 | "outputs": [
381 | {
382 | "data": {
383 | "text/html": [
384 | "\n",
385 | "\n",
398 | "
\n",
399 | " \n",
400 | " \n",
401 | " | \n",
402 | " Black Board | \n",
403 | " Case Presentation | \n",
404 | " PPT | \n",
405 | "
\n",
406 | " \n",
407 | " \n",
408 | " \n",
409 | " | 0 | \n",
410 | " 4 | \n",
411 | " 2 | \n",
412 | " 2 | \n",
413 | "
\n",
414 | " \n",
415 | " | 1 | \n",
416 | " 3 | \n",
417 | " 4 | \n",
418 | " 1 | \n",
419 | "
\n",
420 | " \n",
421 | " | 2 | \n",
422 | " 2 | \n",
423 | " 6 | \n",
424 | " 3 | \n",
425 | "
\n",
426 | " \n",
427 | "
\n",
428 | "
"
429 | ],
430 | "text/plain": [
431 | " Black Board Case Presentation PPT\n",
432 | "0 4 2 2\n",
433 | "1 3 4 1\n",
434 | "2 2 6 3"
435 | ]
436 | },
437 | "execution_count": 130,
438 | "metadata": {},
439 | "output_type": "execute_result"
440 | }
441 | ],
442 | "source": [
443 | "data = pd.DataFrame({'Black Board': {0: 4, 1: 3, 2: 2}, \n",
444 | " 'Case Presentation': {0: 2, 1: 4, 2: 6}, \n",
445 | " 'PPT': {0: 2, 1: 1, 2: 3}}) \n",
446 | "data"
447 | ]
448 | },
449 | {
450 | "cell_type": "code",
451 | "execution_count": 131,
452 | "metadata": {},
453 | "outputs": [
454 | {
455 | "name": "stdout",
456 | "output_type": "stream",
457 | "text": [
458 | " Black Board variable value\n",
459 | "0 4 Case Presentation 2\n",
460 | "1 3 Case Presentation 4\n",
461 | "2 2 Case Presentation 6\n",
462 | "3 4 PPT 2\n",
463 | "4 3 PPT 1\n",
464 | "5 2 PPT 3\n"
465 | ]
466 | }
467 | ],
468 | "source": [
469 | "datanew=pd.melt(data, id_vars=['Black Board'], value_vars=['Case Presentation','PPT'])\n",
470 | "print(datanew)"
471 | ]
472 | },
473 | {
474 | "cell_type": "code",
475 | "execution_count": 132,
476 | "metadata": {},
477 | "outputs": [],
478 | "source": [
479 | "datanew.columns=['index','treatments','value']"
480 | ]
481 | },
482 | {
483 | "cell_type": "code",
484 | "execution_count": 133,
485 | "metadata": {},
486 | "outputs": [
487 | {
488 | "data": {
489 | "text/html": [
490 | "\n",
491 | "\n",
504 | "
\n",
505 | " \n",
506 | " \n",
507 | " | \n",
508 | " index | \n",
509 | " treatments | \n",
510 | " value | \n",
511 | "
\n",
512 | " \n",
513 | " \n",
514 | " \n",
515 | " | 0 | \n",
516 | " 4 | \n",
517 | " Case Presentation | \n",
518 | " 2 | \n",
519 | "
\n",
520 | " \n",
521 | " | 1 | \n",
522 | " 3 | \n",
523 | " Case Presentation | \n",
524 | " 4 | \n",
525 | "
\n",
526 | " \n",
527 | " | 2 | \n",
528 | " 2 | \n",
529 | " Case Presentation | \n",
530 | " 6 | \n",
531 | "
\n",
532 | " \n",
533 | " | 3 | \n",
534 | " 4 | \n",
535 | " PPT | \n",
536 | " 2 | \n",
537 | "
\n",
538 | " \n",
539 | " | 4 | \n",
540 | " 3 | \n",
541 | " PPT | \n",
542 | " 1 | \n",
543 | "
\n",
544 | " \n",
545 | " | 5 | \n",
546 | " 2 | \n",
547 | " PPT | \n",
548 | " 3 | \n",
549 | "
\n",
550 | " \n",
551 | "
\n",
552 | "
"
553 | ],
554 | "text/plain": [
555 | " index treatments value\n",
556 | "0 4 Case Presentation 2\n",
557 | "1 3 Case Presentation 4\n",
558 | "2 2 Case Presentation 6\n",
559 | "3 4 PPT 2\n",
560 | "4 3 PPT 1\n",
561 | "5 2 PPT 3"
562 | ]
563 | },
564 | "execution_count": 133,
565 | "metadata": {},
566 | "output_type": "execute_result"
567 | }
568 | ],
569 | "source": [
570 | "datanew"
571 | ]
572 | },
573 | {
574 | "cell_type": "code",
575 | "execution_count": 149,
576 | "metadata": {},
577 | "outputs": [],
578 | "source": [
579 | "model=ols('value~C(treatments)',data=datanew).fit()"
580 | ]
581 | },
582 | {
583 | "cell_type": "code",
584 | "execution_count": 148,
585 | "metadata": {},
586 | "outputs": [
587 | {
588 | "name": "stderr",
589 | "output_type": "stream",
590 | "text": [
591 | "C:\\Users\\Garima Singh\\Anaconda3\\lib\\site-packages\\statsmodels\\stats\\stattools.py:71: ValueWarning: omni_normtest is not valid with less than 8 observations; 6 samples were given.\n",
592 | " \"samples were given.\" % int(n), ValueWarning)\n"
593 | ]
594 | },
595 | {
596 | "data": {
597 | "text/html": [
598 | "\n",
599 | "OLS Regression Results\n",
600 | "\n",
601 | " | Dep. Variable: | value | R-squared: | 0.375 | \n",
602 | "
\n",
603 | "\n",
604 | " | Model: | OLS | Adj. R-squared: | 0.219 | \n",
605 | "
\n",
606 | "\n",
607 | " | Method: | Least Squares | F-statistic: | 2.400 | \n",
608 | "
\n",
609 | "\n",
610 | " | Date: | Sun, 29 Mar 2020 | Prob (F-statistic): | 0.196 | \n",
611 | "
\n",
612 | "\n",
613 | " | Time: | 03:24:15 | Log-Likelihood: | -10.046 | \n",
614 | "
\n",
615 | "\n",
616 | " | No. Observations: | 6 | AIC: | 24.09 | \n",
617 | "
\n",
618 | "\n",
619 | " | Df Residuals: | 4 | BIC: | 23.68 | \n",
620 | "
\n",
621 | "\n",
622 | " | Df Model: | 1 | | | \n",
623 | "
\n",
624 | "\n",
625 | " | Covariance Type: | nonrobust | | | \n",
626 | "
\n",
627 | "
\n",
628 | "\n",
629 | "\n",
630 | " | coef | std err | t | P>|t| | [0.025 | 0.975] | \n",
631 | "
\n",
632 | "\n",
633 | " | Intercept | 4.0000 | 0.913 | 4.382 | 0.012 | 1.465 | 6.535 | \n",
634 | "
\n",
635 | "\n",
636 | " | C(treatments)[T.PPT] | -2.0000 | 1.291 | -1.549 | 0.196 | -5.584 | 1.584 | \n",
637 | "
\n",
638 | "
\n",
639 | "\n",
640 | "\n",
641 | " | Omnibus: | nan | Durbin-Watson: | 1.700 | \n",
642 | "
\n",
643 | "\n",
644 | " | Prob(Omnibus): | nan | Jarque-Bera (JB): | 0.230 | \n",
645 | "
\n",
646 | "\n",
647 | " | Skew: | -0.000 | Prob(JB): | 0.891 | \n",
648 | "
\n",
649 | "\n",
650 | " | Kurtosis: | 2.040 | Cond. No. | 2.62 | \n",
651 | "
\n",
652 | "
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
653 | ],
654 | "text/plain": [
655 | "\n",
656 | "\"\"\"\n",
657 | " OLS Regression Results \n",
658 | "==============================================================================\n",
659 | "Dep. Variable: value R-squared: 0.375\n",
660 | "Model: OLS Adj. R-squared: 0.219\n",
661 | "Method: Least Squares F-statistic: 2.400\n",
662 | "Date: Sun, 29 Mar 2020 Prob (F-statistic): 0.196\n",
663 | "Time: 03:24:15 Log-Likelihood: -10.046\n",
664 | "No. Observations: 6 AIC: 24.09\n",
665 | "Df Residuals: 4 BIC: 23.68\n",
666 | "Df Model: 1 \n",
667 | "Covariance Type: nonrobust \n",
668 | "========================================================================================\n",
669 | " coef std err t P>|t| [0.025 0.975]\n",
670 | "----------------------------------------------------------------------------------------\n",
671 | "Intercept 4.0000 0.913 4.382 0.012 1.465 6.535\n",
672 | "C(treatments)[T.PPT] -2.0000 1.291 -1.549 0.196 -5.584 1.584\n",
673 | "==============================================================================\n",
674 | "Omnibus: nan Durbin-Watson: 1.700\n",
675 | "Prob(Omnibus): nan Jarque-Bera (JB): 0.230\n",
676 | "Skew: -0.000 Prob(JB): 0.891\n",
677 | "Kurtosis: 2.040 Cond. No. 2.62\n",
678 | "==============================================================================\n",
679 | "\n",
680 | "Warnings:\n",
681 | "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
682 | "\"\"\""
683 | ]
684 | },
685 | "execution_count": 148,
686 | "metadata": {},
687 | "output_type": "execute_result"
688 | }
689 | ],
690 | "source": [
691 | "model.summary()"
692 | ]
693 | },
694 | {
695 | "cell_type": "markdown",
696 | "metadata": {},
697 | "source": [
698 | "model.summary()"
699 | ]
700 | },
701 | {
702 | "cell_type": "code",
703 | "execution_count": 136,
704 | "metadata": {},
705 | "outputs": [
706 | {
707 | "data": {
708 | "text/html": [
709 | "\n",
710 | "\n",
723 | "
\n",
724 | " \n",
725 | " \n",
726 | " | \n",
727 | " Name | \n",
728 | " Course | \n",
729 | " Age | \n",
730 | "
\n",
731 | " \n",
732 | " \n",
733 | " \n",
734 | " | 0 | \n",
735 | " John | \n",
736 | " Masters | \n",
737 | " 27 | \n",
738 | "
\n",
739 | " \n",
740 | " | 1 | \n",
741 | " Bob | \n",
742 | " Graduate | \n",
743 | " 23 | \n",
744 | "
\n",
745 | " \n",
746 | " | 2 | \n",
747 | " Shiela | \n",
748 | " Graduate | \n",
749 | " 21 | \n",
750 | "
\n",
751 | " \n",
752 | "
\n",
753 | "
"
754 | ],
755 | "text/plain": [
756 | " Name Course Age\n",
757 | "0 John Masters 27\n",
758 | "1 Bob Graduate 23\n",
759 | "2 Shiela Graduate 21"
760 | ]
761 | },
762 | "execution_count": 136,
763 | "metadata": {},
764 | "output_type": "execute_result"
765 | }
766 | ],
767 | "source": [
768 | "\n",
769 | "# Create a simple dataframe \n",
770 | " \n",
771 | "# importing pandas as pd \n",
772 | "import pandas as pd \n",
773 | " \n",
774 | "# creating a dataframe \n",
775 | "df = pd.DataFrame({'Name': {0: 'John', 1: 'Bob', 2: 'Shiela'}, \n",
776 | " 'Course': {0: 'Masters', 1: 'Graduate', 2: 'Graduate'}, \n",
777 | " 'Age': {0: 27, 1: 23, 2: 21}}) \n",
778 | "df "
779 | ]
780 | },
781 | {
782 | "cell_type": "code",
783 | "execution_count": 112,
784 | "metadata": {},
785 | "outputs": [
786 | {
787 | "data": {
788 | "text/html": [
789 | "\n",
790 | "\n",
803 | "
\n",
804 | " \n",
805 | " \n",
806 | " | \n",
807 | " Name | \n",
808 | " variable | \n",
809 | " value | \n",
810 | "
\n",
811 | " \n",
812 | " \n",
813 | " \n",
814 | " | 0 | \n",
815 | " John | \n",
816 | " Course | \n",
817 | " Masters | \n",
818 | "
\n",
819 | " \n",
820 | " | 1 | \n",
821 | " Bob | \n",
822 | " Course | \n",
823 | " Graduate | \n",
824 | "
\n",
825 | " \n",
826 | " | 2 | \n",
827 | " Shiela | \n",
828 | " Course | \n",
829 | " Graduate | \n",
830 | "
\n",
831 | " \n",
832 | "
\n",
833 | "
"
834 | ],
835 | "text/plain": [
836 | " Name variable value\n",
837 | "0 John Course Masters\n",
838 | "1 Bob Course Graduate\n",
839 | "2 Shiela Course Graduate"
840 | ]
841 | },
842 | "execution_count": 112,
843 | "metadata": {},
844 | "output_type": "execute_result"
845 | }
846 | ],
847 | "source": [
848 | "# Name is id_vars and Course is value_vars \n",
849 | "pd.melt(df, id_vars =['Name'], value_vars =['Course']) "
850 | ]
851 | },
852 | {
853 | "cell_type": "code",
854 | "execution_count": 113,
855 | "metadata": {},
856 | "outputs": [],
857 | "source": [
858 | "anova_table=sm.stats.anova_lm(model,type=1)"
859 | ]
860 | },
861 | {
862 | "cell_type": "code",
863 | "execution_count": 114,
864 | "metadata": {},
865 | "outputs": [
866 | {
867 | "data": {
868 | "text/html": [
869 | "\n",
870 | "\n",
883 | "
\n",
884 | " \n",
885 | " \n",
886 | " | \n",
887 | " df | \n",
888 | " sum_sq | \n",
889 | " mean_sq | \n",
890 | " F | \n",
891 | " PR(>F) | \n",
892 | "
\n",
893 | " \n",
894 | " \n",
895 | " \n",
896 | " | C(treatments) | \n",
897 | " 1.0 | \n",
898 | " 6.0 | \n",
899 | " 6.0 | \n",
900 | " 2.4 | \n",
901 | " 0.196261 | \n",
902 | "
\n",
903 | " \n",
904 | " | Residual | \n",
905 | " 4.0 | \n",
906 | " 10.0 | \n",
907 | " 2.5 | \n",
908 | " NaN | \n",
909 | " NaN | \n",
910 | "
\n",
911 | " \n",
912 | "
\n",
913 | "
"
914 | ],
915 | "text/plain": [
916 | " df sum_sq mean_sq F PR(>F)\n",
917 | "C(treatments) 1.0 6.0 6.0 2.4 0.196261\n",
918 | "Residual 4.0 10.0 2.5 NaN NaN"
919 | ]
920 | },
921 | "execution_count": 114,
922 | "metadata": {},
923 | "output_type": "execute_result"
924 | }
925 | ],
926 | "source": [
927 | "anova_table"
928 | ]
929 | },
930 | {
931 | "cell_type": "code",
932 | "execution_count": 115,
933 | "metadata": {},
934 | "outputs": [],
935 | "source": [
936 | "fivep=[7,8,95,11,9,10]\n",
937 | "tenp=[74,8,95,141,0,30]\n",
938 | "fifteenp=[27,48,95,161,98,10]\n",
939 | "twentyp=[74,84,9,11,95,160]\n",
940 | "box_plot_data=[fivep,tenp,fifteenp,twentyp]"
941 | ]
942 | },
943 | {
944 | "cell_type": "code",
945 | "execution_count": 116,
946 | "metadata": {},
947 | "outputs": [
948 | {
949 | "data": {
950 | "text/plain": [
951 | "{'whiskers': [,\n",
952 | " ,\n",
953 | " ,\n",
954 | " ,\n",
955 | " ,\n",
956 | " ,\n",
957 | " ,\n",
958 | " ],\n",
959 | " 'caps': [,\n",
960 | " ,\n",
961 | " ,\n",
962 | " ,\n",
963 | " ,\n",
964 | " ,\n",
965 | " ,\n",
966 | " ],\n",
967 | " 'boxes': [,\n",
968 | " ,\n",
969 | " ,\n",
970 | " ],\n",
971 | " 'medians': [,\n",
972 | " ,\n",
973 | " ,\n",
974 | " ],\n",
975 | " 'fliers': [,\n",
976 | " ,\n",
977 | " ,\n",
978 | " ],\n",
979 | " 'means': []}"
980 | ]
981 | },
982 | "execution_count": 116,
983 | "metadata": {},
984 | "output_type": "execute_result"
985 | },
986 | {
987 | "data": {
988 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAARWUlEQVR4nO3dbYylZX3H8e+vuyhag+y6g6G70KXNahc3PmVKabWNYhtBjcsLTSA+bOwmm7YUtdqqdJMuviDRtlGrbU22QsHUrFK1hRj7QHEt2UTAwUdwtWywwgi6YwCxNSLgvy/OjR1nz+7MeZg9M9d+P8nJnPu6r3Pu/94785trrnM/pKqQJLXl5yZdgCRp/Ax3SWqQ4S5JDTLcJalBhrskNWjtpAsA2LBhQ23evHnSZUjSqnLbbbd9r6qm+q1bEeG+efNmZmZmJl2GJK0qSb51tHVOy0hSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGLRruSa5KcjjJ7QvaL03yjSR3JPnzee2XJTnUrXvpchQtSTq2pRznfjXw18CHH29I8mJgO/Dsqno4yWld+9nARcCzgF8A/iPJM6rqsXEXLkk6ukVH7lV1E3D/gubfB95VVQ93fQ537duBj1bVw1X1TeAQcM4Y65VOOEnG+tCJYdg592cAv5nkliT/meRXu/aNwD3z+s12bUdIsivJTJKZubm5IcuQ2ldViz6W2s+b85w4vyyHDfe1wDrgXOBPgGvT+1f2+5f2/W6qqr1VNV1V01NTfS+NIEljd6L8shw23GeBT1bPrcBPgA1d+xnz+m0C7h2tREnSoIYN938GzgNI8gzgCcD3gOuBi5I8MclZwBbg1nEUKklaukWPlkmyD3gRsCHJLLAHuAq4qjs88sfAjur9fXJHkmuBrwGPApd4pIwkHX+LhntVXXyUVa89Sv8rgCtGKUqSNBrPUJWkBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNWjRcE9yVZLD3V2XFq774ySVZEO3nCTvT3IoyVeSPH85ipYkHdtSRu5XA+cvbExyBvA7wN3zmi+gd9/ULcAu4IOjlyhJGtSi4V5VNwH391n1XuBtQM1r2w58uHpuBk5NcvpYKtWqkWSsD0mDW/Qeqv0keSXw7ar68oIfvo3APfOWZ7u2+/q8xy56o3vOPPPMYcrQCtW7V/rikiy5r6TBDPyBapInA7uBP+u3uk9b35/eqtpbVdNVNT01NTVoGZKkYxhm5P7LwFnA46P2TcAXkpxDb6R+xry+m4B7Ry1SkjSYgUfuVfXVqjqtqjZX1WZ6gf78qvoOcD3w+u6omXOB71fVEVMykqTltZRDIfcBnwOemWQ2yc5jdP80cBdwCPg74A/GUqUkaSCLTstU1cWLrN8873kBl4xeliRpFJ6hKkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoOWcrOOq5IcTnL7vLa/SPL1JF9J8k9JTp237rIkh5J8I8lLl6twSdLRLWXkfjVw/oK2G4BtVfVs4L+AywCSnA1cBDyre83fJlkztmolSUuyaLhX1U3A/Qva/r2qHu0Wb6Z3I2yA7cBHq+rhqvomvdvtnTPGeiVJSzCOOfffBf6le74RuGfeutmuTZJ0HI0U7kl2A48CH3m8qU+3OsprdyWZSTIzNzc3ShmSpAWGDvckO4BXAK/pbowNvZH6GfO6bQLu7ff6qtpbVdNVNT01NTVsGZKkPoYK9yTnA28HXllVP5y36nrgoiRPTHIWsAW4dfQyJUmDWLtYhyT7gBcBG5LMAnvoHR3zROCGJAA3V9XvVdUdSa4FvkZvuuaSqnpsuYqXJPW3aLhX1cV9mq88Rv8rgCtGKUqSNBrPUJWkBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNWjRcE9yVZLDSW6f17Y+yQ1J7uy+ruvak+T9SQ4l+UqS5y9n8ZKk/pYycr8aOH9B2zuAG6tqC3BjtwxwAb37pm4BdgEfHE+ZkqRBLBruVXUTcP+C5u3ANd3za4AL57V/uHpuBk5Ncvq4ipUkLc2wc+5Pr6r7ALqvp3XtG4F75vWb7dqOkGRXkpkkM3Nzc0OWIUnqZ9wfqKZPW/XrWFV7q2q6qqanpqbGXIYkndiGDffvPj7d0n093LXPAmfM67cJuHf48iRJwxg23K8HdnTPdwDXzWt/fXfUzLnA9x+fvlnN9u3bx7Zt21izZg3btm1j3759ky5Jko5p7WIdkuwDXgRsSDIL7AHeBVybZCdwN/DqrvungZcBh4AfAm9YhpqPq3379rF7926uvPJKXvjCF3LgwAF27twJwMUXXzzh6rTarV+/ngceeGAs75X0mxUd3Lp167j//oXHUGi1SVXfKfHjanp6umZmZiZdRl/btm3jAx/4AC9+8Yt/2rZ//34uvfRSbr/99mO8UotJwkr4/puklbgPVmJNx9tq2QdJbquq6b7rVsI/YCWH+5o1a/jRj37ESSed9NO2Rx55hJNPPpnHHntsgpWtfqvlB2g5rcR9sBJrOt5Wyz44Vrh7+YFFbN26lQMHDvxM24EDB9i6deuEKpKkxRnui9i9ezc7d+5k//79PPLII+zfv5+dO3eye/fuSZcmSUe16AeqJ7rHPzS99NJLOXjwIFu3buWKK67ww1RJK5pz7pqY1TKvuZxW4j5YiTUtxTiPPBqn5Tz66Fhz7o7cJTXhgQceWJG/lMZ1iOqgnHOXpAY5ctdAxv2nryfeSMvDcNdA/NNXWh2clpGkBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUEjhXuSP0pyR5Lbk+xLcnKSs5LckuTOJB9L8oRxFStJWpqhwz3JRuCNwHRVbQPWABcB7wbeW1VbgAeAneMoVJK0dKNOy6wFnpRkLfBk4D7gPODj3fprgAtH3IYkaUBDh3tVfRv4S3o3yL4P+D5wG/BgVT3adZsFNvZ7fZJdSWaSzMzNzQ1bhiSpj6GvLZNkHbAdOAt4EPhH4II+XfteiKSq9gJ7oXc992HrkCSA2nMKXP7USZdxhNpzykS2O8qFw34b+GZVzQEk+STwG8CpSdZ2o/dNwL2jlylJx5Z3PrRiL2pXlx//7Y4y5343cG6SJ6d3Sb6XAF8D9gOv6vrsAK4brURJ0qBGmXO/hd4Hp18Avtq9117g7cBbkhwCngZcOYY6JUkDGOl67lW1B9izoPku4JxR3leSNBrPUJWkBhnuktQgw12SGmS4S1KDvEG2NEEr8cSbSZ10o/Ey3KUJWokn3kzqpBuNl9MyktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUoJHCPcmpST6e5OtJDib59STrk9yQ5M7u67pxFStJWppRR+5/BfxrVf0K8BzgIPAO4Maq2gLc2C1Lko6jocM9ySnAb9HdRq+qflxVDwLbgWu6btcAF45apCRpMKOM3H8JmAP+PskXk3woyc8DT6+q+wC6r6f1e3GSXUlmkszMzc2NUIYkaaFRwn0t8Hzgg1X1POB/GWAKpqr2VtV0VU1PTU2NUIYkaaFRwn0WmK2qW7rlj9ML++8mOR2g+3p4tBIlSYMaOtyr6jvAPUme2TW9BPgacD2wo2vbAVw3UoWSpIGNerOOS4GPJHkCcBfwBnq/MK5NshO4G3j1iNuQJA1opHCvqi8B031WvWSU95UkjcYzVCWpQYa7JDXIcJekBhnuktSgUY+W0Qmm9pwClz910mUcofacMukSpBXFcNdA8s6HqKpJl3GEJNTlk65CWjmclpGkBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUoJHDPcmaJF9M8qlu+awktyS5M8nHurs0SZKOo3GM3N8EHJy3/G7gvVW1BXgA2DmGbUiSBjBSuCfZBLwc+FC3HOA84ONdl2uAC0fZhiRpcKOO3N8HvA34Sbf8NODBqnq0W54FNvZ7YZJdSWaSzMzNzY1YhiRpvqHDPckrgMNVddv85j5d+14ftqr2VtV0VU1PTU0NW4YkqY9Rruf+AuCVSV4GnAycQm8kf2qStd3ofRNw7+hlSpIGMfTIvaouq6pNVbUZuAj4TFW9BtgPvKrrtgO4buQqJUkDWY7j3N8OvCXJIXpz8FcuwzYkSccwltvsVdVngc92z+8CzhnH+0qShuMZqpLUIMNdkhpkuEtSg8Yy5y5peL0Tu1eOdevWTboEjYHhLk1QVd9z/AaWZGzvpTY4LSNJDTLcJalBhrskNchwl6QGGe6S1CCPlpHUjJV2WClM7tBSw11SE8Z5KGgLh5Y6LSNJDTLcJalBhrskNWiUe6iekWR/koNJ7kjypq59fZIbktzZffVCFZJ0nI0ycn8UeGtVbQXOBS5JcjbwDuDGqtoC3NgtS5KOo1HuoXpfVX2he/4D4CCwEdgOXNN1uwa4cNQiJUmDGcuce5LNwPOAW4CnV9V90PsFAJx2lNfsSjKTZGZubm4cZUiSOiOHe5KnAJ8A3lxVDy31dVW1t6qmq2p6ampq1DIkSfOMFO5JTqIX7B+pqk92zd9Ncnq3/nTg8GglSpIGNcrRMgGuBA5W1Xvmrboe2NE93wFcN3x5kqRhjHL5gRcArwO+muRLXdufAu8Crk2yE7gbePVoJUqSBjV0uFfVAeBoV+l5ybDvK0kanWeoSlKDDHdJapCX/NXAvGa2tPIZ7hqI18yWVgenZSSpQY7cO+OcanA0KmnSmh+5r1+/niSLPsZpKdtbv379WLcpSfM1P3K//42PAadMuow+Hpt0AVolljr4WGo//7I8MTQf7nnnQyvymzkJdfmkq9BqsBK/f1ezE+WXZfPhDh66J+n/rdQwHrfmw32p/5F+oCqpJc2H+1IZyJJa0vzRMpJ0IjLcJalBhrskNchwl6QGLVu4Jzk/yTeSHEryjuXajiTpSMsS7knWAH8DXACcDVyc5Ozl2JYk6UjLNXI/BzhUVXdV1Y+BjwLbl2lbkqQFlus4943APfOWZ4Ffm98hyS5gF8CZZ565TGVoEgY5IWwpfT0HQRrcco3c+/3E/sxPaFXtrarpqpqemppapjI0CVU11oekwS1XuM8CZ8xb3gTcu0zbkiQtsFzh/nlgS5KzkjwBuAi4fpm2JUlaYFnm3Kvq0SR/CPwbsAa4qqruWI5tSZKOtGwXDquqTwOfXq73lyQdnWeoSlKDDHdJapDhLkkNMtwlqUFZCSeJJJkDvjXpOpZgA/C9SRfREPfn+Lgvx2u17M9frKq+Z4GuiHBfLZLMVNX0pOtohftzfNyX49XC/nRaRpIaZLhLUoMM98HsnXQBjXF/jo/7crxW/f50zl2SGuTIXZIaZLhLUoMM9yVIclWSw0lun3Qtq12SM5LsT3IwyR1J3jTpmlazJCcnuTXJl7v9+c5J17TaJVmT5ItJPjXpWkZhuC/N1cD5ky6iEY8Cb62qrcC5wCXePH0kDwPnVdVzgOcC5yc5d8I1rXZvAg5OuohRGe5LUFU3AfdPuo4WVNV9VfWF7vkP6P0QbZxsVatX9fxPt3hS9/AoiSEl2QS8HPjQpGsZleGuiUmyGXgecMtkK1ndummELwGHgRuqyv05vPcBbwN+MulCRmW4ayKSPAX4BPDmqnpo0vWsZlX1WFU9l969is9Jsm3SNa1GSV4BHK6q2yZdyzgY7jrukpxEL9g/UlWfnHQ9raiqB4HP4udDw3oB8Mok/w18FDgvyT9MtqThGe46rpIEuBI4WFXvmXQ9q12SqSSnds+fBPw28PXJVrU6VdVlVbWpqjYDFwGfqarXTrisoRnuS5BkH/A54JlJZpPsnHRNq9gLgNfRGxV9qXu8bNJFrWKnA/uTfAX4PL0591V9CJ/Gw8sPSFKDHLlLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktSg/wMRdz+k+y/ZTQAAAABJRU5ErkJggg==\n",
989 | "text/plain": [
990 | ""
991 | ]
992 | },
993 | "metadata": {
994 | "needs_background": "light"
995 | },
996 | "output_type": "display_data"
997 | }
998 | ],
999 | "source": [
1000 | "plt.boxplot(box_plot_data)"
1001 | ]
1002 | },
1003 | {
1004 | "cell_type": "code",
1005 | "execution_count": 117,
1006 | "metadata": {},
1007 | "outputs": [],
1008 | "source": [
1009 | "plt.show()"
1010 | ]
1011 | },
1012 | {
1013 | "cell_type": "code",
1014 | "execution_count": 118,
1015 | "metadata": {},
1016 | "outputs": [
1017 | {
1018 | "data": {
1019 | "text/plain": [
1020 | "0.6795458900175544"
1021 | ]
1022 | },
1023 | "execution_count": 118,
1024 | "metadata": {},
1025 | "output_type": "execute_result"
1026 | }
1027 | ],
1028 | "source": [
1029 | "#p value\n",
1030 | "1-scipy.stats.f.cdf(0.6,4,5)"
1031 | ]
1032 | },
1033 | {
1034 | "cell_type": "code",
1035 | "execution_count": 119,
1036 | "metadata": {},
1037 | "outputs": [
1038 | {
1039 | "data": {
1040 | "text/plain": [
1041 | "11.39192807134976"
1042 | ]
1043 | },
1044 | "execution_count": 119,
1045 | "metadata": {},
1046 | "output_type": "execute_result"
1047 | }
1048 | ],
1049 | "source": [
1050 | "#f value\n",
1051 | "scipy.stats.f.ppf(1-0.01,4,5)"
1052 | ]
1053 | },
1054 | {
1055 | "cell_type": "code",
1056 | "execution_count": 120,
1057 | "metadata": {},
1058 | "outputs": [
1059 | {
1060 | "data": {
1061 | "text/plain": [
1062 | "F_onewayResult(statistic=1.2265003217482984, pvalue=0.32610743788671676)"
1063 | ]
1064 | },
1065 | "execution_count": 120,
1066 | "metadata": {},
1067 | "output_type": "execute_result"
1068 | }
1069 | ],
1070 | "source": [
1071 | "scipy.stats.f_oneway(fivep,tenp,fifteenp,twentyp)\n",
1072 | "#gives f and p value"
1073 | ]
1074 | },
1075 | {
1076 | "cell_type": "code",
1077 | "execution_count": 121,
1078 | "metadata": {},
1079 | "outputs": [
1080 | {
1081 | "data": {
1082 | "text/plain": [
1083 | "3.599599239012541e-06"
1084 | ]
1085 | },
1086 | "execution_count": 121,
1087 | "metadata": {},
1088 | "output_type": "execute_result"
1089 | }
1090 | ],
1091 | "source": [
1092 | "1-scipy.stats.f.cdf(19.6,3,20)"
1093 | ]
1094 | },
1095 | {
1096 | "cell_type": "code",
1097 | "execution_count": 123,
1098 | "metadata": {},
1099 | "outputs": [
1100 | {
1101 | "data": {
1102 | "text/plain": [
1103 | "4.938193382310539"
1104 | ]
1105 | },
1106 | "execution_count": 123,
1107 | "metadata": {},
1108 | "output_type": "execute_result"
1109 | }
1110 | ],
1111 | "source": [
1112 | "scipy.stats.f.ppf(1-0.01,dfn=3,dfd=20)"
1113 | ]
1114 | },
1115 | {
1116 | "cell_type": "code",
1117 | "execution_count": 124,
1118 | "metadata": {},
1119 | "outputs": [],
1120 | "source": [
1121 | "data=pd.read_excel('C:/Users/Garima Singh/Desktop/mooc data analysis/Tensile strength of paper.xlsx')"
1122 | ]
1123 | },
1124 | {
1125 | "cell_type": "code",
1126 | "execution_count": 129,
1127 | "metadata": {},
1128 | "outputs": [
1129 | {
1130 | "data": {
1131 | "text/html": [
1132 | "\n",
1133 | "\n",
1146 | "
\n",
1147 | " \n",
1148 | " \n",
1149 | " | \n",
1150 | " hardwood concentration 5% | \n",
1151 | " hardwood concentration 10% | \n",
1152 | " hardwood concentration 15% | \n",
1153 | " hardwood concentration 20% | \n",
1154 | "
\n",
1155 | " \n",
1156 | " \n",
1157 | " \n",
1158 | " | 0 | \n",
1159 | " 7 | \n",
1160 | " 12 | \n",
1161 | " 14 | \n",
1162 | " 19 | \n",
1163 | "
\n",
1164 | " \n",
1165 | " | 1 | \n",
1166 | " 8 | \n",
1167 | " 17 | \n",
1168 | " 18 | \n",
1169 | " 25 | \n",
1170 | "
\n",
1171 | " \n",
1172 | " | 2 | \n",
1173 | " 15 | \n",
1174 | " 13 | \n",
1175 | " 19 | \n",
1176 | " 22 | \n",
1177 | "
\n",
1178 | " \n",
1179 | " | 3 | \n",
1180 | " 11 | \n",
1181 | " 18 | \n",
1182 | " 17 | \n",
1183 | " 23 | \n",
1184 | "
\n",
1185 | " \n",
1186 | " | 4 | \n",
1187 | " 9 | \n",
1188 | " 19 | \n",
1189 | " 16 | \n",
1190 | " 18 | \n",
1191 | "
\n",
1192 | " \n",
1193 | " | 5 | \n",
1194 | " 10 | \n",
1195 | " 15 | \n",
1196 | " 18 | \n",
1197 | " 20 | \n",
1198 | "
\n",
1199 | " \n",
1200 | "
\n",
1201 | "
"
1202 | ],
1203 | "text/plain": [
1204 | " hardwood concentration 5% hardwood concentration 10% \\\n",
1205 | "0 7 12 \n",
1206 | "1 8 17 \n",
1207 | "2 15 13 \n",
1208 | "3 11 18 \n",
1209 | "4 9 19 \n",
1210 | "5 10 15 \n",
1211 | "\n",
1212 | " hardwood concentration 15% hardwood concentration 20% \n",
1213 | "0 14 19 \n",
1214 | "1 18 25 \n",
1215 | "2 19 22 \n",
1216 | "3 17 23 \n",
1217 | "4 16 18 \n",
1218 | "5 18 20 "
1219 | ]
1220 | },
1221 | "execution_count": 129,
1222 | "metadata": {},
1223 | "output_type": "execute_result"
1224 | }
1225 | ],
1226 | "source": [
1227 | "data"
1228 | ]
1229 | },
1230 | {
1231 | "cell_type": "code",
1232 | "execution_count": 126,
1233 | "metadata": {},
1234 | "outputs": [
1235 | {
1236 | "ename": "KeyError",
1237 | "evalue": "\"The following 'id_vars' are not present in the DataFrame: ['index']\"",
1238 | "output_type": "error",
1239 | "traceback": [
1240 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
1241 | "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
1242 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdatanew\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmelt\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mid_vars\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'index'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue_vars\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'Case Presentation'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'PPT'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[0mdatanew\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
1243 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\reshape\\melt.py\u001b[0m in \u001b[0;36mmelt\u001b[1;34m(frame, id_vars, value_vars, var_name, value_name, col_level)\u001b[0m\n\u001b[0;32m 50\u001b[0m \u001b[1;34m\"The following 'id_vars' are not present\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 51\u001b[0m \u001b[1;34m\" in the DataFrame: {missing}\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 52\u001b[1;33m \u001b[1;34m\"\"\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmissing\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mlist\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmissing\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 53\u001b[0m )\n\u001b[0;32m 54\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
1244 | "\u001b[1;31mKeyError\u001b[0m: \"The following 'id_vars' are not present in the DataFrame: ['index']\""
1245 | ]
1246 | }
1247 | ],
1248 | "source": [
1249 | "datanew=pd.melt(data, id_vars=['index'], value_vars=['Case Presentation','PPT'])\n",
1250 | "datanew"
1251 | ]
1252 | },
1253 | {
1254 | "cell_type": "code",
1255 | "execution_count": 138,
1256 | "metadata": {},
1257 | "outputs": [],
1258 | "source": [
1259 | "#fishers lsd test\n",
1260 | "t=scipy.stats.t.ppf(0.05,20)"
1261 | ]
1262 | },
1263 | {
1264 | "cell_type": "code",
1265 | "execution_count": 139,
1266 | "metadata": {},
1267 | "outputs": [
1268 | {
1269 | "data": {
1270 | "text/plain": [
1271 | "-1.7247182429207863"
1272 | ]
1273 | },
1274 | "execution_count": 139,
1275 | "metadata": {},
1276 | "output_type": "execute_result"
1277 | }
1278 | ],
1279 | "source": [
1280 | "t"
1281 | ]
1282 | },
1283 | {
1284 | "cell_type": "code",
1285 | "execution_count": 140,
1286 | "metadata": {},
1287 | "outputs": [],
1288 | "source": [
1289 | "t=t*-1"
1290 | ]
1291 | },
1292 | {
1293 | "cell_type": "code",
1294 | "execution_count": 141,
1295 | "metadata": {},
1296 | "outputs": [
1297 | {
1298 | "data": {
1299 | "text/plain": [
1300 | "1.7247182429207863"
1301 | ]
1302 | },
1303 | "execution_count": 141,
1304 | "metadata": {},
1305 | "output_type": "execute_result"
1306 | }
1307 | ],
1308 | "source": [
1309 | "t\n"
1310 | ]
1311 | },
1312 | {
1313 | "cell_type": "code",
1314 | "execution_count": 142,
1315 | "metadata": {},
1316 | "outputs": [],
1317 | "source": [
1318 | "n=6"
1319 | ]
1320 | },
1321 | {
1322 | "cell_type": "code",
1323 | "execution_count": 143,
1324 | "metadata": {},
1325 | "outputs": [],
1326 | "source": [
1327 | "MSE=6.50833"
1328 | ]
1329 | },
1330 | {
1331 | "cell_type": "code",
1332 | "execution_count": 144,
1333 | "metadata": {},
1334 | "outputs": [
1335 | {
1336 | "data": {
1337 | "text/plain": [
1338 | "2.540342724459959"
1339 | ]
1340 | },
1341 | "execution_count": 144,
1342 | "metadata": {},
1343 | "output_type": "execute_result"
1344 | }
1345 | ],
1346 | "source": [
1347 | "lsd=t*math.sqrt(2*MSE/n)\n",
1348 | "lsd"
1349 | ]
1350 | },
1351 | {
1352 | "cell_type": "code",
1353 | "execution_count": 146,
1354 | "metadata": {},
1355 | "outputs": [
1356 | {
1357 | "data": {
1358 | "text/html": [
1359 | "\n",
1360 | "Multiple Comparison of Means - Tukey HSD, FWER=0.05\n",
1361 | "\n",
1362 | " | group1 | group2 | meandiff | p-adj | lower | upper | reject | \n",
1363 | "
\n",
1364 | "\n",
1365 | " | Case Presentation | PPT | -2.0 | 0.1963 | -5.5844 | 1.5844 | False | \n",
1366 | "
\n",
1367 | "
"
1368 | ],
1369 | "text/plain": [
1370 | ""
1371 | ]
1372 | },
1373 | "execution_count": 146,
1374 | "metadata": {},
1375 | "output_type": "execute_result"
1376 | }
1377 | ],
1378 | "source": [
1379 | "#tukey krammer test\n",
1380 | "from statsmodels.stats.multicomp import pairwise_tukeyhsd\n",
1381 | "from statsmodels.stats.multicomp import MultiComparison\n",
1382 | "mc=MultiComparison(datanew['value'],datanew['treatments'])\n",
1383 | "mcresult=mc.tukeyhsd(0.05)\n",
1384 | "mcresult.summary()\n"
1385 | ]
1386 | },
1387 | {
1388 | "cell_type": "code",
1389 | "execution_count": null,
1390 | "metadata": {},
1391 | "outputs": [],
1392 | "source": []
1393 | }
1394 | ],
1395 | "metadata": {
1396 | "kernelspec": {
1397 | "display_name": "Python 3",
1398 | "language": "python",
1399 | "name": "python3"
1400 | },
1401 | "language_info": {
1402 | "codemirror_mode": {
1403 | "name": "ipython",
1404 | "version": 3
1405 | },
1406 | "file_extension": ".py",
1407 | "mimetype": "text/x-python",
1408 | "name": "python",
1409 | "nbconvert_exporter": "python",
1410 | "pygments_lexer": "ipython3",
1411 | "version": "3.7.4"
1412 | }
1413 | },
1414 | "nbformat": 4,
1415 | "nbformat_minor": 4
1416 | }
1417 |
--------------------------------------------------------------------------------