├── Data
│   └── empty
├── PPT
│   ├── empty
│   ├── PS.pptx
│   ├── PS_intro.pptx
│   └── Mathematical Derivations for Math Lovers (Optional).pptx
└── codes
    ├── empty
    ├── Spam Detection Project using Naive Bayes Algorithm.ipynb
    └── Distributions in Python with Sleep analysis Project.ipynb
/Data/empty:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/PPT/empty:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/codes/empty:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/PPT/PS.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AISPUBLISHING/Course-Mastering-Stat-proba/HEAD/PPT/PS.pptx
--------------------------------------------------------------------------------
/PPT/PS_intro.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AISPUBLISHING/Course-Mastering-Stat-proba/HEAD/PPT/PS_intro.pptx
--------------------------------------------------------------------------------
/PPT/Mathematical Derivations for Math Lovers (Optional).pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AISPUBLISHING/Course-Mastering-Stat-proba/HEAD/PPT/Mathematical Derivations for Math Lovers (Optional).pptx
--------------------------------------------------------------------------------
/codes/Spam Detection Project using Naive Bayes Algorithm.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "id": "db956ebd",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "spam = [\n",
11 | " \"To use your credit, click the new WAP link in the next years txt message or click here\", \n",
12 | " \"Thanks for your subscription to New Ringtone UK your new mobile will be charged £5/month Please confirm annoncement by replying\", \n",
13 | " \"As a valued customer, I am pleased to advise you that following recent delivery waiting review of your Mob No. you are awarded with. Call us to review.\", \n",
14 | " \"Please call our new customer service representative on\", \n",
15 | " \"We are trying to contact you. Last weekends customer draw shows that you won a £1000 prize GUARANTEED. Calling years\", \n",
16 | "]"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 2,
22 | "id": "e4785122",
23 | "metadata": {},
24 | "outputs": [],
25 | "source": [
26 | "spam_test = [\"Customer service annoncement. You have a New Years delivery waiting for you. click\"]"
27 | ]
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": 3,
32 | "id": "ab52c5e8",
33 | "metadata": {},
34 | "outputs": [],
35 | "source": [
36 | "non = [\n",
37 | " \"I don't think he goes to usf, he lives around here though\", \n",
38 | " \"New car and house for my parents. i have only new job in hand\", \n",
39 | " \"Great escape. I fancy the bridge but needs her lager. See you tomorrow\", \n",
40 | " \"Tired. I haven't slept well the past few nights.\",\n",
41 | " \"Too late. I said i have the website. I didn't i have or dont have the slippers\", \n",
42 | " \"I might come by tonight then if my class lets out early\", \n",
43 | " \"Jos ask if u wana meet up?\", \n",
44 | " \"That would be great. We'll be at the Guild. We can try meeting with the customer on Bristol road or somewhere\"\n",
45 | " ]"
46 | ]
47 | },
48 | {
49 | "cell_type": "code",
50 | "execution_count": 4,
51 | "id": "6f5f8809",
52 | "metadata": {},
53 | "outputs": [],
54 | "source": [
55 | "spam_test_2 = [\"That would be great. We'll be at the Guild. We can try meeting with the customer on Bristol road or somewhere\"]"
56 | ]
57 | },
58 | {
59 | "cell_type": "code",
60 | "execution_count": null,
61 | "id": "4f1147b4",
62 | "metadata": {},
63 | "outputs": [],
64 | "source": [
65 | "# !pip install gensim"
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "execution_count": 5,
71 | "id": "df3f9c8b",
72 | "metadata": {},
73 | "outputs": [
74 | {
75 | "name": "stderr",
76 | "output_type": "stream",
77 | "text": [
78 | "C:\\Users\\lenovo\\anaconda3\\envs\\saad\\lib\\site-packages\\gensim\\similarities\\__init__.py:15: UserWarning: The gensim.similarities.levenshtein submodule is disabled, because the optional Levenshtein package is unavailable. Install Levenhstein (e.g. `pip install python-Levenshtein`) to suppress this warning.\n",
79 | " warnings.warn(msg)\n"
80 | ]
81 | }
82 | ],
83 | "source": [
84 | "from gensim.parsing.preprocessing import remove_stopwords\n",
85 | "from gensim.parsing.porter import PorterStemmer\n",
86 | "from gensim.utils import tokenize"
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": 6,
92 | "id": "445e69a1",
93 | "metadata": {},
94 | "outputs": [
95 | {
96 | "name": "stdout",
97 | "output_type": "stream",
98 | "text": [
99 | "Thanks for your subscription to New Ringtone UK your new mobile will be charged £5/month Please confirm annoncement by replying\n"
100 | ]
101 | }
102 | ],
103 | "source": [
104 | "test_sentence = spam[1]\n",
105 | "print(test_sentence)"
106 | ]
107 | },
108 | {
109 | "cell_type": "code",
110 | "execution_count": 8,
111 | "id": "b050f58c",
112 | "metadata": {},
113 | "outputs": [
114 | {
115 | "name": "stdout",
116 | "output_type": "stream",
117 | "text": [
118 | "Thanks subscription New Ringtone UK new mobile charged £5/month Please confirm annoncement replying\n"
119 | ]
120 | }
121 | ],
122 | "source": [
123 | "stops = remove_stopwords(test_sentence)\n",
124 | "print(stops)"
125 | ]
126 | },
127 | {
128 | "cell_type": "code",
129 | "execution_count": 9,
130 | "id": "0e37ec0e",
131 | "metadata": {},
132 | "outputs": [
133 | {
134 | "name": "stdout",
135 | "output_type": "stream",
136 | "text": [
137 | "thanks subscription new ringtone uk new mobile charged £5/month please confirm annoncement repli\n"
138 | ]
139 | }
140 | ],
141 | "source": [
142 | "p=PorterStemmer()\n",
143 | "stemmed = p.stem(stops)\n",
144 | "print(stemmed)"
145 | ]
146 | },
147 | {
148 | "cell_type": "code",
149 | "execution_count": 10,
150 | "id": "b04750b4",
151 | "metadata": {},
152 | "outputs": [
153 | {
154 | "name": "stdout",
155 | "output_type": "stream",
156 | "text": [
157 | "['thanks', 'subscription', 'new', 'ringtone', 'uk', 'new', 'mobile', 'charged', 'month', 'please', 'confirm', 'annoncement', 'repli']\n"
158 | ]
159 | }
160 | ],
161 | "source": [
162 | "tokens = tokenize(stemmed)\n",
163 | "print(list(tokens))"
164 | ]
165 | },
166 | {
167 | "cell_type": "code",
168 | "execution_count": 11,
169 | "id": "2072364f",
170 | "metadata": {},
171 | "outputs": [],
172 | "source": [
173 | "def tokenize_sentence(sentence): \n",
174 | "    p = PorterStemmer()\n",
175 | "    removed_stops = remove_stopwords(sentence)\n",
176 | "    stemmed = p.stem(removed_stops)\n",
177 | "    tokens = tokenize(stemmed)\n",
178 | "    return list(tokens)"
179 | ]
180 | },
181 | {
182 | "cell_type": "code",
183 | "execution_count": 12,
184 | "id": "87129bf1",
185 | "metadata": {},
186 | "outputs": [],
187 | "source": [
188 | "dictionary = set()\n",
189 | "spams_tokenized = []\n",
190 | "nons_tokenized = []\n",
191 | "\n",
192 | "for sentence in spam:\n",
193 | "    sentence_tokens = tokenize_sentence(sentence)\n",
194 | "    spams_tokenized.append(sentence_tokens)\n",
195 | "    dictionary = dictionary.union(sentence_tokens)\n",
196 | "    \n",
197 | "for sentence in non: \n",
198 | "    sentence_tokens = tokenize_sentence(sentence)\n",
199 | "    nons_tokenized.append(sentence_tokens)\n",
200 | "    dictionary = dictionary.union(sentence_tokens)"
201 | ]
202 | },
203 | {
204 | "cell_type": "code",
205 | "execution_count": 13,
206 | "id": "e6bd192b",
207 | "metadata": {},
208 | "outputs": [
209 | {
210 | "name": "stdout",
211 | "output_type": "stream",
212 | "text": [
213 | "[['to', 'use', 'credit', 'click', 'new', 'wap', 'link', 'years', 'txt', 'message', 'click'], ['thanks', 'subscription', 'new', 'ringtone', 'uk', 'new', 'mobile', 'charged', 'month', 'please', 'confirm', 'annoncement', 'repli'], ['as', 'valued', 'customer', 'i', 'pleased', 'advise', 'following', 'recent', 'delivery', 'waiting', 'review', 'mob', 'no', 'awarded', 'with', 'call', 'review'], ['please', 'new', 'customer', 'service', 'repres'], ['we', 'trying', 'contact', 'you', 'last', 'weekends', 'customer', 'draw', 'shows', 'won', 'prize', 'guaranteed', 'calling', 'year']]\n"
214 | ]
215 | }
216 | ],
217 | "source": [
218 | "print(spams_tokenized)"
219 | ]
220 | },
221 | {
222 | "cell_type": "code",
223 | "execution_count": 15,
224 | "id": "43d8384a",
225 | "metadata": {},
226 | "outputs": [
227 | {
228 | "name": "stdout",
229 | "output_type": "stream",
230 | "text": [
231 | "[['i', 'don', 't', 'think', 'goes', 'usf', 'l'], ['new', 'car', 'house', 'parents', 'new', 'job', 'hand'], ['great', 'escape', 'i', 'fancy', 'bridge', 'needs', 'lager', 'see', 'tomorrow'], ['tired', 'i', 'haven', 't', 'slept', 'past', 'nights'], ['too', 'late', 'i', 'said', 'website', 'i', 'didn', 't', 'dont', 'slipp'], ['i', 'come', 'tonight', 'class', 'lets', 'earli'], ['jos', 'ask', 'u', 'wana', 'meet', 'up'], ['that', 'great', 'we', 'll', 'guild', 'we', 'try', 'meeting', 'customer', 'bristol', 'road']]\n"
232 | ]
233 | }
234 | ],
235 | "source": [
236 | "print(nons_tokenized)"
237 | ]
238 | },
239 | {
240 | "cell_type": "code",
241 | "execution_count": 16,
242 | "id": "51904cff",
243 | "metadata": {},
244 | "outputs": [
245 | {
246 | "name": "stdout",
247 | "output_type": "stream",
248 | "text": [
249 | "{'escape', 'tired', 'great', 'll', 'wap', 'txt', 'following', 'charged', 'parents', 'link', 'slept', 'to', 'house', 'nights', 'ringtone', 'no', 'with', 'you', 'use', 'credit', 'ask', 'new', 'thanks', 't', 'hand', 'valued', 'awarded', 'website', 'jos', 'waiting', 'come', 'needs', 'i', 'usf', 'lager', 'see', 'mobile', 'dont', 'pleased', 'contact', 'guaranteed', 'lets', 'draw', 'too', 'up', 'uk', 'meet', 'customer', 'late', 'earli', 'weekends', 'annoncement', 'as', 'we', 'tomorrow', 'slipp', 'year', 'bridge', 'won', 'repres', 'said', 'car', 'last', 'tonight', 'mob', 'calling', 'past', 'u', 'prize', 'fancy', 'recent', 'think', 'try', 'trying', 'delivery', 'that', 'meeting', 'haven', 'month', 'years', 'advise', 'repli', 'review', 'service', 'call', 'don', 'shows', 'bristol', 'goes', 'confirm', 'click', 'please', 'road', 'message', 'job', 'wana', 'l', 'class', 'guild', 'subscription', 'didn'}\n"
250 | ]
251 | }
252 | ],
253 | "source": [
254 | "print(dictionary)"
255 | ]
256 | },
257 | {
258 | "cell_type": "code",
259 | "execution_count": 17,
260 | "id": "cb80dab2",
261 | "metadata": {},
262 | "outputs": [
263 | {
264 | "name": "stdout",
265 | "output_type": "stream",
266 | "text": [
267 | "101\n"
268 | ]
269 | }
270 | ],
271 | "source": [
272 | "total_word_counts = len(dictionary)\n",
273 | "total_spam_messages = len(spams_tokenized)\n",
274 | "total_all_messages = len(spams_tokenized) + len(nons_tokenized)\n",
275 | "print(total_word_counts)"
276 | ]
277 | },
278 | {
279 | "cell_type": "code",
280 | "execution_count": 18,
281 | "id": "aa417e3f",
282 | "metadata": {},
283 | "outputs": [
284 | {
285 | "name": "stdout",
286 | "output_type": "stream",
287 | "text": [
288 | "0.38461538461538464\n"
289 | ]
290 | }
291 | ],
292 | "source": [
293 | "p_spam = total_spam_messages/total_all_messages\n",
294 | "print(p_spam)"
295 | ]
296 | },
297 | {
298 | "cell_type": "code",
299 | "execution_count": 19,
300 | "id": "3b89b3fc",
301 | "metadata": {},
302 | "outputs": [],
303 | "source": [
304 | "def count_word_in_messages(word, messages): \n",
305 | "    total_count = 0\n",
306 | "    for msg in messages: \n",
307 | "        if word in msg: # membership test: each message counts at most once, even if the word repeats in it \n",
308 | "            total_count += 1 \n",
309 | "    \n",
310 | "    return total_count "
311 | ]
312 | },
313 | {
314 | "cell_type": "code",
315 | "execution_count": 20,
316 | "id": "1e2bc0e1",
317 | "metadata": {},
318 | "outputs": [
319 | {
320 | "name": "stdout",
321 | "output_type": "stream",
322 | "text": [
323 | "['that', 'great', 'we', 'll', 'guild', 'we', 'try', 'meeting', 'customer', 'bristol', 'road']\n",
324 | "----------------\n",
325 | "Runnig for word: that\n",
326 | "P( w | spam) = 0.0\n",
327 | "P( w ) = 0.07692307692307693\n",
328 | "P( spam ) = 0.38461538461538464\n",
329 | "P( spam | w ) = 0.0\n",
330 | "\n",
331 | "----------------\n",
332 | "Runnig for word: great\n",
333 | "P( w | spam) = 0.0\n",
334 | "P( w ) = 0.15384615384615385\n",
335 | "P( spam ) = 0.38461538461538464\n",
336 | "P( spam | w ) = 0.0\n",
337 | "\n",
338 | "----------------\n",
339 | "Runnig for word: we\n",
340 | "P( w | spam) = 0.2\n",
341 | "P( w ) = 0.15384615384615385\n",
342 | "P( spam ) = 0.38461538461538464\n",
343 | "P( spam | w ) = 0.5\n",
344 | "\n",
345 | "----------------\n",
346 | "Runnig for word: ll\n",
347 | "P( w | spam) = 0.0\n",
348 | "P( w ) = 0.07692307692307693\n",
349 | "P( spam ) = 0.38461538461538464\n",
350 | "P( spam | w ) = 0.0\n",
351 | "\n",
352 | "----------------\n",
353 | "Runnig for word: guild\n",
354 | "P( w | spam) = 0.0\n",
355 | "P( w ) = 0.07692307692307693\n",
356 | "P( spam ) = 0.38461538461538464\n",
357 | "P( spam | w ) = 0.0\n",
358 | "\n",
359 | "----------------\n",
360 | "Runnig for word: we\n",
361 | "P( w | spam) = 0.2\n",
362 | "P( w ) = 0.15384615384615385\n",
363 | "P( spam ) = 0.38461538461538464\n",
364 | "P( spam | w ) = 0.5\n",
365 | "\n",
366 | "----------------\n",
367 | "Runnig for word: try\n",
368 | "P( w | spam) = 0.0\n",
369 | "P( w ) = 0.07692307692307693\n",
370 | "P( spam ) = 0.38461538461538464\n",
371 | "P( spam | w ) = 0.0\n",
372 | "\n",
373 | "----------------\n",
374 | "Runnig for word: meeting\n",
375 | "P( w | spam) = 0.0\n",
376 | "P( w ) = 0.07692307692307693\n",
377 | "P( spam ) = 0.38461538461538464\n",
378 | "P( spam | w ) = 0.0\n",
379 | "\n",
380 | "----------------\n",
381 | "Runnig for word: customer\n",
382 | "P( w | spam) = 0.6\n",
383 | "P( w ) = 0.3076923076923077\n",
384 | "P( spam ) = 0.38461538461538464\n",
385 | "P( spam | w ) = 0.75\n",
386 | "\n",
387 | "----------------\n",
388 | "Runnig for word: bristol\n",
389 | "P( w | spam) = 0.0\n",
390 | "P( w ) = 0.07692307692307693\n",
391 | "P( spam ) = 0.38461538461538464\n",
392 | "P( spam | w ) = 0.0\n",
393 | "\n",
394 | "----------------\n",
395 | "Runnig for word: road\n",
396 | "P( w | spam) = 0.0\n",
397 | "P( w ) = 0.07692307692307693\n",
398 | "P( spam ) = 0.38461538461538464\n",
399 | "P( spam | w ) = 0.0\n",
400 | "\n",
401 | "P( spam | all_words ) = 0.0\n"
402 | ]
403 | }
404 | ],
405 | "source": [
406 | "final_prob = 1 # can't start from 0 \n",
407 | "\n",
408 | "\n",
409 | "for test_sentence in spam_test_2: \n",
410 | "    test_sentence = tokenize_sentence(test_sentence)\n",
411 | "    print(test_sentence)\n",
412 | "    \n",
413 | "    # let's run this for each word separately \n",
414 | "    for word in test_sentence: \n",
415 | "        print(\"----------------\")\n",
416 | "        print(\"Runnig for word:\", word)\n",
417 | "        \n",
418 | "        # Find P( w | spam)\n",
419 | "        spam_count = count_word_in_messages(word, spams_tokenized)\n",
420 | "        p_w_spam = spam_count / total_spam_messages \n",
421 | "        print(\"P( w | spam) = \", p_w_spam)\n",
422 | "        \n",
423 | "        # Find P( w )\n",
424 | "        w_count = count_word_in_messages(word, spams_tokenized)\n",
425 | "        w_count += count_word_in_messages(word, nons_tokenized)\n",
426 | "        p_w = w_count / total_all_messages\n",
427 | "        print(\"P( w ) = \", p_w)\n",
428 | "        \n",
429 | "        \n",
430 | "        # Find P( spam | w )\n",
431 | "        p_spam_w = (p_w_spam * p_spam) / p_w\n",
432 | "        print(\"P( spam ) = \", p_spam)\n",
433 | "        print(\"P( spam | w ) = \", p_spam_w)\n",
434 | "        print(\"\")\n",
435 | "        final_prob *= p_spam_w\n",
436 | "    \n",
437 | "    \n",
438 | "    print(\"P( spam | all_words ) = \", final_prob)"
439 | ]
440 | },
441 | {
442 | "cell_type": "code",
443 | "execution_count": null,
444 | "id": "13bc303f",
445 | "metadata": {},
446 | "outputs": [],
447 | "source": []
448 | },
449 | {
450 | "cell_type": "code",
451 | "execution_count": null,
452 | "id": "974e434d",
453 | "metadata": {},
454 | "outputs": [],
455 | "source": []
456 | },
457 | {
458 | "cell_type": "code",
459 | "execution_count": null,
460 | "id": "276dce54",
461 | "metadata": {},
462 | "outputs": [],
463 | "source": []
464 | },
465 | {
466 | "cell_type": "code",
467 | "execution_count": null,
468 | "id": "8ebe786e",
469 | "metadata": {},
470 | "outputs": [],
471 | "source": []
472 | },
473 | {
474 | "cell_type": "code",
475 | "execution_count": null,
476 | "id": "4dc394df",
477 | "metadata": {},
478 | "outputs": [],
479 | "source": []
480 | },
481 | {
482 | "cell_type": "code",
483 | "execution_count": null,
484 | "id": "ca1c7c6c",
485 | "metadata": {},
486 | "outputs": [],
487 | "source": []
488 | },
489 | {
490 | "cell_type": "code",
491 | "execution_count": null,
492 | "id": "0dd15659",
493 | "metadata": {},
494 | "outputs": [],
495 | "source": []
496 | }
497 | ],
498 | "metadata": {
499 | "kernelspec": {
500 | "display_name": "Python 3 (ipykernel)",
501 | "language": "python",
502 | "name": "python3"
503 | },
504 | "language_info": {
505 | "codemirror_mode": {
506 | "name": "ipython",
507 | "version": 3
508 | },
509 | "file_extension": ".py",
510 | "mimetype": "text/x-python",
511 | "name": "python",
512 | "nbconvert_exporter": "python",
513 | "pygments_lexer": "ipython3",
514 | "version": "3.8.11"
515 | }
516 | },
517 | "nbformat": 4,
518 | "nbformat_minor": 5
519 | }
520 |
--------------------------------------------------------------------------------
/codes/Distributions in Python with Sleep analysis Project.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "id": "1587ffcb",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import matplotlib.pyplot as plt\n",
11 | "%matplotlib inline\n",
12 | "\n",
13 | "import seaborn as sns\n",
14 | "sns.set(color_codes = True)"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 2,
20 | "id": "4d2f974f",
21 | "metadata": {},
22 | "outputs": [],
23 | "source": [
24 | "from scipy.stats import bernoulli"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": 4,
30 | "id": "a9a0893f",
31 | "metadata": {},
32 | "outputs": [
33 | {
34 | "data": {
35 | "text/plain": [
36 | "[Text(0.5, 0, 'Bernoulli Distribution'), Text(0, 0.5, 'Frequency')]"
37 | ]
38 | },
39 | "execution_count": 4,
40 | "metadata": {},
41 | "output_type": "execute_result"
42 | },
43 | {
44 | "data": {
45 | "image/png": "\n",
46 | "text/plain": [
47 | ""
48 | ]
49 | },
50 | "metadata": {},
51 | "output_type": "display_data"
52 | }
53 | ],
54 | "source": [
55 | "data_bern = bernoulli.rvs(size=10000, p=0.6)\n",
56 | "ax = sns.distplot(data_bern, bins=100, kde=True)\n",
57 | "ax.set(xlabel=\"Bernoulli Distribution\", ylabel='Frequency')"
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": 5,
63 | "id": "75d18e0b",
64 | "metadata": {},
65 | "outputs": [],
66 | "source": [
67 | "from scipy.stats import binom"
68 | ]
69 | },
70 | {
71 | "cell_type": "code",
72 | "execution_count": 6,
73 | "id": "ef5e8ae5",
74 | "metadata": {},
75 | "outputs": [
76 | {
77 | "name": "stderr",
78 | "output_type": "stream",
79 | "text": [
80 | "C:\\Users\\lenovo\\anaconda3\\envs\\saad\\lib\\site-packages\\seaborn\\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n",
81 | " warnings.warn(msg, FutureWarning)\n"
82 | ]
83 | },
84 | {
85 | "data": {
86 | "image/png": "\n",
87 | "text/plain": [
88 | ""
89 | ]
90 | },
91 | "metadata": {},
92 | "output_type": "display_data"
93 | }
94 | ],
95 | "source": [
96 | "data_binom = binom.rvs(n=10, p=0.6, size=1000)\n",
97 | "ax = sns.distplot(data_binom,\n",
98 | " bins=100,\n",
99 | " kde=False,\n",
100 | " )\n",
101 | "_ = ax.set(xlabel='Binomial Distribution ', ylabel='Frequency')"
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "execution_count": 8,
107 | "id": "dda8b83f",
108 | "metadata": {},
109 | "outputs": [],
110 | "source": [
111 | "from scipy.stats import uniform"
112 | ]
113 | },
114 | {
115 | "cell_type": "code",
116 | "execution_count": 10,
117 | "id": "a72fb1db",
118 | "metadata": {},
119 | "outputs": [
120 | {
121 | "data": {
122 | "text/plain": [
123 | "[Text(0.5, 0, 'Uniform Distribution'), Text(0, 0.5, 'Frequency')]"
124 | ]
125 | },
126 | "execution_count": 10,
127 | "metadata": {},
128 | "output_type": "execute_result"
129 | },
130 | {
131 | "data": {
132 | "image/png": "\n",
133 | "text/plain": [
134 | ""
135 | ]
136 | },
137 | "metadata": {},
138 | "output_type": "display_data"
139 | }
140 | ],
141 | "source": [
142 | "n = 10000\n",
143 | "n *=100\n",
144 | "start =10\n",
145 | "width=20\n",
146 | "\n",
147 | "data_uniform = uniform.rvs(size=n,loc=start,scale=width)\n",
148 | "ax=sns.distplot(data_uniform,bins=100,kde=False)\n",
149 | "ax.set(xlabel='Uniform Distribution', ylabel='Frequency')"
150 | ]
151 | },
152 | {
153 | "cell_type": "code",
154 | "execution_count": 11,
155 | "id": "947b6ca3",
156 | "metadata": {},
157 | "outputs": [],
158 | "source": [
159 | "import numpy as np\n",
160 | "import pandas as pd\n",
161 | "from IPython.core.pylabtools import figsize\n",
162 | "\n",
163 | "figsize(16,6)"
164 | ]
165 | },
166 | {
167 | "cell_type": "code",
168 | "execution_count": 12,
169 | "id": "c6c1e192",
170 | "metadata": {},
171 | "outputs": [],
172 | "source": [
173 | "sleep_data=pd.read_csv('data/sleep_data.csv')"
174 | ]
175 | },
176 | {
177 | "cell_type": "code",
178 | "execution_count": 13,
179 | "id": "3f9c3010",
180 | "metadata": {},
181 | "outputs": [
182 | {
183 | "data": {
184 | "text/html": [
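185 | "<div>\n",
186 | "<style scoped>\n",
187 | "    .dataframe tbody tr th:only-of-type {\n",
188 | "        vertical-align: middle;\n",
189 | "    }\n",
190 | "\n",
191 | "    .dataframe tbody tr th {\n",
192 | "        vertical-align: top;\n",
193 | "    }\n",
194 | "\n",
195 | "    .dataframe thead th {\n",
196 | "        text-align: right;\n",
197 | "    }\n",
198 | "</style>\n",
199 | "<table border=\"1\" class=\"dataframe\">\n",
200 | "  <thead>\n",
201 | "    <tr style=\"text-align: right;\">\n",
202 | "      <th></th>\n",
203 | "      <th>Unnamed: 0</th>\n",
204 | "      <th>indicator</th>\n",
205 | "      <th>time_offset</th>\n",
206 | "    </tr>\n",
207 | "  </thead>\n",
208 | "  <tbody>\n",
209 | "    <tr>\n",
210 | "      <th>0</th>\n",
211 | "      <td>2017-12-01 21:00:00</td>\n",
212 | "      <td>0</td>\n",
213 | "      <td>-60</td>\n",
214 | "    </tr>\n",
215 | "    <tr>\n",
216 | "      <th>1</th>\n",
217 | "      <td>2017-12-01 21:01:00</td>\n",
218 | "      <td>0</td>\n",
219 | "      <td>-59</td>\n",
220 | "    </tr>\n",
221 | "    <tr>\n",
222 | "      <th>2</th>\n",
223 | "      <td>2017-12-01 21:02:00</td>\n",
224 | "      <td>0</td>\n",
225 | "      <td>-58</td>\n",
226 | "    </tr>\n",
227 | "    <tr>\n",
228 | "      <th>3</th>\n",
229 | "      <td>2017-12-01 21:03:00</td>\n",
230 | "      <td>0</td>\n",
231 | "      <td>-57</td>\n",
232 | "    </tr>\n",
233 | "    <tr>\n",
234 | "      <th>4</th>\n",
235 | "      <td>2017-12-01 21:04:00</td>\n",
236 | "      <td>0</td>\n",
237 | "      <td>-56</td>\n",
238 | "    </tr>\n",
239 | "    <tr>\n",
240 | "      <th>...</th>\n",
241 | "      <td>...</td>\n",
242 | "      <td>...</td>\n",
243 | "      <td>...</td>\n",
244 | "    </tr>\n",
245 | "    <tr>\n",
246 | "      <th>11335</th>\n",
247 | "      <td>2018-02-01 23:55:00</td>\n",
248 | "      <td>1</td>\n",
249 | "      <td>115</td>\n",
250 | "    </tr>\n",
251 | "    <tr>\n",
252 | "      <th>11336</th>\n",
253 | "      <td>2018-02-01 23:56:00</td>\n",
254 | "      <td>1</td>\n",
255 | "      <td>116</td>\n",
256 | "    </tr>\n",
257 | "    <tr>\n",
258 | "      <th>11337</th>\n",
259 | "      <td>2018-02-01 23:57:00</td>\n",
260 | "      <td>1</td>\n",
261 | "      <td>117</td>\n",
262 | "    </tr>\n",
263 | "    <tr>\n",
264 | "      <th>11338</th>\n",
265 | "      <td>2018-02-01 23:58:00</td>\n",
266 | "      <td>1</td>\n",
267 | "      <td>118</td>\n",
268 | "    </tr>\n",
269 | "    <tr>\n",
270 | "      <th>11339</th>\n",
271 | "      <td>2018-02-01 23:59:00</td>\n",
272 | "      <td>1</td>\n",
273 | "      <td>119</td>\n",
274 | "    </tr>\n",
275 | "  </tbody>\n",
276 | "</table>\n",
277 | "<p>11340 rows × 3 columns</p>\n",
278 | "</div>"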
279 | ],
280 | "text/plain": [
281 | " Unnamed: 0 indicator time_offset\n",
282 | "0 2017-12-01 21:00:00 0 -60\n",
283 | "1 2017-12-01 21:01:00 0 -59\n",
284 | "2 2017-12-01 21:02:00 0 -58\n",
285 | "3 2017-12-01 21:03:00 0 -57\n",
286 | "4 2017-12-01 21:04:00 0 -56\n",
287 | "... ... ... ...\n",
288 | "11335 2018-02-01 23:55:00 1 115\n",
289 | "11336 2018-02-01 23:56:00 1 116\n",
290 | "11337 2018-02-01 23:57:00 1 117\n",
291 | "11338 2018-02-01 23:58:00 1 118\n",
292 | "11339 2018-02-01 23:59:00 1 119\n",
293 | "\n",
294 | "[11340 rows x 3 columns]"
295 | ]
296 | },
297 | "execution_count": 13,
298 | "metadata": {},
299 | "output_type": "execute_result"
300 | }
301 | ],
302 | "source": [
303 | "sleep_data"
304 | ]
305 | },
306 | {
307 | "cell_type": "code",
308 | "execution_count": 15,
309 | "id": "f2c6dfc1",
310 | "metadata": {},
311 | "outputs": [
312 | {
313 | "data": {
314 | "image/png": "\n",
315 | "text/plain": [
316 | ""
317 | ]
318 | },
319 | "metadata": {},
320 | "output_type": "display_data"
321 | }
322 | ],
323 | "source": [
324 | "sleep_labels=['9:00','9:30','10:00','10:30','11:00','11:30','12:00']\n",
325 | "plt.scatter(sleep_data['time_offset'],sleep_data['indicator'],s=60,alpha=0.01,facecolor='b',edgecolor='b')\n",
326 | "\n",
327 | "plt.yticks([0,1],['Awake','Asleep']);plt.xlabel('PM time')\n",
328 | "plt.title('Falling sleep data',size=18)\n",
329 | "plt.xticks([-60,-30,0,30,60,90,120],sleep_labels);"
330 | ]
331 | },
332 | {
333 | "cell_type": "code",
334 | "execution_count": 16,
335 | "id": "e2950e07",
336 | "metadata": {},
337 | "outputs": [],
338 | "source": [
339 | "sleep_data.sort_values('time_offset', inplace=True)\n",
340 | "time = np.array(sleep_data.loc[:,'time_offset'])\n",
341 | "sleep_obs = np.array(sleep_data.loc[:,'indicator'])"
342 | ]
343 | },
344 | {
345 | "cell_type": "code",
346 | "execution_count": 17,
347 | "id": "90d38ab7",
348 | "metadata": {},
349 | "outputs": [
350 | {
351 | "data": {
352 | "text/plain": [
353 | "array([0, 0, 0, ..., 1, 1, 1], dtype=int64)"
354 | ]
355 | },
356 | "execution_count": 17,
357 | "metadata": {},
358 | "output_type": "execute_result"
359 | }
360 | ],
361 | "source": [
362 | "sleep_obs"
363 | ]
364 | },
365 | {
366 | "cell_type": "code",
367 | "execution_count": 18,
368 | "id": "53e28f37",
369 | "metadata": {},
370 | "outputs": [
371 | {
372 | "data": {
373 | "text/plain": [
374 | "array([-60, -60, -60, ..., 119, 119, 119], dtype=int64)"
375 | ]
376 | },
377 | "execution_count": 18,
378 | "metadata": {},
379 | "output_type": "execute_result"
380 | }
381 | ],
382 | "source": [
383 | "time"
384 | ]
385 | },
386 | {
387 | "cell_type": "code",
388 | "execution_count": 19,
389 | "id": "44b58c78",
390 | "metadata": {},
391 | "outputs": [
392 | {
393 | "data": {
394 | "text/html": [
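395 | "<div>\n",
396 | "<style scoped>\n",
397 | "    .dataframe tbody tr th:only-of-type {\n",
398 | "        vertical-align: middle;\n",
399 | "    }\n",
400 | "\n",
401 | "    .dataframe tbody tr th {\n",
402 | "        vertical-align: top;\n",
403 | "    }\n",
404 | "\n",
405 | "    .dataframe thead th {\n",
406 | "        text-align: right;\n",
407 | "    }\n",
408 | "</style>\n",
409 | "<table border=\"1\" class=\"dataframe\">\n",
410 | "  <thead>\n",
411 | "    <tr style=\"text-align: right;\">\n",
412 | "      <th></th>\n",
413 | "      <th>Unnamed: 0</th>\n",
414 | "      <th>indicator</th>\n",
415 | "      <th>time_offset</th>\n",
416 | "    </tr>\n",
417 | "  </thead>\n",
418 | "  <tbody>\n",
419 | "    <tr>\n",
420 | "      <th>0</th>\n",
421 | "      <td>2017-12-01 21:00:00</td>\n",
422 | "      <td>0</td>\n",
423 | "      <td>-60</td>\n",
424 | "    </tr>\n",
425 | "    <tr>\n",
426 | "      <th>4320</th>\n",
427 | "      <td>2017-12-25 21:00:00</td>\n",
428 | "      <td>0</td>\n",
429 | "      <td>-60</td>\n",
430 | "    </tr>\n",
431 | "    <tr>\n",
432 | "      <th>1260</th>\n",
433 | "      <td>2017-12-08 21:00:00</td>\n",
434 | "      <td>0</td>\n",
435 | "      <td>-60</td>\n",
436 | "    </tr>\n",
437 | "    <tr>\n",
438 | "      <th>10080</th>\n",
439 | "      <td>2018-01-26 21:00:00</td>\n",
440 | "      <td>0</td>\n",
441 | "      <td>-60</td>\n",
442 | "    </tr>\n",
443 | "    <tr>\n",
444 | "      <th>6840</th>\n",
445 | "      <td>2018-01-08 21:00:00</td>\n",
446 | "      <td>0</td>\n",
447 | "      <td>-60</td>\n",
448 | "    </tr>\n",
449 | "    <tr>\n",
450 | "      <th>...</th>\n",
451 | "      <td>...</td>\n",
452 | "      <td>...</td>\n",
453 | "      <td>...</td>\n",
454 | "    </tr>\n",
455 | "    <tr>\n",
456 | "      <th>8999</th>\n",
457 | "      <td>2018-01-19 23:59:00</td>\n",
458 | "      <td>1</td>\n",
459 | "      <td>119</td>\n",
460 | "    </tr>\n",
461 | "    <tr>\n",
462 | "      <th>2159</th>\n",
463 | "      <td>2017-12-12 23:59:00</td>\n",
464 | "      <td>1</td>\n",
465 | "      <td>119</td>\n",
466 | "    </tr>\n",
467 | "    <tr>\n",
468 | "      <th>9179</th>\n",
469 | "      <td>2018-01-20 23:59:00</td>\n",
470 | "      <td>1</td>\n",
471 | "      <td>119</td>\n",
472 | "    </tr>\n",
473 | "    <tr>\n",
474 | "      <th>5579</th>\n",
475 | "      <td>2017-12-31 23:59:00</td>\n",
476 | "      <td>1</td>\n",
477 | "      <td>119</td>\n",
478 | "    </tr>\n",
479 | "    <tr>\n",
480 | "      <th>11339</th>\n",
481 | "      <td>2018-02-01 23:59:00</td>\n",
482 | "      <td>1</td>\n",
483 | "      <td>119</td>\n",
484 | "    </tr>\n",
485 | "  </tbody>\n",
486 | "</table>\n",
487 | "<p>11340 rows × 3 columns</p>\n",
488 | "</div>"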
489 | ],
490 | "text/plain": [
491 | " Unnamed: 0 indicator time_offset\n",
492 | "0 2017-12-01 21:00:00 0 -60\n",
493 | "4320 2017-12-25 21:00:00 0 -60\n",
494 | "1260 2017-12-08 21:00:00 0 -60\n",
495 | "10080 2018-01-26 21:00:00 0 -60\n",
496 | "6840 2018-01-08 21:00:00 0 -60\n",
497 | "... ... ... ...\n",
498 | "8999 2018-01-19 23:59:00 1 119\n",
499 | "2159 2017-12-12 23:59:00 1 119\n",
500 | "9179 2018-01-20 23:59:00 1 119\n",
501 | "5579 2017-12-31 23:59:00 1 119\n",
502 | "11339 2018-02-01 23:59:00 1 119\n",
503 | "\n",
504 | "[11340 rows x 3 columns]"
505 | ]
506 | },
507 | "execution_count": 19,
508 | "metadata": {},
509 | "output_type": "execute_result"
510 | }
511 | ],
512 | "source": [
513 | "sleep_data"
514 | ]
515 | },
516 | {
517 | "cell_type": "code",
518 | "execution_count": 20,
519 | "id": "b8184785",
520 | "metadata": {},
521 | "outputs": [
522 | {
523 | "data": {
524 | "text/plain": [
525 | "array([0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0], dtype=int64)"
526 | ]
527 | },
528 | "execution_count": 20,
529 | "metadata": {},
530 | "output_type": "execute_result"
531 | }
532 | ],
533 | "source": [
534 | "sleep_obs[3260:3275]"
535 | ]
536 | },
537 | {
538 | "cell_type": "code",
539 | "execution_count": 21,
540 | "id": "6e966ca1",
541 | "metadata": {},
542 | "outputs": [],
543 | "source": [
544 | "time_offset=15"
545 | ]
546 | },
547 | {
548 | "cell_type": "code",
549 | "execution_count": 22,
550 | "id": "f2017f31",
551 | "metadata": {},
552 | "outputs": [],
553 | "source": [
554 | "awake_vals = sleep_data.loc[(sleep_data['time_offset']==time_offset)\n",
555 | " &(sleep_data['indicator']==0)]"
556 | ]
557 | },
558 | {
559 | "cell_type": "code",
560 | "execution_count": 23,
561 | "id": "35b7f108",
562 | "metadata": {},
563 | "outputs": [],
564 | "source": [
565 | "sleep_vals = sleep_data.loc[(sleep_data['time_offset'] == time_offset) \n",
566 | " & (sleep_data['indicator'] == 1)]"
567 | ]
568 | },
569 | {
570 | "cell_type": "code",
571 | "execution_count": 24,
572 | "id": "6895bd42",
573 | "metadata": {},
574 | "outputs": [
575 | {
576 | "name": "stdout",
577 | "output_type": "stream",
578 | "text": [
579 | "31 32\n"
580 | ]
581 | }
582 | ],
583 | "source": [
584 | "print(len(sleep_vals), len(awake_vals))"
585 | ]
586 | },
587 | {
588 | "cell_type": "code",
589 | "execution_count": 25,
590 | "id": "6eca6a13",
591 | "metadata": {},
592 | "outputs": [],
593 | "source": [
594 | "p_sleep_at_offset=len(sleep_vals)/(len(sleep_vals)+len(awake_vals))"
595 | ]
596 | },
597 | {
598 | "cell_type": "code",
599 | "execution_count": 26,
600 | "id": "8c86bdcb",
601 | "metadata": {},
602 | "outputs": [
603 | {
604 | "data": {
605 | "text/plain": [
606 | "0.49206349206349204"
607 | ]
608 | },
609 | "execution_count": 26,
610 | "metadata": {},
611 | "output_type": "execute_result"
612 | }
613 | ],
614 | "source": [
615 | "p_sleep_at_offset"
616 | ]
617 | },
618 | {
619 | "cell_type": "code",
620 | "execution_count": 27,
621 | "id": "12b162f3",
622 | "metadata": {},
623 | "outputs": [],
624 | "source": [
625 | "alpha = 0.977400\n",
626 | "beta = -0.067270\n",
627 | "\n",
628 | "def calculate_prior(time, alpha, beta):\n",
629 | "    p = 1.0 / (1.0 + np.exp(np.dot(beta, time) + alpha))\n",
630 | "    return p\n",
631 | "\n",
632 | " \n",
633 | "time_est = np.linspace(time.min()- 5, time.max() + 5, 1000)[:, None]\n",
634 | "\n",
635 | "\n",
636 | "sleep_est = calculate_prior(time_est, alpha, beta)"
637 | ]
638 | },
639 | {
640 | "cell_type": "code",
641 | "execution_count": 28,
642 | "id": "560613cb",
643 | "metadata": {},
644 | "outputs": [
645 | {
646 | "data": {
647 | "image/png": "\n",
648 | "text/plain": [
649 | ""
650 | ]
651 | },
652 | "metadata": {},
653 | "output_type": "display_data"
654 | }
655 | ],
656 | "source": [
657 | "plt.plot(time_est,sleep_est,color='green',lw=1,label=\"Model\")\n",
658 | "plt.scatter(time,sleep_obs,edgecolor='navy',s=10,alpha=0.1,label='Observations')\n",
659 | "plt.title('Prior Probability Distribution for Sleep as Function of Time');\n",
660 | "plt.legend()\n",
661 | "plt.ylabel('Probability')\n",
662 | "plt.xlabel('PM Time');\n",
663 | "plt.xticks([-60, -30, 0, 30, 60, 90, 120], sleep_labels);"
664 | ]
665 | },
666 | {
667 | "cell_type": "code",
668 | "execution_count": 30,
669 | "id": "189abf70",
670 | "metadata": {},
671 | "outputs": [
672 | {
673 | "name": "stdout",
674 | "output_type": "stream",
675 | "text": [
676 | "9:30 PM probability of being asleep: 4.76%.\n",
677 | "10:00 PM probability of being asleep: 27.34%.\n",
678 | "10:15 PM probability of being asleep: 50.79%.\n"
679 | ]
680 | }
681 | ],
682 | "source": [
683 | "print('9:30 PM probability of being asleep: {:.2f}%.'.\n",
684 | "      format(100 * calculate_prior(-30, alpha, beta)))\n",
685 | "print('10:00 PM probability of being asleep: {:.2f}%.'.\n",
686 | "      format(100 * calculate_prior(0, alpha, beta)))\n",
687 | "print('10:15 PM probability of being asleep: {:.2f}%.'.\n",
688 | " format(100 * calculate_prior(15, alpha, beta)))"
689 | ]
690 | },
691 | {
692 | "cell_type": "code",
693 | "execution_count": null,
694 | "id": "c85d5d5d",
695 | "metadata": {},
696 | "outputs": [],
697 | "source": []
698 | },
699 | {
700 | "cell_type": "code",
701 | "execution_count": null,
702 | "id": "8f35b47b",
703 | "metadata": {},
704 | "outputs": [],
705 | "source": []
706 | },
707 | {
708 | "cell_type": "code",
709 | "execution_count": null,
710 | "id": "a8231cfa",
711 | "metadata": {},
712 | "outputs": [],
713 | "source": []
714 | }
715 | ],
716 | "metadata": {
717 | "kernelspec": {
718 | "display_name": "Python 3 (ipykernel)",
719 | "language": "python",
720 | "name": "python3"
721 | },
722 | "language_info": {
723 | "codemirror_mode": {
724 | "name": "ipython",
725 | "version": 3
726 | },
727 | "file_extension": ".py",
728 | "mimetype": "text/x-python",
729 | "name": "python",
730 | "nbconvert_exporter": "python",
731 | "pygments_lexer": "ipython3",
732 | "version": "3.8.11"
733 | }
734 | },
735 | "nbformat": 4,
736 | "nbformat_minor": 5
737 | }
738 |
--------------------------------------------------------------------------------