├── Case Study 1 - Caesar Cipher ├── Exercise 1.py ├── Exercise 2.py ├── Exercise 3.py ├── Exercise 4.py └── README.md ├── Case Study 2 - Translations of Hamlet ├── Exercise 1.py ├── Exercise 2.py ├── Exercise 3.py ├── Exercise 4.py └── README.md ├── Case Study 3 - Practice with Classification ├── Exercise 1.py ├── Exercise 2.py ├── Exercise 3.py ├── Exercise 4.py ├── Exercise 5.py ├── Exercise 6.py ├── Exercise 7.py ├── Exercise 8.py ├── Exercise 9.py └── README.md ├── Case Study 4 - Visualizing Whisky Classification ├── Exercise 1.py ├── Exercise 2.py ├── Exercise 3.py ├── Exercise 4.py ├── Exercise 5.py ├── Exercise 6.py ├── Exercise 7.py └── README.md ├── Case Study 5 - Bird Migration ├── Exercise 1.py ├── Exercise 2.py ├── Exercise 3.py ├── Exercise 4.py └── README.md ├── Case Study 6 - Social Network Analysis ├── Exercise 1.py ├── Exercise 2.py ├── Exercise 3.py ├── Exercise 4.py ├── Exercise 5.py ├── Exercise 6.py ├── Exercise 7.py └── README.md ├── Homework 1 ├── Exercise 1a.py ├── Exercise 1b.py ├── Exercise 1c.py ├── Exercise 1d.py ├── Exercise 1e.py ├── Exercise 2a.py ├── Exercise 2b.py ├── Exercise 2c.py ├── Exercise 2d.py ├── Exercise 2e.py ├── Exercise 2f.py ├── Exercise 3a.py ├── Exercise 3b.py ├── Exercise 3c.py └── README.md ├── Homework 2 ├── Exercise 1.py ├── Exercise 10.py ├── Exercise 11.py ├── Exercise 12.py ├── Exercise 13.py ├── Exercise 2.py ├── Exercise 3.py ├── Exercise 4.py ├── Exercise 5.py ├── Exercise 6.py ├── Exercise 7.py ├── Exercise 8.py ├── Exercise 9.py └── README.md └── README.md /Case Study 1 - Caesar Cipher/Exercise 1.py: -------------------------------------------------------------------------------- 1 | # Let's look at the lowercase letters. 2 | import string 3 | string.ascii_lowercase 4 | 5 | # We will consider the alphabet to be these letters, along with a space. 6 | alphabet = string.ascii_lowercase + " " 7 | 8 | # create `letters` here! 
9 | letters = {i: alphabet[i] for i in range(27)} -------------------------------------------------------------------------------- /Case Study 1 - Caesar Cipher/Exercise 2.py: -------------------------------------------------------------------------------- 1 | alphabet = string.ascii_lowercase + " " 2 | letters = dict(enumerate(alphabet)) 3 | 4 | encryption_key = 3 5 | 6 | # define `encoding` here! 7 | encoding = {alphabet[i] : (i + encryption_key) % 27 for i in range(27)} -------------------------------------------------------------------------------- /Case Study 1 - Caesar Cipher/Exercise 3.py: -------------------------------------------------------------------------------- 1 | message = "hi my name is caesar" 2 | def caesar(message, encryption_key): 3 | encoding = {alphabet[i] : (i + encryption_key) % 27 for i in range(27)} 4 | string = "" 5 | for i in range(len(message)): 6 | value = encoding[message[i]] 7 | string += letters[value] 8 | return string 9 | 10 | encoded_message = caesar(message, encryption_key = 3) 11 | print(encoded_message) -------------------------------------------------------------------------------- /Case Study 1 - Caesar Cipher/Exercise 4.py: -------------------------------------------------------------------------------- 1 | decoded_message = caesar(encoded_message, encryption_key = -3) 2 | print(decoded_message) -------------------------------------------------------------------------------- /Case Study 1 - Caesar Cipher/README.md: -------------------------------------------------------------------------------- 1 | # Case Study 1 - Caesar Cipher 2 | A cipher is a secret code for a language. In this case study, we will explore a cipher that is reported by contemporary Greek historians to have been used by Julius Caesar to send secret messages to generals during times of war. 
3 | -------------------------------------------------------------------------------- /Case Study 2 - Translations of Hamlet/Exercise 1.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | 3 | def word_count_distribution(text): 4 | word_count = count_words_fast(text) 5 | count_distribution = Counter(word_count.values()) 6 | return count_distribution 7 | 8 | distribution = word_count_distribution(text) -------------------------------------------------------------------------------- /Case Study 2 - Translations of Hamlet/Exercise 2.py: -------------------------------------------------------------------------------- 1 | def more_frequent(distribution): 2 | counts = list(distribution.keys()) 3 | frequency_of_counts = list(distribution.values()) 4 | cumulative_frequencies = np.cumsum(frequency_of_counts) 5 | more_frequent = 1 - cumulative_frequencies / cumulative_frequencies[-1] 6 | return dict(zip(counts, more_frequent)) 7 | 8 | cumulative = more_frequent(distribution) -------------------------------------------------------------------------------- /Case Study 2 - Translations of Hamlet/Exercise 3.py: -------------------------------------------------------------------------------- 1 | hamlets = pd.DataFrame(columns = ("language", "distribution")) 2 | book_dir = "Books" 3 | title_num = 1 4 | for language in book_titles: 5 | for author in book_titles[language]: 6 | for title in book_titles[language][author]: 7 | if title == "Hamlet": 8 | inputfile = data_filepath+"Books/"+language+"/"+author+"/"+title+".txt" 9 | text = read_book(inputfile) 10 | distribution = word_count_distribution(text) 11 | hamlets.loc[title_num] = language, distribution 12 | title_num += 1 13 | 14 | print(hamlets.head()) -------------------------------------------------------------------------------- /Case Study 2 - Translations of Hamlet/Exercise 4.py: 
-------------------------------------------------------------------------------- 1 | colors = ["crimson", "forestgreen", "blueviolet"] 2 | handles, hamlet_languages = [], [] 3 | for index in range(hamlets.shape[0]): 4 | language, distribution = hamlets.language[index+1], hamlets.distribution[index+1] 5 | dist = more_frequent(distribution) 6 | plot, = plt.loglog(sorted(list(dist.keys())),sorted(list(dist.values()), 7 | reverse = True), color = colors[index], linewidth = 2) 8 | handles.append(plot) 9 | hamlet_languages.append(language) 10 | plt.title("Word Frequencies in Hamlet Translations") 11 | xlim = [0, 2e3] 12 | xlabel = "Frequency of Word $W$" 13 | ylabel = "Fraction of Words\nWith Greater Frequency than $W$" 14 | plt.xlim(xlim); plt.xlabel(xlabel); plt.ylabel(ylabel) 15 | plt.legend(handles, hamlet_languages, loc = "upper right", numpoints = 1) 16 | plt.show() -------------------------------------------------------------------------------- /Case Study 2 - Translations of Hamlet/README.md: -------------------------------------------------------------------------------- 1 | # Case Study 2 - Translations of Hamlet 2 | In this case study, we will find and plot the distribution of word frequencies for each translation of Hamlet. Perhaps the distribution of word frequencies of Hamlet depends on the translation - let's find out! 
3 | -------------------------------------------------------------------------------- /Case Study 3 - Practice with Classification/Exercise 1.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | data = pd.read_csv("https://s3.amazonaws.com/demo-datasets/wine.csv") -------------------------------------------------------------------------------- /Case Study 3 - Practice with Classification/Exercise 2.py: -------------------------------------------------------------------------------- 1 | numeric_data = data.drop('color', 1) -------------------------------------------------------------------------------- /Case Study 3 - Practice with Classification/Exercise 3.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Aug 5 17:02:04 2017 4 | 5 | @author: illusory_time 6 | """ 7 | 8 | numeric_data = (numeric_data - np.mean(numeric_data)) / np.std(numeric_data) 9 | 10 | import sklearn.decomposition 11 | pca = sklearn.decomposition.PCA(n_components=2) 12 | principal_components = pca.fit(numeric_data).transform(numeric_data) -------------------------------------------------------------------------------- /Case Study 3 - Practice with Classification/Exercise 4.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | from matplotlib.colors import ListedColormap 3 | from matplotlib.backends.backend_pdf import PdfPages 4 | observation_colormap = ListedColormap(['red', 'blue']) 5 | x = principal_components[:,0] 6 | y = principal_components[:,1] 7 | 8 | plt.title("Principal Components of Wine") 9 | plt.scatter(x, y, alpha = 0.2, 10 | c = data['high_quality'], cmap = observation_colormap, edgecolors = 'none') 11 | plt.xlim(-8, 8); plt.ylim(-8, 8) 12 | plt.xlabel("Principal Component 1"); plt.ylabel("Principal Component 2") 13 | plt.show() 
-------------------------------------------------------------------------------- /Case Study 3 - Practice with Classification/Exercise 5.py: -------------------------------------------------------------------------------- 1 | def accuracy(predictions, outcomes): 2 | return np.mean(predictions == outcomes) * 100 3 | 4 | x = np.array([1,2,3]) 5 | y = np.array([1,2,4]) 6 | print(accuracy(x, y)) -------------------------------------------------------------------------------- /Case Study 3 - Practice with Classification/Exercise 6.py: -------------------------------------------------------------------------------- 1 | print(accuracy(0, data["high_quality"])) -------------------------------------------------------------------------------- /Case Study 3 - Practice with Classification/Exercise 7.py: -------------------------------------------------------------------------------- 1 | from sklearn.neighbors import KNeighborsClassifier 2 | knn = KNeighborsClassifier(n_neighbors = 5) 3 | knn.fit(numeric_data, data['high_quality']) 4 | library_predictions = knn.predict(numeric_data) 5 | print(accuracy(library_predictions, data["high_quality"])) -------------------------------------------------------------------------------- /Case Study 3 - Practice with Classification/Exercise 8.py: -------------------------------------------------------------------------------- 1 | n_rows = data.shape[0] 2 | random.seed(123) 3 | selection = random.sample(range(n_rows), 10) -------------------------------------------------------------------------------- /Case Study 3 - Practice with Classification/Exercise 9.py: -------------------------------------------------------------------------------- 1 | predictors = np.array(numeric_data) 2 | training_indices = [i for i in range(len(predictors)) if i not in selection] 3 | outcomes = np.array(data["high_quality"]) 4 | 5 | my_predictions = [knn_predict(p, predictors[training_indices,:], outcomes[training_indices], k=5) for p in predictors[selection]] 6 | 
7 | percentage = accuracy(my_predictions, data.high_quality[selection]) 8 | print(percentage) -------------------------------------------------------------------------------- /Case Study 3 - Practice with Classification/README.md: -------------------------------------------------------------------------------- 1 | # Case Study 3 - Practice with Classification 2 | In this case study, we will analyze a dataset consisting of an assortment of wines classified into "high quality" and "low quality", and will use k-Nearest Neighbors to predict whether or not other information about the wine helps us correctly guess whether a new wine will be of high quality. 3 | -------------------------------------------------------------------------------- /Case Study 4 - Visualizing Whisky Classification/Exercise 1.py: -------------------------------------------------------------------------------- 1 | # First, we import a tool to allow text to pop up on a plot when the cursor 2 | # hovers over it. Also, we import a data structure used to store arguments 3 | # of what to plot in Bokeh. Finally, we will use numpy for this section as well! 4 | 5 | from bokeh.models import HoverTool, ColumnDataSource 6 | import numpy as np 7 | 8 | # Let's plot a simple 5x5 grid of squares, alternating in color as red and blue. 9 | 10 | plot_values = [1,2,3,4,5] 11 | plot_colors = ["red", "blue"] 12 | 13 | # How do we tell Bokeh to plot each point in a grid? Let's use a function that 14 | # finds each combination of values from 1-5. 15 | from itertools import product 16 | 17 | grid = list(product(plot_values, plot_values)) 18 | print(grid) 19 | 20 | # The first value is the x coordinate, and the second value is the y coordinate. 21 | # Let's store these in separate lists. 22 | 23 | xs, ys = zip(*grid) 24 | print(xs) 25 | print(ys) 26 | 27 | # Now we will make a list of colors, alternating between red and blue. 
28 | 29 | colors = [plot_colors[i%2] for i in range(len(grid))] 30 | print(colors) 31 | 32 | # Finally, let's determine the strength of transparency (alpha) for each point, 33 | # where 0 is completely transparent. 34 | 35 | alphas = np.linspace(0, 1, len(grid)) 36 | 37 | # Bokeh likes each of these to be stored in a special dataframe, called 38 | # ColumnDataSource. Let's store our coordinates, colors, and alpha values. 39 | 40 | source = ColumnDataSource( 41 | data={ 42 | "x": xs, 43 | "y": ys, 44 | "colors": colors, 45 | "alphas": alphas, 46 | } 47 | ) 48 | # We are ready to make our interactive Bokeh plot! 49 | 50 | output_file("Basic_Example.html", title="Basic Example") 51 | fig = figure(tools="resize, hover, save") 52 | fig.rect("x", "y", 0.9, 0.9, source=source, color="colors",alpha="alphas") 53 | hover = fig.select(dict(type=HoverTool)) 54 | hover.tooltips = { 55 | "Value": "@x, @y", 56 | } 57 | show(fig) -------------------------------------------------------------------------------- /Case Study 4 - Visualizing Whisky Classification/Exercise 2.py: -------------------------------------------------------------------------------- 1 | cluster_colors = ["red", "orange", "green", "blue", "purple", "gray"] 2 | regions = ["Speyside", "Highlands", "Lowlands", "Islands", "Campbelltown", "Islay"] 3 | 4 | region_colors = dict(zip(regions, cluster_colors)) 5 | 6 | print(region_colors) -------------------------------------------------------------------------------- /Case Study 4 - Visualizing Whisky Classification/Exercise 3.py: -------------------------------------------------------------------------------- 1 | distilleries = list(whisky.Distillery) 2 | correlation_colors = [] 3 | for i in range(len(distilleries)): 4 | for j in range(len(distilleries)): 5 | if correlations[i][j] < .70: # if low correlation, 6 | correlation_colors.append('white') # just use white. 
7 | else: # otherwise, 8 | if whisky.Group[i] == whisky.Group[j]: # if the groups match, 9 | correlation_colors.append(cluster_colors[whisky.Group[i]]) # color them by their mutual group. 10 | else: # otherwise 11 | correlation_colors.append('lightgray') # color them lightgray. -------------------------------------------------------------------------------- /Case Study 4 - Visualizing Whisky Classification/Exercise 4.py: -------------------------------------------------------------------------------- 1 | source = ColumnDataSource( 2 | data = { 3 | "x": np.repeat(distilleries,len(distilleries)), 4 | "y": list(distilleries)*len(distilleries), 5 | "colors": correlation_colors, 6 | "alphas": list(correlations.flatten()), 7 | "correlations": list(correlations.flatten()), 8 | } 9 | ) 10 | 11 | output_file("Whisky Correlations.html", title="Whisky Correlations") 12 | fig = figure(title="Whisky Correlations", 13 | x_axis_location="above", tools="resize,hover,save", 14 | x_range=list(reversed(distilleries)), y_range=distilleries) 15 | fig.grid.grid_line_color = None 16 | fig.axis.axis_line_color = None 17 | fig.axis.major_tick_line_color = None 18 | fig.axis.major_label_text_font_size = "5pt" 19 | fig.xaxis.major_label_orientation = np.pi / 3 20 | 21 | fig.rect('x', 'y', .9, .9, source=source, 22 | color='colors', alpha='alphas') 23 | hover = fig.select(dict(type=HoverTool)) 24 | hover.tooltips = { 25 | "Whiskies": "@x, @y", 26 | "Correlation": "@correlations", 27 | } 28 | show(fig) -------------------------------------------------------------------------------- /Case Study 4 - Visualizing Whisky Classification/Exercise 5.py: -------------------------------------------------------------------------------- 1 | points = [(0,0), (1,2), (3,1)] 2 | xs, ys = zip(*points) 3 | colors = ["red", "blue", "green"] 4 | 5 | output_file("Spatial_Example.html", title="Regional Example") 6 | location_source = ColumnDataSource( 7 | data={ 8 | "x": xs, 9 | "y": ys, 10 | "colors": colors, 11 | 
} 12 | ) 13 | 14 | fig = figure(title = "Title", 15 | x_axis_location = "above", tools="resize, hover, save") 16 | fig.plot_width = 300 17 | fig.plot_height = 380 18 | fig.circle("x", "y", 10, 10, size=10, source=location_source, 19 | color='colors', line_color = None) 20 | 21 | hover = fig.select(dict(type = HoverTool)) 22 | hover.tooltips = { 23 | "Location": "(@x, @y)" 24 | } 25 | show(fig) -------------------------------------------------------------------------------- /Case Study 4 - Visualizing Whisky Classification/Exercise 6.py: -------------------------------------------------------------------------------- 1 | def location_plot(title, colors): 2 | 3 | output_file(title+".html") 4 | location_source = ColumnDataSource( 5 | data={ 6 | "x": whisky[" Latitude"], 7 | "y": whisky[" Longitude"], 8 | "colors": colors, 9 | "regions": whisky.Region, 10 | "distilleries": whisky.Distillery 11 | } 12 | ) 13 | 14 | fig = figure(title = title, 15 | x_axis_location = "above", tools="resize, hover, save") 16 | fig.plot_width = 400 17 | fig.plot_height = 500 18 | fig.circle("x", "y", 10, 10, size=9, source=location_source, 19 | color='colors', line_color = None) 20 | fig.xaxis.major_label_orientation = np.pi / 3 21 | hover = fig.select(dict(type = HoverTool)) 22 | hover.tooltips = { 23 | "Distillery": "@distilleries", 24 | "Location": "(@x, @y)" 25 | } 26 | show(fig) 27 | 28 | region_cols = [region_colors[i] for i in list(whisky["Region"])] 29 | location_plot("Whisky Locations and Regions", region_cols) -------------------------------------------------------------------------------- /Case Study 4 - Visualizing Whisky Classification/Exercise 7.py: -------------------------------------------------------------------------------- 1 | region_cols = [region_colors[i] for i in list(whisky["Region"])] 2 | classification_cols = [cluster_colors[i] for i in list(whisky["Group"])] 3 | 4 | location_plot("Whisky Locations and Regions", region_cols) 5 | location_plot("Whisky Locations and 
Groups", classification_cols) -------------------------------------------------------------------------------- /Case Study 4 - Visualizing Whisky Classification/README.md: -------------------------------------------------------------------------------- 1 | # Case Study 4 - Visualizing Whisky Classification 2 | In this case study, we have prepared step-by-step instructions for you on how to prepare plots in Bokeh, a library designed for simple and interactive plotting. We will demonstrate Bokeh by continuing the analysis of Scotch whiskies. 3 | -------------------------------------------------------------------------------- /Case Study 5 - Bird Migration/Exercise 1.py: -------------------------------------------------------------------------------- 1 | # First, use `groupby` to group up the data. 2 | grouped_birds = birddata.groupby("bird_name") 3 | 4 | # Now operations are performed on each group. 5 | mean_speeds = grouped_birds.speed_2d.mean() 6 | 7 | # The `head` method prints the first 5 lines of each bird. 8 | grouped_birds.head() 9 | 10 | # Find the mean `altitude` for each bird. 11 | # Assign this to `mean_altitudes`. 12 | mean_altitudes = grouped_birds.altitude.mean() -------------------------------------------------------------------------------- /Case Study 5 - Bird Migration/Exercise 2.py: -------------------------------------------------------------------------------- 1 | # Convert birddata.date_time to the `pd.datetime` format. 2 | birddata.date_time = pd.to_datetime(birddata.date_time) 3 | 4 | # Create a new column of day of observation 5 | birddata["date"] = birddata.date_time.dt.date 6 | 7 | # Check the head of the column. 
8 | birddata.date.head() 9 | 10 | grouped_bydates = birddata.groupby("date") 11 | mean_altitudes_perday = grouped_bydates.altitude.mean() -------------------------------------------------------------------------------- /Case Study 5 - Bird Migration/Exercise 3.py: -------------------------------------------------------------------------------- 1 | grouped_birdday = birddata.groupby(["bird_name", "date"]) 2 | mean_altitudes_perday = grouped_birdday.altitude.mean() 3 | 4 | # look at the head of `mean_altitudes_perday`. 5 | mean_altitudes_perday.head() -------------------------------------------------------------------------------- /Case Study 5 - Bird Migration/Exercise 4.py: -------------------------------------------------------------------------------- 1 | grouped_birdday = birddata.groupby(["bird_name", "date"]) 2 | mean_speeds = grouped_birdday.speed_2d.mean() 3 | 4 | 5 | eric_daily_speed = mean_speeds["Eric"] 6 | sanne_daily_speed = mean_speeds["Sanne"] 7 | nico_daily_speed = mean_speeds["Nico"] 8 | 9 | eric_daily_speed.plot(label="Eric") 10 | sanne_daily_speed.plot(label="Sanne") 11 | nico_daily_speed.plot(label="Nico") 12 | plt.legend(loc="upper left") 13 | plt.show() -------------------------------------------------------------------------------- /Case Study 5 - Bird Migration/README.md: -------------------------------------------------------------------------------- 1 | # Case Study 5 - Bird Migration 2 | In this case study, we will continue taking a look at patterns of flight for each of the three birds in our dataset. 
3 | -------------------------------------------------------------------------------- /Case Study 6 - Social Network Analysis/Exercise 1.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | df = pd.read_stata(data_filepath + "individual_characteristics.dta") 3 | df1 = df.loc[df.village == 1] 4 | df2 = df.loc[df.village == 2] 5 | df1.head() -------------------------------------------------------------------------------- /Case Study 6 - Social Network Analysis/Exercise 2.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | pid1 = pd.read_csv(data_filepath + "key_vilno_1.csv", header = None) 3 | pid2 = pd.read_csv(data_filepath + "key_vilno_2.csv", header = None) -------------------------------------------------------------------------------- /Case Study 6 - Social Network Analysis/Exercise 3.py: -------------------------------------------------------------------------------- 1 | sex1 = dict(zip(df1.pid, df1.resp_gend)) 2 | caste1 = dict(zip(df1.pid, df1.caste)) 3 | religion1 = dict(zip(df1.pid, df1.religion)) 4 | 5 | sex2 = dict(zip(df2.pid, df2.resp_gend)) 6 | caste2 = dict(zip(df2.pid, df2.caste)) 7 | religion2 = dict(zip(df2.pid, df2.religion)) -------------------------------------------------------------------------------- /Case Study 6 - Social Network Analysis/Exercise 4.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | def chance_homophily(chars): 3 | chars_counts = np.array(list(Counter(chars.values()).values())) 4 | chars_frequency = chars_counts / sum(chars_counts) 5 | return sum(chars_frequency**2) 6 | 7 | favorite_colors = { 8 | "ankit": "red", 9 | "xiaoyu": "blue", 10 | "mary": "blue" 11 | } 12 | 13 | color_homophily = chance_homophily(favorite_colors) 14 | print(color_homophily) 15 | 16 | k = dict(Counter(favorite_colors)) 
-------------------------------------------------------------------------------- /Case Study 6 - Social Network Analysis/Exercise 5.py: -------------------------------------------------------------------------------- 1 | print("Village 1 chance of same sex:", chance_homophily(sex1)) 2 | print("Village 2 chance of same sex:", chance_homophily(sex2)) 3 | print("Village 1 chance of same caste:", chance_homophily(caste1)) 4 | print("Village 2 chance of same caste:", chance_homophily(caste2)) 5 | print("Village 1 chance of same religion:", chance_homophily(religion1)) 6 | print("Village 2 chance of same religion:", chance_homophily(religion2)) -------------------------------------------------------------------------------- /Case Study 6 - Social Network Analysis/Exercise 6.py: -------------------------------------------------------------------------------- 1 | def homophily(G, chars, IDs): 2 | """ 3 | Given a network G, a dict of characteristics chars for node IDs, 4 | and dict of node IDs for each node in the network, 5 | find the homophily of the network. 6 | """ 7 | num_same_ties, num_ties = 0, 0 8 | for n1 in G.nodes(): 9 | for n2 in G.nodes(): 10 | if n1 > n2: # do not double-count edges! 
11 | if IDs[n1] in chars and IDs[n2] in chars: 12 | if G.has_edge(n1, n2): 13 | num_ties += 1 14 | if chars[IDs[n1]] == chars[IDs[n2]]: 15 | num_same_ties += 1 16 | return (num_same_ties / num_ties) 17 | -------------------------------------------------------------------------------- /Case Study 6 - Social Network Analysis/Exercise 7.py: -------------------------------------------------------------------------------- 1 | print("Village 1 observed proportion of same sex:", homophily(G1, sex1, pid1)) 2 | print("Village 2 observed proportion of same sex:", homophily(G2, sex2, pid2)) 3 | print("Village 1 observed proportion of same caste:", homophily(G1, caste1, pid1)) 4 | print("Village 2 observed proportion of same caste:", homophily(G2, caste2, pid2)) 5 | print("Village 1 observed proportion of same religion :", homophily(G1, religion1, pid1)) 6 | print("Village 2 observed proportion of same religion :", homophily(G2, religion2, pid2)) -------------------------------------------------------------------------------- /Case Study 6 - Social Network Analysis/README.md: -------------------------------------------------------------------------------- 1 | # Case Study 6 - Social Network Analysis 2 | Homophily is a network characteristic. Homophily occurs when nodes that share an edge share a characteristic more often than nodes that do not share an edge. In this case study, we will investigate homophily of several characteristics of individuals connected in social networks in rural India. 
3 | -------------------------------------------------------------------------------- /Homework 1/Exercise 1a.py: -------------------------------------------------------------------------------- 1 | import string 2 | alphabet = string.ascii_letters -------------------------------------------------------------------------------- /Homework 1/Exercise 1b.py: -------------------------------------------------------------------------------- 1 | import string 2 | alphabet = string.ascii_letters 3 | sentence = 'Jim quickly realized that the beautiful gowns are expensive' 4 | count_letters = {} 5 | for i in sentence: 6 | if i in alphabet: 7 | count_letters[i] = sentence.count(i) 8 | print(count_letters) -------------------------------------------------------------------------------- /Homework 1/Exercise 1c.py: -------------------------------------------------------------------------------- 1 | import string 2 | alphabet = string.ascii_letters 3 | 4 | def counter(sentence): 5 | count_letters = {} 6 | for i in sentence: 7 | if i in alphabet: 8 | count_letters[i] = sentence.count(i) 9 | return count_letters; 10 | 11 | sentence = 'Jim quickly realized that the beautiful gowns are expensive' 12 | count_letter = counter(sentence) -------------------------------------------------------------------------------- /Homework 1/Exercise 1d.py: -------------------------------------------------------------------------------- 1 | address_count = counter(address) 2 | print(address_count) -------------------------------------------------------------------------------- /Homework 1/Exercise 1e.py: -------------------------------------------------------------------------------- 1 | most_frequent_letter = max(address_count, key=address_count.get) 2 | print(most_frequent_letter) 3 | 4 | -------------------------------------------------------------------------------- /Homework 1/Exercise 2a.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | 
pi = math.pi 4 | print(pi/4.0) -------------------------------------------------------------------------------- /Homework 1/Exercise 2b.py: -------------------------------------------------------------------------------- 1 | import random 2 | random.seed(1) 3 | def rand(): 4 | random.uniform(-1, 1) 5 | rand() -------------------------------------------------------------------------------- /Homework 1/Exercise 2c.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | def distance(x, y): 4 | X = (x[0]-y[0])**2 5 | Y = (x[1]-y[1])**2 6 | ans = math.sqrt(X + Y) 7 | return ans 8 | 9 | x = (0, 0) 10 | y = (1, 1) 11 | 12 | print(distance(x, y)) -------------------------------------------------------------------------------- /Homework 1/Exercise 2d.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | x = np.array([0, 0]) 4 | y = np.array([1, 1]) 5 | 6 | def in_circle(x, origin=[0]*2): 7 | return distance(x, origin) < 1 8 | 9 | print(in_circle(y, x)) -------------------------------------------------------------------------------- /Homework 1/Exercise 2e.py: -------------------------------------------------------------------------------- 1 | R = 10000 2 | x = [] 3 | inside = [] 4 | for i in range(R): 5 | point = [rand(), rand()] 6 | x.append(point) 7 | inside.append(in_circle(point, origin=[0]*2)) 8 | 9 | cnt = 0 10 | for i in inside: 11 | if i: 12 | cnt = cnt + 1 13 | 14 | print(cnt/R) -------------------------------------------------------------------------------- /Homework 1/Exercise 2f.py: -------------------------------------------------------------------------------- 1 | T = 0 2 | for i in inside: 3 | if i: 4 | T+=1 5 | print(math.pi/4 - T/R) -------------------------------------------------------------------------------- /Homework 1/Exercise 3a.py: -------------------------------------------------------------------------------- 1 | def 
moving_window_average(x, n_neighbors=1): 2 | n = len(x) 3 | width = n_neighbors*2 + 1 4 | x = [x[0]]*n_neighbors + x + [x[-1]]*n_neighbors 5 | ans = [] 6 | for i in range(n): 7 | ans.append(sum(x[i:i+width])/width) 8 | return ans 9 | 10 | x=[0,10,5,3,1,5] 11 | print(moving_window_average(x, 1)) -------------------------------------------------------------------------------- /Homework 1/Exercise 3b.py: -------------------------------------------------------------------------------- 1 | R = 1000 2 | x = [random.uniform(0, 1) for i in range(1000)] 3 | Y = [x] + [moving_window_average(x, n_neighbors) for n_neighbors in range(1, 10)] -------------------------------------------------------------------------------- /Homework 1/Exercise 3c.py: -------------------------------------------------------------------------------- 1 | ranges = [max(Y[i]) - min(Y[i]) for i in range(10)] 2 | print(ranges) -------------------------------------------------------------------------------- /Homework 1/README.md: -------------------------------------------------------------------------------- 1 | # Homework 1: Basics of Python 3 2 | Exercises for homework (Week 1). 
In this homework, we will use objects, functions, and randomness to find the length of documents, approximate pi, and smooth out random noise 3 | -------------------------------------------------------------------------------- /Homework 2/Exercise 1.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def create_board(): 4 | return np.zeros((3, 3)) 5 | 6 | board = create_board() -------------------------------------------------------------------------------- /Homework 2/Exercise 10.py: -------------------------------------------------------------------------------- 1 | def play_game(): 2 | board, winner = create_board(), 0 3 | while winner == 0: 4 | for player in [1, 2]: 5 | random_place(board, player) 6 | evaluate(board) 7 | if winner != 0: 8 | break 9 | return winner 10 | 11 | play_game() -------------------------------------------------------------------------------- /Homework 2/Exercise 11.py: -------------------------------------------------------------------------------- 1 | import time 2 | winner = [] 3 | def play_game(): 4 | board = create_board() 5 | for i in range(5): 6 | for player in [1, 2]: 7 | random_place(board, player) 8 | winner.append(evaluate(board)) 9 | if(evaluate(board) == 1 or 2 or -1): 10 | break 11 | 12 | start = time.time() 13 | for i in range(1000): 14 | play_game() 15 | end = time.time() 16 | print(start - end) 17 | plt.hist(winner) 18 | plt.show() -------------------------------------------------------------------------------- /Homework 2/Exercise 12.py: -------------------------------------------------------------------------------- 1 | def play_strategic_game(): 2 | board, winner = create_board(), 0 3 | board[1,1] = 1 4 | 5 | play_strategic_game() -------------------------------------------------------------------------------- /Homework 2/Exercise 13.py: -------------------------------------------------------------------------------- 1 | import time 2 | winner = [] 3 | 
start = time.time()
for i in range(1000):
    winner.append(play_strategic_game())
end = time.time()
# Elapsed time is end minus start (the reverse order prints a negative value).
print(end - start)
plt.hist(winner)
plt.show()

# --------------------------------------------------------------------------------
# /Homework 2/Exercise 2.py:
# --------------------------------------------------------------------------------


def place(board, player, position):
    """Write ``player`` at ``position`` if that cell currently holds 0.

    An occupied cell is left untouched; nothing is returned.
    """
    if board[position] == 0:
        board[position] = player


board = create_board()
place(board, 1, (0, 0))

# --------------------------------------------------------------------------------
# /Homework 2/Exercise 3.py:
# --------------------------------------------------------------------------------
import numpy


def possibilities(board):
    """Return ``numpy.where(board == 0)``: the indices of all zero cells.

    For a 2-D board this is a ``(row_indices, column_indices)`` tuple of
    equally long arrays.
    """
    return numpy.where(board == 0)


possibilities(board)

# --------------------------------------------------------------------------------
# /Homework 2/Exercise 4.py:
# --------------------------------------------------------------------------------
import random


def random_place(board, player):
    """Put ``player`` on one randomly chosen zero cell of ``board``.

    ``possibilities`` yields parallel row/column index arrays, so a single
    random offset is drawn and used for both. Choosing directly from the
    tuple would select a whole index array and overwrite entire rows.
    If no cell is free the board is left unchanged.
    """
    rows, cols = possibilities(board)
    if len(rows) == 0:
        return
    pick = random.randrange(len(rows))
    board[rows[pick], cols[pick]] = player


random_place(board, 2)

# --------------------------------------------------------------------------------
# /Homework 2/Exercise 5.py:
# --------------------------------------------------------------------------------
board = create_board()
for i in range(3):
    for player in [1, 2]:
        random_place(board, player)

print(board)

# --------------------------------------------------------------------------------
# /Homework 2/Exercise 6.py:
# --------------------------------------------------------------------------------


def row_win(board, player):
    """Return True if any row of ``board`` consists solely of ``player``.

    Every row is inspected (returning inside the loop would only ever test
    the first one), and the cells are compared one by one so the check works
    for NumPy arrays as well as nested lists.
    """
    return any(all(cell == player for cell in row) for row in board)


row_win(board, 1)

# --------------------------------------------------------------------------------
# /Homework 2/Exercise 7.py:
# --------------------------------------------------------------------------------


def col_win(board, player):
    """Return True if any column of ``board`` consists solely of ``player``.

    ``zip(*board)`` transposes the rows into columns, so every column is
    checked cell by cell.
    """
    return any(all(cell == player for cell in column) for column in zip(*board))


col_win(board, 1)

# --------------------------------------------------------------------------------
# /Homework 2/Exercise 8.py:
# --------------------------------------------------------------------------------


def diag_win(board, player):
    """Return True if either diagonal of ``board`` consists solely of ``player``.

    Both the main diagonal (top-left to bottom-right) and the anti-diagonal
    (top-right to bottom-left) are tested.
    """
    size = len(board)
    main_diagonal = all(board[i][i] == player for i in range(size))
    anti_diagonal = all(board[i][size - 1 - i] == player for i in range(size))
    return main_diagonal or anti_diagonal


diag_win(board, 1)

# --------------------------------------------------------------------------------
# /Homework 2/Exercise 9.py:
# --------------------------------------------------------------------------------


def evaluate(board):
    """Return the state of ``board`` as an integer code.

    Returns the player number (1 or 2) for which ``row_win``, ``col_win`` or
    ``diag_win`` is true, -1 when no cell equals 0 and nobody has won, and 0
    otherwise.
    """
    winner = 0
    for player in [1, 2]:
        if (row_win(board, player) or col_win(board, player)
                or diag_win(board, player)):
            winner = player
    # A board without zeros and without a winner is reported as -1.
    if np.all(board != 0) and winner == 0:
        winner = -1
    return winner


evaluate(board)

# --------------------------------------------------------------------------------
# /Homework 2/README.md:
# --------------------------------------------------------------------------------
# # Homework 2: Python Libraries and Concepts Used in Research
# Exercises for Homework (Week 2). Tic-tac-toe (or noughts and crosses) is a simple strategy game in which two players take turns placing a mark on a 3x3 board, attempting to make a row, column, or diagonal of three with their mark. In this homework, we will use the tools we've covered in the past two weeks to create a tic-tac-toe simulator and evaluate basic winning strategies.
#
# --------------------------------------------------------------------------------
# /README.md:
# --------------------------------------------------------------------------------
# # Using Python for Research
#
# My solutions to homework assignments for Harvardx: PH526x Using Python for Research, EdX, Jul-Aug 2017. All codes are written in Python 3.
4 | 5 | Course Modules: https://www.edx.org/course/using-python-research-harvardx-ph526x 6 | --------------------------------------------------------------------------------