├── Homework_week1.py
├── README.md
├── HarvardX PH526x Certificate | edX.pdf
├── Homework_Week1_1.py
├── Homework_Week1_3.py
├── Homework_Week4_2.py
├── Homework_Week1_2.py
├── Homework_Week3_1.py
├── Homework_Week3_2.py
├── Homework_Week3_3.py
├── Homework_Week4_1.py
└── Homework_Week2.py


/Homework_week1.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Using-Python-For-Research
2 | This repository contains the Python code I submitted for this course.
3 | 


--------------------------------------------------------------------------------
/HarvardX PH526x Certificate | edX.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shivam967/Using-Python-For-Research/HEAD/HarvardX PH526x Certificate | edX.pdf


--------------------------------------------------------------------------------
/Homework_Week1_1.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Created on Sat Apr  8 11:40:55 2017
 5 | 
 6 | @author: shivamchaturvedi
 7 | """
 8 | 
 9 | #In this five-part exercise, we will count the frequency of each letter in a given string.
10 | 
11 | #-------------------------------------------------
12 | 
13 | #1a
14 | #The lower and upper cases of the English alphabet is stored as alphabet using the attribute ascii_letters
15 | #from string library (module) where we will store them in alphabet after defining a given sentence
16 | 
17 | import string
18 | 
# Sentence whose letters are tallied in part 1b.
sentence = 'Jim quickly realized that the beautiful gowns are expensive'

# Every upper- and lower-case ASCII letter; any other character is ignored below.
alphabet = string.ascii_letters
#-------------------------------------------------

#1b
#Build count_letters: one key per distinct letter of `sentence`, mapped to the
#number of times that letter occurs. Upper and lower case are counted separately.

count_letters = {}

for letter in sentence:
    if letter not in alphabet:
        continue  # spaces and other non-letters are not counted
    count_letters[letter] = count_letters.get(letter, 0) + 1
37 | 
38 | #print(count_letters.keys())
39 | #print(count_letters.values())
40 | #-------------------------------------------------
41 | 
42 | #1c 
43 | #Rewrite your code from 1b to make a function called counter that takes a string input_string 
44 | #and returns a dictionary of letter counts count_letters. If you were unable to complete 1b, 
45 | #you can use the solution by selecting Show Answer. Use your function to call counter(sentence).
46 | 
def counter(input_string):
    """Return a dictionary of letter counts for ``input_string``.

    Only ASCII letters are counted, matching the filter used in part 1b;
    spaces, digits and punctuation are skipped. Upper- and lower-case letters
    are counted separately.

    Parameters
    ----------
    input_string : str
        Text whose letters are tallied.

    Returns
    -------
    dict
        Maps each letter found in ``input_string`` to its number of
        occurrences. Empty when the text contains no letters.
    """
    count_letters = {}
    for letter in input_string:
        # Without this filter the space would usually win "most common letter".
        if letter not in string.ascii_letters:
            continue
        count_letters[letter] = count_letters.get(letter, 0) + 1
    return count_letters
55 | 
56 |         #print(counter(sentence))
57 | 
58 | #-------------------------------------------------
59 | 
60 | #1d
61 | #Abraham Lincoln was a president during the American Civil War. His famous 1863 Gettysburg Address 
62 | #has been stored as address, and the counter function defined in part 1c has been loaded. 
63 | #Use these to return a dictionary consisting of the count of each letter in this address, 
64 | #and save this as address_count. Print address_count.
65 | 
address = '1863 Gettysburg'

# Per-character tally of the address, produced by counter() from part 1c.
address_count = counter(address)

#print(address_count)

#-------------------------------------------------

#1e
#What is the most common letter used in the Gettysburg Address?
#Store this letter as most_frequent_letter, and print your answer.

from collections import Counter

# most_common(1) returns a one-element list holding a (character, count) pair.
most_frequent_letter = Counter(address).most_common(1)

print(most_frequent_letter)



#Another way to solve it (this solution yields only the most common LETTER, without its count)

# max() with a key returns the first entry that reaches the highest count,
# which is the same entry a manual "strictly greater" scan would keep.
most_frequent_letter = max(address_count, key=address_count.get)
maximum = address_count[most_frequent_letter]

print(most_frequent_letter)
96 | 


--------------------------------------------------------------------------------
/Homework_Week1_3.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Created on Sat Apr 22 00:35:11 2017
 5 | 
 6 | @author: shivamchaturvedi
 7 | """
 8 | 
 9 | #A list of numbers can be very unsmooth, meaning very high numbers can be right next to very low numbers. 
10 | #This list may represent a smooth path in reality that is masked with random noise (for example, 
11 | #satellite trajectories with inaccurate transmission). One way to smooth the values in the list is to 
12 | #replace each value with the average of each value's neighbors, including the value itself.
13 | 
14 | #-------------------------------------------------
15 | 
16 | #3a
17 | 
18 | #Let's make a function moving_window_average(x, n_neighbors) that takes a list x and the number of neighbors
19 | # n_neighbors on either side to consider. For each value, the function computes the average of each value's 
20 | #neighbors, including themselves. Have the function return a list of these averaged values that is the same 
21 | #length as the original list. If there are not enough neighbors (for cases near the edge), substitute the 
22 | #original value as many times as there are missing neighbors.
23 | #Use your function to find the moving window sum of x=[0,10,5,3,1,5] and n_neighbors=1.
24 | 
25 | 
26 | import random
27 | 
28 | random.seed(1) #Initialize the basic random number generator.
29 | 
def moving_window_average(x, n_neighbors=1):
    """Smooth ``x`` by averaging each value with its neighbours.

    Each output element is the mean of the corresponding input element and
    the ``n_neighbors`` values on either side of it. Near the edges the first
    (or last) value is repeated as often as needed to fill the window.

    Parameters
    ----------
    x : sequence of numbers
        Values to smooth; any sequence (list, tuple, ...) is accepted.
    n_neighbors : int, optional
        Number of neighbours taken on each side (default 1).

    Returns
    -------
    list of float
        Averaged values, same length as ``x``. Empty when ``x`` is empty.

    Raises
    ------
    ValueError
        If ``n_neighbors`` is negative.
    """
    if n_neighbors < 0:
        raise ValueError("n_neighbors must be non-negative")
    values = list(x)
    if not values:
        # Nothing to pad with, so there is nothing to average.
        return []
    width = 2 * n_neighbors + 1
    # Pad both ends with the edge values so every window has `width` items.
    padded = [values[0]] * n_neighbors + values + [values[-1]] * n_neighbors
    return [sum(padded[i:i + width]) / width for i in range(len(values))]
37 |     
38 |     
x = [0, 10, 5, 3, 1, 5]
print(moving_window_average(x, 1))

#-------------------------------------------------

#3b
#Compute and store R=1000 random values from 0-1 as x.
#Compute the moving window average of x for n_neighbors = 1..9.
#Store x followed by each of these averages as consecutive lists in Y.

random.seed(1) # Fixes the generated values; used for answer-checking.

R = 1000
# random.uniform(a, b) returns a float N with a <= N <= b (for a <= b).
x = [random.uniform(0, 1) for _ in range(R)]
Y = [x] + [moving_window_average(x, n_neighbors) for n_neighbors in range(1, 10)]
print(len(Y))
#-------------------------------------------------

#3c
#For each list in Y, store the range (maximum minus minimum) in a new list ranges.
#As the window width increases, does the range of each list increase or decrease?

# A separate loop name keeps `x` (the raw data) from being shadowed.
ranges = [max(series) - min(series) for series in Y]
print(ranges) #The range decreases, because the average smooths a larger number of neighbors.
71 | #Because the numbers in the original list are just random, we expect the average of many of them to be 
72 | #roughly 1 / 2, and more averaging means more smoothness in this value.


--------------------------------------------------------------------------------
/Homework_Week4_2.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | ==============================
  3 | Case Study 2 - Bird Migration
  4 | ==============================
  5 | '''
  6 | 
  7 | #In this case study, we will continue taking a look at patterns of flight 
  8 | #for each of the three birds in our dataset.
  9 | 
 10 | #------------------------------------------------------------------------------
 11 | 
 12 | #Exercise 1
 13 | #----------
 14 | 
 15 | #pandas makes it easy to perform basic operations on groups within a dataframe 
 16 | #without needing to loop through each value in the dataframe. The sample code 
 17 | #shows you how to group the dataframe by birdname and then find the average 
 18 | #speed_2d for each bird. Modify the code to assign the mean altitudes of each 
 19 | #bird into an object called mean_altitudes.
 20 | 
 21 | #load the dataframe
 22 | import pandas as pd 
 23 | 
 24 | birddata = pd.read_csv('/Users/Admin/Desktop/bird_tracking.csv')
 25 | 
 26 | # First, use `groupby` to group up the data.
 27 | grouped_birds = birddata.groupby("bird_name")
 28 | 
 29 | # Now operations are performed on each group.
 30 | mean_speeds = grouped_birds.speed_2d.mean()
 31 | 
 32 | # The `head` method prints the first 5 lines of each bird.
 33 | print grouped_birds.head()
 34 | 
 35 | # Find the mean `altitude` for each bird.
 36 | # Assign this to `mean_altitudes`.
 37 | mean_altitudes = grouped_birds.altitude.mean()
 38 | 
 39 | #------------------------------------------------------------------------------
 40 | 
 41 | #Exercise 2
 42 | #----------
 43 | 
 44 | #In this exercise, we will group the flight times by date and calculate the 
 45 | #mean altitude within that day. Use groupby to group the data by date.
 46 | 
 47 | # Convert birddata.date_time to the `pd.datetime` format.
 48 | birddata.date_time = pd.to_datetime(birddata.date_time)
 49 | 
 50 | # Create a new column of day of observation
 51 | birddata["date"] = birddata.date_time.dt.date
 52 | 
 53 | # Check the head of the column.
 54 | print birddata.date.head()
 55 | 
 56 | grouped_bydates = birddata.groupby("date")
 57 | 
 58 | #Calculate the mean altitude per day and store these results as 
 59 | #mean_altitudes_perday.
 60 | 
 61 | mean_altitudes_perday = grouped_bydates.altitude.mean()
 62 | 
 63 | #------------------------------------------------------------------------------
 64 | 
 65 | #Exercise 3
 66 | #----------
 67 | 
 68 | #birddata already contains the date column. To find the average speed for each 
 69 | #bird and day, create a new grouped dataframe called grouped_birdday that 
 70 | #groups the data by both bird_name and date.
 71 | 
 72 | grouped_birdday = birddata.groupby(["bird_name", "date"])
 73 | mean_altitudes_perday = grouped_birdday.altitude.mean()
 74 | 
 75 | # look at the head of `mean_altitudes_perday`.
 76 | mean_altitudes_perday.head()
 77 | 
 78 | #------------------------------------------------------------------------------
 79 | 
 80 | #Exercise 4
 81 | #----------
 82 | 
 83 | #Great! Now find the average speed for each bird and day. Store these are three 
 84 | #pandas Series objects – one for each bird.
 85 | 
 86 | #Use the plotting code provided to plot the average speeds for each bird.
 87 | 
 88 | import matplotlib.pyplot as plt
 89 | 
 90 | eric_daily_speed  = grouped_birdday.speed_2d.mean()["Eric"]
 91 | sanne_daily_speed = grouped_birdday.speed_2d.mean()["Sanne"]
 92 | nico_daily_speed  = grouped_birdday.speed_2d.mean()["Nico"]
 93 | 
 94 | eric_daily_speed.plot(label="Eric")
 95 | sanne_daily_speed.plot(label="Sanne")
 96 | nico_daily_speed.plot(label="Nico")
 97 | plt.legend(loc="upper left")
 98 | plt.show()
 99 | 
100 | #------------------------------------------------------------------------------


--------------------------------------------------------------------------------
/Homework_Week1_2.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Sat Apr 22 00:34:24 2017
  5 | 
  6 | @author: shivamchaturvedi
  7 | """
  8 | 
  9 | #The ratio of the areas of a circle and the square inscribing it is pi / 4. 
 10 | #In this six-part exercise, we will find a way to approximate this value.
 11 | 
 12 | #-------------------------------------------------
 13 | 
 14 | #2a
 15 | #Using the math library, calculate and print the value of pi / 4.
 16 | 
 17 | 
 18 | import math
 19 | from math import pi
 20 | 
 21 | x = math.pi/4
 22 | 
 23 | #print(x)
 24 | 
 25 | #-------------------------------------------------
 26 | 
 27 | #2b
 28 | #Using random.uniform, create a function rand() that generates a single float between -1 and 1.
 29 | #Call rand() once. So we can check your solution, we will use random.seed to fix the value called 
 30 | #by your function.
 31 | 
 32 | 
 33 | import random
 34 | 
 35 | random.seed(1) # This line fixes the value called by your function,
 36 |                # and is used for answer-checking.
 37 | 
 38 | def rand():
 39 |    return random.uniform(-1, 1)
 40 | 
 41 | rand()
 42 | 
 43 | #-------------------------------------------------
 44 | 
 45 | #2c
 46 | #The distance between two points x and y is the square root of the sum of squared differences along 
 47 | #each dimension of x and y. Create a function distance(x, y) that takes two vectors and outputs the 
 48 | #distance between them. Use your function to find the distance between (0,0) and (1,1).
 49 | #Print your answer.
 50 | 
 51 | def distance(x, y): #x,y are points 
 52 |     dist = math.sqrt((x[0] - y[0]) ** 2 + (x[1] - y[1]) ** 2 )#In our case here, the first point is x=(x[0],x[1])
 53 |     #and the second point is y=(y[0],y[1]). Therefore, the distance between two points is given by:
 54 |         #distance = sqrt((x[0]-y[0])^2+(x[1]-y[1])^2)
 55 |     return dist
 56 |     
 57 | print(distance((0,0),(1,1)))
 58 | 
 59 | #-------------------------------------------------
 60 | 
 61 | #2d
 62 | #distance(x, y) is pre-loaded from part 2c. Make a function in_circle(x, origin) that uses distance 
 63 | #to determine if a two-dimensional point falls within the the unit circle with a given origin. 
 64 | #That is, find if a two-dimensional point has distance <1 from the origin (0,0).
 65 | #Use your function to print whether the point (1,1) lies within the unit circle centered at (0,0).
 66 | 
 67 | def in_circle(x, origin = (0,0)): #Python, like other languages, provides support for default argument values, 
 68 |   #that is function arguments that can either be specified by the caller or left blank to automatically receive a predefined value.
 69 |   #In our case here we pre-defined the origin of the unit circle to be (0,0)
 70 |    return distance(x, origin) < 1
 71 | 
 72 | print(in_circle((1,1),(0,0))) #This results False as the point resides outside the unit circle. 
 73 | #If we choose another point, say (0.5, 0.5), the result would be True because the point lies inside the unit circle.
 74 | 
 75 | #As the origin is a pre-defined argument, we can exclude it from the function-calling syntax and write
 76 | #print(in_circle((1,1))) instead
 77 | #-------------------------------------------------
 78 | 
 79 | #2e
 80 | #The functions rand and in_circle are defined from previous exercises. 
 81 | #Using these functions, code is pre-entered that creates a list x of R=10000 
 82 | #two-dimensional points. Create a list of 10000 booleans called inside that are 
 83 | #True if and only if the point in x with that index falls within the unit circle. 
 84 | #Make sure to use in_circle!
 85 | #Print the proportion of points within the circle. 
 86 | #This proportion is an estimate of the ratio of the two areas!
 87 |     
 88 | R = 10000
 89 | x = [ (rand(), rand()) for i in range(R) ]
 90 | inside = [ in_circle(p) for p in x ]
 91 | print(sum(inside) / R)
 92 | 
 93 | #------
 94 | 
 95 | #another way to do it
 96 | R = 10000
 97 | x = []
 98 | inside = []
 99 | 
100 | for i in range(R):
101 |     point = [rand(), rand()]
102 |     x.append(point)
103 |     
104 | for j in x:
105 |     inside = [in_circle(j)] 
106 |     
107 | print(sum(inside)/R)
108 |  
109 | #-------------------------------------------------
110 | 
111 | #2f
112 | #Note: inside and R are defined as in Exercise 2e. Recall the true ratio of the area of of the unit circle 
113 | #to the area to the inscribing square is pi / 4.
114 | #Find and print the difference between this value and your estimate from part 2e.
115 | 
116 | 
117 | print( (math.pi / 4) - (sum(inside) / R) )


--------------------------------------------------------------------------------
/Homework_Week3_1.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Sat Apr 22 00:40:41 2017
  5 | 
  6 | @author: shivamchaturvedi
  7 | """
  8 | 
  9 | '''
 10 | ==============================
 11 | Case Study 1 - Caesar Cipher!
 12 | ==============================
 13 | '''
 14 | 
 15 | 
 16 | #A cipher is a secret code for a language. In this case study, we will explore 
 17 | #a cipher that is reported by contemporary Greek historians to have been used 
 18 | #by Julius Caesar to send secret messages to generals during times of war.
 19 | 
 20 | #The Caesar cipher shifts each letter of this message to another letter in the 
 21 | #alphabet, which is a fixed number of letters away from the original. 
 22 | #If our encryption key were 1, we would shift h to the next letter i, 
 23 | #i to the next letter j, and so on. If we reach the end of the alphabet, 
 24 | #which for us is the space character, we simply loop back to a. 
 25 | #To decode the message, we make a similar shift, except we move the same 
 26 | #number of steps backwards in the alphabet.
 27 | 
 28 | #----------------------------------------------------------------------------------------------------------------
 29 | 
 30 | 
 31 | # Exercise 1
 32 | #-----------
 33 | 
 34 | #TODO: Create a dictionary letters with keys consisting of the numbers from 0 
 35 | #to 26, and values consisting of the lowercase letters of the English alphabet,
 36 | # including the space ' ' at the end.
 37 | 
 38 | # Let's look at the lowercase letters.
 39 | import string
 40 | string.ascii_lowercase
 41 | 
 42 | # We will consider the alphabet to be these letters, along with a space.
 43 | alphabet = string.ascii_lowercase + " "
 44 | 
 45 | # create `letters` here!
 46 | letters = {0:'a', 1:'b', 2:'c', 3:'d', 4:'e', 5:'f', 6:'g', 7:'h', 8:'i'
 47 |            , 9:'j', 10:'k', 11:'l', 12:'m', 13:'n', 14:'o', 15:'p', 16:'q'
 48 |            , 17:'r', 18:'s', 19:'t', 20:'u', 21:'v', 22:'w', 23:'x', 24:'y'
 49 |            , 25:'z', 26:' '}
 50 | 
 51 | #Another way is:
 52 | 
 53 | letters = {i:alphabet[i] for i in range(0,27)}
 54 | print(letters)
 55 | 
 56 | #------------------------------------------------------------------------------
 57 | 
 58 | # Exercise 2
 59 | #-----------
 60 | 
 61 | #alphabet and letters are already defined. Create a dictionary encoding with 
 62 | #keys being the characters in alphabet and values being numbers from 0-26, 
 63 | #shifted by an integer encryption_key=3. For example, the key a should have 
 64 | #value encryption_key, key b should have value encryption_key + 1, and so on. 
 65 | #If any result of this addition is less than 0 or greater than 26, you can 
 66 | #ensure the result remains within 0-26 using result % 27.
 67 | 
 68 | 
 69 | encryption_key = 3
 70 | 
 71 | # define `encoding` here!
 72 | encoding = {alphabet[i]:((i + encryption_key) % 27) for i in range(27)}
 73 | 
 74 | #OR
 75 | 
 76 | #alphabet_len = len(alphabet)
 77 | #encoding = {alphabet[i]:((i + encryption_key) % alphabet_len) for i in range(alphabet_len)}
 78 | 
 79 | print(encoding)
 80 | 
 81 | 
 82 | #------------------------------------------------------------------------------
 83 | 
 84 | # Exercise 3
 85 | #-----------
 86 | 
 87 | #alphabet and letters are preloaded from the previous exercise. Write a function 
 88 | #caesar(message, encryption_key) to encode a message with the Caesar cipher.
 89 | 
 90 | #Use your code from Exercise 2 to find the value of encoding for each letter in message.
 91 | 
 92 | #Use these values as keys in the dictionary letters to determine the encoded 
 93 | #letter for each letter in message.
 94 | #Your function should return a string consisting of these encoded letters.
 95 | 
 96 | #Use caesar to encode message using encryption_key = 3, and save the result as encoded_message.
 97 | 
 98 | #Print encoded_message.
 99 | 
message = "hi my name is caesar"


def caesar(message, key):
    """Encode ``message`` with a Caesar cipher over the module-level ``alphabet``.

    Every character is replaced by the one ``key`` positions further along
    ``alphabet``, wrapping around at the end; a negative ``key`` shifts
    backwards. A character that is not in ``alphabet`` raises ``KeyError``.

    Returns the encoded message as a single string.
    """
    size = len(alphabet)
    # Position each character lands on after shifting by `key`.
    shifted_index = {}
    for position, symbol in enumerate(alphabet):
        shifted_index[symbol] = (position + key) % size
    pieces = [alphabet[shifted_index[symbol]] for symbol in message]
    return ''.join(pieces)


encoded_message = caesar(message, 3)

print(encoded_message)

#------------------------------------------------------------------------------

# Exercise 4
#-----------

#Decode encoded_message by calling caesar with the opposite key, -3.
#Store the result as decoded_message and print it.

decoded_message = caesar(encoded_message, -3)
print(decoded_message)
124 | 
125 | #It can be seen that it is the same original message!
126 | #------------------------------------------------------------------------------


--------------------------------------------------------------------------------
/Homework_Week3_2.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Sat Apr 22 00:40:40 2017
  5 | 
  6 | @author: shivamchaturvedi
  7 | """
  8 | 
  9 | '''
 10 | ====================================================
 11 | Case Study 2 - Word Frequency Distribution in Hamlet
 12 | ====================================================
 13 | '''
 14 | 
 15 | #In this case study, we will find and plot the distribution of word frequencies for each translation of Hamlet. 
 16 | #Perhaps the distribution of word frequencies of Hamlet depends on the translation --- let's find out!
 17 | 
 18 | #For these exercises, functions count_words_fast, read_book, and word_stats are already defined as in the Case 2 Videos (Videos 3.2.x).
 19 | 
 20 | #Define functions
 21 | #------------------
 22 | 
 23 | from collections import Counter
 24 | 
 25 | def count_words_fast(text):
 26 |     """count the number of times each word occurs in text (str). 
 27 |     Return dictionary where keys are unique words and values are 
 28 |     word counts. skip punctuations"""
 29 |     text = text.lower() #lowercase for the counting letters so the function can cont the same words whether it's capatilised or not
 30 |     skips = [".", ",", ";", ":", "'", '"'] #skipping all the punctuations to not be counted with the words that come bfore them
 31 |     for ch in skips:
 32 |         text = text.replace(ch,"")
 33 |     word_counts = Counter(text.split(" "))
 34 |     return word_counts
 35 | 
 36 | #------------------
 37 | def read_book(title_path):
 38 |     """Read a book and return it as a string"""
 39 |     with open(title_path, "r") as current_file:
 40 |         text = current_file.read()
 41 |         text = text.replace("\n","").replace("\r","")
 42 |     return text
 43 | 
 44 | #------------------
 45 | def word_stats(word_counts):
 46 |    """return the number of unique words and word frequencies"""
 47 |    num_unique = len(word_counts) #calculate the number of unique words in the text
 48 |    counts = word_counts.values() #calculate the frequency of each word in the text
 49 |    return(num_unique,counts)
 50 | 
 51 | #----------------------------------------------------------------------------------------------------------------
 52 | 
 53 | 
 54 | # Exercise 1
 55 | #-----------
 56 | 
 57 | #TODO: Write a function word_count_distribution(text) that takes a book string and returns a dictionary with items 
 58 | #corresponding to the count of times a collection of words appears in the translation, and values corresponding to 
 59 | #the number of number of words that appear with that frequency.
 60 | 
 61 | #TODO: First use count_words_fast(text) to create a dictionary called word_counts with unique words in the dictionary 
 62 | #as keys and their frequency in the book as values.
 63 | 
 64 | #TODO: Next, create and return a new dictionary count_distribution with unique values from word_counts as keys and their 
 65 | #frequency as values. For example, 'you are what you eat' contains three words that occur once and one word that occurs twice, 
 66 | #so word_count_distribution('you are what you eat') should return a dictionary {1:3, 2:1}.
 67 | 
 68 | def word_count_distribution(text):
 69 |     word_counts = count_words_fast(text)
 70 |     count_distribution = dict(Counter(word_counts.values()))
 71 |     return count_distribution
 72 | 
 73 | #TODO: 'Romeo and Juliet' is preloaded as text. Call word_count_distribution(text), and save the result as distribution.
 74 | 
 75 | distribution = word_count_distribution(text) 
 76 | 
 77 | print(distribution)
 78 | #------------------------------------------------------------------------------
 79 | 
 80 | # Exercise 2
 81 | #-----------
 82 | 
 83 | #TODO: Create a function more_frequent(distribution) that takes a word frequency dictionary (like that made in Exercise 1)
 84 | # and outputs a dictionary with the same keys as those in distribution 
 85 | # (the number of times a group of words appears in the text), and values corresponding to the fraction of words that occur 
 86 | # with more frequency than that key.
 87 | 
 88 | import numpy as np
 89 | 
 90 | def more_frequent(distribution):
 91 |     counts = list(distribution.keys())
 92 |     frequency_of_counts = list(distribution.values())
 93 |     cumulative_frequencies = np.cumsum(frequency_of_counts)
 94 |     more_frequent = 1 - cumulative_frequencies / cumulative_frequencies[-1] #To obtain the fraction of words more frequent than this,
 95 |     #divide this cumulative sum by its maximum, and subtract this value from 1. 
 96 |     return dict(zip(counts, more_frequent))
 97 |     
 98 | 
 99 | #TODO: Call more_frequent(distribution).
100 | 
101 | more_frequent(distribution)   
102 | 
103 | #------------------------------------------------------------------------------
104 | 
105 | # Exercise 3
106 | #-----------
107 | 
108 | #Edit the code used to read though each of the books in our library, and store 
109 | #the word frequency distribution for each translation of William Shakespeare's 
110 | #"Hamlet" as a Pandas dataframe hamlets with columns named "language" and 
111 | #"distribution". word_count_distribution is preloaded from Exercise 1. 
112 | #How many translations are there? Which languages are they translated into?
113 | 
114 | import pandas as pd
115 | 
# One row per translation of Hamlet: its language and its word-frequency distribution.
hamlets = pd.DataFrame(columns=('language','distribution'))

book_dir = "Books"
title_num = 1

# NOTE(review): `book_titles` and `data_filepath` are not defined in this file;
# presumably the course environment preloads them -- confirm.
for language in book_titles:
    for author in book_titles[language]:
        for title in book_titles[language][author]:
            if title != "Hamlet":
                continue
            inputfile = data_filepath + "/".join(["Books", language, author, title]) + ".txt"
            text = read_book(inputfile)
            distribution = word_count_distribution(text)
            # Rows are labelled 1, 2, 3, ... in the order the books are found.
            hamlets.loc[title_num] = language, distribution
            title_num += 1
130 |                 
131 | #There are three translations: English, German, and Portuguese.
132 | 
133 | #------------------------------------------------------------------------------
134 | 
135 | # Exercise 4
136 | #-----------
137 | 
138 | #Plot the word frequency distributions of each translations on a single log-log plot. 
139 | #Note that we have already done most of the work for you. Do the distributions of each translation differ?
140 | 
141 | import matplotlib.pyplot as plt
142 | 
colors = ["crimson", "forestgreen", "blueviolet"]
handles, hamlet_languages = [], []
for index in range(hamlets.shape[0]):
    # Rows of `hamlets` are looked up by label, which is index + 1 here.
    language = hamlets.language[index+1]
    distribution = hamlets.distribution[index+1]
    dist = more_frequent(distribution)
    # Counts ascending on the x-axis, fractions descending on the y-axis.
    x_values = sorted(dist.keys())
    y_values = sorted(dist.values(), reverse = True)
    plot, = plt.loglog(x_values, y_values, color = colors[index], linewidth = 2)
    handles.append(plot)
    hamlet_languages.append(language)
plt.title("Word Frequencies in Hamlet Translations")
xlim    = [0, 2e3]
xlabel  = "Frequency of Word $W$"
ylabel  = "Fraction of Words\nWith Greater Frequency than $W$"
plt.xlim(xlim)
plt.xlabel(xlabel)
plt.ylabel(ylabel)
plt.legend(handles, hamlet_languages, loc = "upper right", numpoints = 1)
plt.show()
159 | 
160 | 
161 | #The distributions differ somewhat, but their basic shape is the same. 
162 | #By the way, distributions that look like a straight line like these are called 
163 | #'scale-free,' because the line looks the same no matter where on the x-axis you look!


--------------------------------------------------------------------------------
/Homework_Week3_3.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | ===================================
  3 | Case Study 3 - Wine Classification
  4 | ===================================
  5 | '''
  6 | 
  7 | #In this case study, we will analyze a dataset consisting of an assortment of 
  8 | #wines classified as "high quality" and "low quality" and will use the k-Nearest 
  9 | #Neighbors classifier to determine whether or not other information about the wine 
 10 | #helps us correctly predict whether a new wine will be of high quality.
 11 | 
 12 | 
 13 | 
 14 | #Exercise 1
 15 | #----------
 16 | 
 17 | #TODO: Read in the data as a pandas dataframe using pd.read_csv. 
 18 | #The data can be found at https://s3.amazonaws.com/demo-datasets/wine.csv.
 19 | 
 20 | import pandas as pd
 21 | import numpy as np
 22 | 
 23 | data = pd.read_csv('https://s3.amazonaws.com/demo-datasets/wine.csv') #read directly from the link
 24 | 
 25 | print data.head(5)
 26 | print np.shape(data) #(6497, 15)
 27 | 
 28 | #------------------------------------------------------------------------------
 29 | 
 30 | #Exercise 2
 31 | #----------
 32 | 
 33 | #TODO: The dataset remains stored as data. Two columns in data are is_red and color, 
 34 | #which are redundant. Drop color from the dataset, and save the new dataset as 
 35 | #numeric_data.
 36 | 
 37 | numeric_data = data.drop('color', axis=1)
 38 | print (numeric_data.head(5))
 39 | #------------------------------------------------------------------------------
 40 | 
 41 | #Exercise 3
 42 | #----------
 43 | 
 44 | #TODO: To ensure that each variable contributes equally to the kNN classifier, 
 45 | #we need to standardize the data. First, from each variable in numeric_data, 
 46 | #subtract its mean. Second, for each variable in numeric_data, divide by its 
 47 | #standard deviation. Store your standardized result as numeric_data.
 48 | 
 49 | 
 50 | numeric_data = (numeric_data - np.mean(numeric_data)) / np.std(numeric_data)
 51 | 
 52 | print(numeric_data.head(5))
 53 | 
 54 | #TODO: Principal component analysis is a way to take a linear snapshot of the data 
 55 | #from several different angles, with each snapshot ordered by how well it 
 56 | #aligns with variation in the data. The sklearn.decomposition module contains 
 57 | #the PCA class, which determines the most informative principal components of 
 58 | #the data (a matrix with columns corresponding to the principal components). 
 59 | #Use pca.fit(numeric_data).transform(numeric_data) to extract the first two 
 60 | #principal components from the data. Store this as principal_components.
 61 | 
 62 | import sklearn.decomposition
 63 | pca = sklearn.decomposition.PCA(n_components=2)
 64 | principal_components = pca.fit(numeric_data).transform(numeric_data)
 65 | 
 66 | #------------------------------------------------------------------------------
 67 | 
 68 | #Exercise 4
 69 | #----------
 70 | 
 71 | #TODO: The first two principal components can be accessed using principal_components[:,0] 
 72 | #and principal_components[:,1]. Store these as x and y respectively, and plot the 
 73 | #first two principal components. The high and low quality wines will be colored 
 74 | #using red and blue. How well are the two groups of wines separated by the first 
 75 | #two principal components?
 76 | 
 77 | import matplotlib.pyplot as plt
 78 | from matplotlib.colors import ListedColormap
 79 | from matplotlib.backends.backend_pdf import PdfPages
 80 | observation_colormap = ListedColormap(['red', 'blue'])
 81 | x = principal_components[:,0]
 82 | y = principal_components[:,1]
 83 | plt.plot(principal_components[:,0], principal_components[:,1])
 84 | plt.title("Principal Components of Wine")
 85 | plt.scatter(x, y, alpha = 0.2,
 86 |     c = data['high_quality'], cmap = observation_colormap, edgecolors = 'none')
 87 | plt.xlim(-8, 8); plt.ylim(-8, 8)
 88 | plt.xlabel("Principal Component 1"); plt.ylabel("Principal Component 2")
 89 | plt.show()
 90 | 
 91 | #------------------------------------------------------------------------------
 92 | 
 93 | #Exercise 5
 94 | #----------
 95 | 
 96 | #TODO: We are now ready to fit the wine data to our kNN classifier. 
 97 | #Create a function accuracy(predictions, outcomes) that takes two lists of the 
 98 | #same size as arguments and returns a single number, which is the percentage of 
 99 | #elements that are equal for the two lists.
100 | 
def accuracy(predictions, outcomes):
    """Return the percentage of positions at which predictions equal outcomes.

    Both arguments are converted to NumPy arrays and compared element by
    element, so lists, arrays and pandas Series are compared by position.
    A scalar is broadcast against the other argument, which is what allows
    calls such as accuracy(0, data["high_quality"]).

    Parameters:
        predictions: scalar or sequence of predicted values.
        outcomes: scalar or sequence of observed values.

    Returns:
        A number between 0 and 100: the share of matching elements.
    """
    # A single vectorised comparison replaces the nested loops, which
    # failed for scalar input and when no pair of values matched.
    matches = np.asarray(predictions) == np.asarray(outcomes)
    return 100 * np.mean(matches)
107 |                 
108 | #TODO: Use accuracy to compare the percentage of similar elements in 
109 | #x = np.array([1,2,3]) and y = np.array([1,2,4]).
110 | 
111 | x = np.array([1,2,3])
112 | y = np.array([1,2,4])
113 | 
114 | percentage = accuracy(x, y)
115 | 
116 | #TODO: Print your answer.
117 | 
# print() is called as a function so this line is valid on Python 3 as well.
print(percentage)
119 | 
120 | #------------------------------------------------------------------------------
121 | 
122 | #Exercise 6
123 | #----------
124 | 
125 | #TODO: The dataset remains stored as data. Because most wines in the dataset are 
126 | #classified as low quality, one very simple classification rule is to predict 
127 | #that all wines are of low quality. Use the accuracy function (preloaded into 
128 | #memory as defined in Exercise 5) to calculate how many wines in the dataset 
129 | #are of low quality. Accomplish this by calling accuracy with 0 as the first 
130 | #argument, and data["high_quality"] as the second argument.
131 | 
132 | number_of_low_quality = accuracy(0, data["high_quality"])
133 | 
134 | #TODO: Print your result.
135 | 
# print() is called as a function so this line is valid on Python 3 as well.
print(number_of_low_quality)
137 | 
138 | #------------------------------------------------------------------------------
139 | 
140 | #Exercise 7
141 | #----------
142 | 
143 | #TODO: Use knn.predict(numeric_data) to predict which wines are high and low quality 
144 | #and store the result as library_predictions.
145 | 
146 | from sklearn.neighbors import KNeighborsClassifier
147 | 
148 | knn = KNeighborsClassifier(n_neighbors = 5)
149 | knn.fit(numeric_data, data['high_quality'])
150 | library_predictions = knn.predict(numeric_data)
151 | 
152 | 
153 | #TODO: Use accuracy to find the accuracy of your predictions, using library_predictions 
154 | #as the first argument and data["high_quality"] as the second argument.
155 | 
156 | predictions_accuracy = accuracy(library_predictions, data["high_quality"])
157 | 
158 | #TODO: Print your answer. Is this prediction better than the simple classifier 
159 | #in Exercise 6?
160 | 
161 | print (predictions_accuracy) # Yes, this is better!
162 | 
163 | #------------------------------------------------------------------------------
164 | 
165 | #Exercise 8
166 | #----------
167 | 
168 | #TODO: Unlike the scikit-learn function, our homemade kNN classifier does not take 
169 | #any shortcuts in calculating which neighbors are closest to each observation, 
170 | #so it is likely too slow to carry out on the whole dataset. To circumvent this, 
171 | #fix the random generator using random.seed(123), and select 10 rows from the 
172 | #dataset using random.sample(range(n_rows), 10). Store this selection as selection.
173 | 
174 | import random 
175 | 
176 | n_rows = data.shape[0]
177 | 
178 | random.seed(123)
179 | selection = random.sample(range(n_rows), 10)
180 | 
181 | #------------------------------------------------------------------------------
182 | 
183 | #Exercise 9
184 | #----------
185 | 
186 | #TODO: The sample of 10 row indices are stored as selection from the previous exercise. 
187 | #For each predictor p in predictors[selection], 
188 | #use knn_predict(p, predictors[training_indices,:], outcomes, k=5) to predict 
189 | #the quality of each wine in the prediction set, and store these predictions 
190 | #as a np.array called my_predictions. Note that knn_predict is already defined 
191 | #as in the Case 3 videos.
192 | 
predictors = np.array(numeric_data)
# Every row that was not sampled into `selection` is used for training.
training_indices = [i for i in range(len(predictors)) if i not in selection]
outcomes = np.array(data["high_quality"])

# knn_predict is not defined in this file; it is expected to be in scope
# already (the exercise text says it comes from the Case 3 videos).
# The outcomes are restricted to the training rows so that each label sits at
# the same position as its row in predictors[training_indices, :].
# NOTE(review): assumes knn_predict looks labels up by position within the
# predictors it is given - confirm against its definition.
my_predictions = np.array([
    knn_predict(p, predictors[training_indices, :], outcomes[training_indices], k=5)
    for p in predictors[selection]
])
198 | 
199 | 
200 | #TODO: Using the accuracy function, compare these results to the selected rows from 
201 | #the high_quality variable in data using my_predictions as the first argument 
202 | #and data.high_quality[selection] as the second argument. Store these results 
203 | #as percentage.
204 | 
205 | percentage = accuracy(my_predictions, data.high_quality[selection] )
206 | 
207 | #TODO: Print your answer.
208 | 
209 | print(percentage) #Our accuracy is comparable to the library's function!
210 | 
211 | #------------------------------------------------------------------------------


--------------------------------------------------------------------------------
/Homework_Week4_1.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | ===================================================
  3 | Case Study 1 - Scotch Whiskies Analysis Using Bokeh
  4 | ===================================================
  5 | '''
  6 | 
  7 | #In this case study, we have prepared step-by-step instructions for you on how 
  8 | #to prepare plots in Bokeh, a library designed for simple interactive plotting. 
  9 | #We will demonstrate Bokeh by continuing the analysis of Scotch whiskies.
 10 | 
 11 | #----------------------------------------------------------------------------------------------------------------
 12 | 
 13 | # Exercise 1
 14 | #-----------
 15 | 
 16 | #Here we provide a basic demonstration of an interactive grid plot using Bokeh. 
 17 | #Execute the following code and follow along with the comments. We will later 
 18 | #adapt this code to plot the correlations among distillery flavor profiles as 
 19 | #well as plot a geographical map of distilleries colored by region and flavor 
 20 | #profile.
 21 | 
 22 | #Make sure to study this code now, as we will edit similar code in the 
 23 | #exercises that follow.
 24 | 
 25 | #Once you have plotted the code, hover, click, and drag your cursor on the plot 
 26 | #to interact with it. Additionally, explore the icons in the top-right corner 
 27 | #of the plot for more interactive options!
 28 | 
 29 | # First, we import a tool to allow text to pop up on a plot when the cursor
 30 | # hovers over it.  Also, we import a data structure used to store arguments
 31 | # of what to plot in Bokeh.  Finally, we will use numpy for this section as well!
 32 | 
 33 | from bokeh.models import HoverTool, ColumnDataSource
 34 | import numpy as np
 35 | 
 36 | # Let's plot a simple 5x5 grid of squares, alternating in color as red and blue.
 37 | 
 38 | plot_values = [1,2,3,4,5]
 39 | plot_colors = ["red", "blue"]
 40 | 
 41 | # How do we tell Bokeh to plot each point in a grid?  Let's use a function that
 42 | # finds each combination of values from 1-5.
 43 | from itertools import product
 44 | 
 45 | grid = list(product(plot_values, plot_values))
 46 | print(grid)
 47 | 
 48 | 
 49 | #------------------------------------------------------------------------------
 50 | 
 51 | # Exercise 2
 52 | #-----------
 53 | 
 54 | 
 55 | #Let's create the names and colors we will use to plot the correlation matrix 
 56 | #of whisky flavors. Later, we will also use these colors to plot each distillery 
 57 | #geographically. Create a dictionary region_colors with regions as keys 
 58 | #and cluster_colors as values.
 59 | 
 60 | cluster_colors = ["red", "orange", "green", "blue", "purple", "gray"]
 61 | regions = ["Speyside", "Highlands", "Lowlands", "Islands", "Campbelltown", "Islay"]
 62 | 
 63 | region_colors = dict(zip(regions,cluster_colors))
 64 | 
 65 | #Print region_colors.
 66 | 
 67 | print(region_colors)
 68 | 
 69 | #------------------------------------------------------------------------------
 70 | 
 71 | # Exercise 3
 72 | #-----------
 73 | 
 74 | #correlations is a two-dimensional np.array with both rows and columns 
 75 | #corresponding to distilleries and elements corresponding to the flavor 
 76 | #correlation of each row/column pair. Let's define a list correlation_colors, 
 77 | #with string values corresponding to colors to be used to plot each distillery 
 78 | #pair. Low correlations among distillery pairs will be white, high correlations 
 79 | #will be a distinct group color if the distilleries are from the same group, and 
 80 | #gray otherwise. Edit the code to define correlation_colors for each distillery 
 81 | #pair to have input 'white' if their correlation is less than 0.7.
 82 | 
 83 | #whisky.Group is a pandas dataframe column consisting of distillery group 
 84 | #memberships. For distillery pairs with correlation greater than 0.7, if they 
 85 | #share the same whisky group, use the corresponding color from cluster_colors. 
 86 | #Otherwise, the correlation_colors value for that distillery pair will be 
 87 | #defined as 'lightgray'.
 88 | 
 89 | distilleries = list(whisky.Distillery)
 90 | correlation_colors = []
 91 | for i in range(len(distilleries)):
 92 |     for j in range(len(distilleries)):
 93 |         if correlations[i,j] < .70:                    # if low correlation,
 94 |             correlation_colors.append('white')         # just use white.
 95 |         else:                                          # otherwise,
 96 |             if whisky.Group[i] == whisky.Group[j]:     # if the groups match,
 97 |                 correlation_colors.append(cluster_colors[whisky.Group[i]]) # color them by their mutual group.
 98 |             else:                                      # otherwise
 99 |                 correlation_colors.append('lightgray') # color them lightgray.
100 | 
101 | #------------------------------------------------------------------------------
102 | 
103 | # Exercise 4
104 | #-----------
105 | 
106 | #We will edit the following code to make an interactive grid of the correlations 
107 | #among distillery pairs using correlation_colors and correlations. 
108 | #correlation_colors is a list of each distillery pair. To convert correlations 
109 | #from a np.array to a list, we will use the flatten method. Define the color 
110 | #of each rectangle in the grid using correlation_colors.
111 | 
112 | #Define the alpha (transparency) values using correlations.flatten().
113 | 
114 | #Define correlations using correlations.flatten(). When the cursor hovers 
115 | #over a rectangle, this will output the distillery pair, show both distilleries 
116 | #as well as their correlation coefficient.
117 | 
118 | source = ColumnDataSource(
119 |     data = {
120 |         "x": np.repeat(distilleries,len(distilleries)),
121 |         "y": list(distilleries)*len(distilleries),
122 |         "colors": correlation_colors,
123 |         "alphas": correlations.flatten(),
124 |         "correlations": correlations.flatten(),
125 |     }
126 | )
127 | 
128 | output_file("Whisky Correlations.html", title="Whisky Correlations")
129 | fig = figure(title="Whisky Correlations",
130 |     x_axis_location="above", tools="resize,hover,save",
131 |     x_range=list(reversed(distilleries)), y_range=distilleries)
132 | fig.grid.grid_line_color = None
133 | fig.axis.axis_line_color = None
134 | fig.axis.major_tick_line_color = None
135 | fig.axis.major_label_text_font_size = "5pt"
136 | fig.xaxis.major_label_orientation = np.pi / 3
137 | 
138 | fig.rect('x', 'y', .9, .9, source=source,
139 |      color='colors', alpha='alphas')
140 | hover = fig.select(dict(type=HoverTool))
141 | hover.tooltips = {
142 |     "Whiskies": "@x, @y",
143 |     "Correlation": "@correlations",
144 | }
145 | show(fig)
146 | #------------------------------------------------------------------------------
147 | 
148 | # Exercise 5
149 | #-----------
150 | 
151 | #Next, we provide an example of plotting points geographically. 
152 | #Run the following code, to be adapted in the next section. 
153 | #Compare this code to that used in plotting the distillery correlations.
154 | 
155 | points = [(0,0), (1,2), (3,1)]
156 | xs, ys = zip(*points)
157 | colors = ["red", "blue", "green"]
158 | 
159 | output_file("Spatial_Example.html", title="Regional Example")
160 | location_source = ColumnDataSource(
161 |     data={
162 |         "x": xs,
163 |         "y": ys,
164 |         "colors": colors,
165 |     }
166 | )
167 | 
168 | fig = figure(title = "Title",
169 |     x_axis_location = "above", tools="resize, hover, save")
170 | fig.plot_width  = 300
171 | fig.plot_height = 380
172 | fig.circle("x", "y", 10, 10, size=10, source=location_source,
173 |      color='colors', line_color = None)
174 | 
175 | hover = fig.select(dict(type = HoverTool))
176 | hover.tooltips = {
177 |     "Location": "(@x, @y)"
178 | }
179 | show(fig)
180 | 
181 | 
182 | #------------------------------------------------------------------------------
183 | 
184 | # Exercise 6
185 | #-----------
186 | 
187 | #Adapt the given code from the beginning to show(fig) in order to define a 
188 | #function location_plot(title, colors). This function takes a string title
189 | # and a list of colors corresponding to each distillery and outputs a Bokeh 
190 | #plot of each distillery by latitude and longitude. As the cursor hovers over 
191 | #each point, it displays the distillery name, latitude, and longitude.
192 | 
def location_plot(title, colors):
    """Show a Bokeh scatter of the distilleries with one colour per point.

    The output file is named after the title with ".html" appended. The
    function reads the module-level `whisky` table and relies on
    output_file, figure and show already being in scope.

    Parameters:
        title: string used for both the plot title and the output file name.
        colors: sequence with one colour per row of `whisky`.
    """
    output_file(title+".html")

    # Columns made available to the glyphs and to the hover tooltips.
    plot_data = {
        "x": whisky[" Latitude"],
        "y": whisky[" Longitude"],
        "colors": colors,
        "regions": whisky.Region,
        "distilleries": whisky.Distillery
    }
    location_source = ColumnDataSource(data=plot_data)

    fig = figure(title=title, x_axis_location="above",
                 tools="resize, hover, save")
    fig.plot_width  = 400
    fig.plot_height = 500
    fig.circle("x", "y", 10, 10, size=9, source=location_source,
               color='colors', line_color=None)
    fig.xaxis.major_label_orientation = np.pi / 3

    # Attach the tooltip text to the hover tool requested in `tools` above.
    hover = fig.select({"type": HoverTool})
    hover.tooltips = {
        "Distillery": "@distilleries",
        "Location": "(@x, @y)"
    }
    show(fig)
218 |     
219 | #whisky.Region is a pandas column containing the regional group membership for 
220 | #each distillery. Make a list consisting of the value of region_colors for 
221 | #each distillery, and store this list as region_cols.
222 | 
223 | region_cols = [region_colors[i] for i in list(whisky["Region"])]
224 | 
225 | #Use location_plot to plot each distillery, colored by its regional grouping. 
226 | 
227 | location_plot("Whisky Locations and Regions", region_cols)  
228 | 
229 | #------------------------------------------------------------------------------
230 | 
231 | # Exercise 7
232 | #-----------
233 | 
234 | #Use list comprehensions to create the list region_cols consisting of the color
235 | # in region_colors that corresponds to each whisky in whisky.Region.
236 | 
237 | region_cols = [region_colors[i] for i in whisky['Region']]
238 | 
239 | #Similarly, create a list classification_cols consisting of the color in 
240 | #cluster_colors that corresponds to each cluster membership in whisky.Group.
241 | 
242 | classification_cols = [cluster_colors[j] for j in whisky['Group']]
243 | 
244 | #location_plot remains stored from the previous exercise. Use it to create two 
245 | #interactive plots of distilleries, one colored by defined region called 
246 | #region_cols and the other with colors defined by coclustering designation 
247 | #called classification_cols. How well do the coclustering groupings match the 
248 | #regional groupings?
249 | 
250 | location_plot("Whisky Locations and Regions", region_cols)
251 | location_plot("Whisky Locations and Groups", classification_cols)
252 | 
253 | 
254 | '''
255 | We see that there is not very much overlap between the regional classifications 
256 | and the coclustering classifications. This means that regional classifications 
257 | are not a very good guide to Scotch whisky flavor profiles.
258 | '''
259 | #------------------------------------------------------------------------------


--------------------------------------------------------------------------------
/Homework_Week2.py:
--------------------------------------------------------------------------------
  1 | #Tic-tac-toe (or noughts and crosses) is a simple strategy game in which two players take turns placing a mark on a 3x3 board,
  2 | #attempting to make a row, column, or diagonal of three with their mark. In this homework, we will use the tools we've covered 
  3 | #in the past two weeks to create a tic-tac-toe simulator, and evaluate basic winning strategies.
  4 | 
  5 | #----------------------------------------------------------------------------------------------------------------
  6 | 
  7 | 
  8 | # Exercise 1
  9 | #-----------
 10 | 
 11 | #For our tic-tac-toe board, we will use a numpy array with dimension 3 by 3. Make a function create_board() that creates such 
 12 | #a board, with values of integers 0. 
 13 | 
 14 | import numpy as np
 15 | 
 16 | def create_board():
 17 |     return np.zeros ((3,3))
 18 | 
 19 | #Call create_board(), and store this as board.
 20 | 
 21 | board = create_board()              
 22 | 
 23 | #------------------------------------------------------------------------------
 24 | 
 25 | # Exercise 2
 26 | #-----------
 27 | 
 28 | #Players 1 and 2 will take turns changing values of this array from a 0 to a 1 or 2, indicating the number of the player who places there. 
 29 | #Create a function place(board, player, position) with player being the current player (an integer 1 or 2), and position a tuple of length 
 30 | #2 specifying a desired location to place their marker. Only allow the current player to place a piece on the board (change the board position 
 31 | #to their number) if that position is empty (zero).
 32 | 
 33 | def place(board, player, position):
 34 |     if board[position] == 0:
 35 |         board[position] = player
 36 |     
 37 | 
 38 | #Use create_board() to store a board as board, and use place to have Player 1 place a piece on spot (0, 0).                                
 39 | 
 40 | board = create_board() 
 41 | place(board , 1 , (0,0))
 42 | 
 43 | #print (board)               
 44 |                  
 45 | #------------------------------------------------------------------------------
 46 | 
 47 | # Exercise 3
 48 | #-----------
 49 | 
 50 | #Create a function possibilities(board) that returns a list of all positions (tuples) on the board that are not 
 51 | #occupied (0). (Hint: numpy.where is a handy function that returns a list of indexes that meet a condition.)
 52 | 
 53 | def possibilities(board):
 54 |     not_occupied = np.where(board == 0)
 55 |     return list(zip(not_occupied[0], not_occupied[1]))
 56 | 
 57 | 
 58 | 
 59 | #board is already defined from previous exercises. Call possibilities(board) to see what it returns!
 60 | 
 61 | print(possibilities(board))
 62 | 
 63 | #------------------------------------------------------------------------------
 64 | 
 65 | # Exercise 4
 66 | #-----------
 67 | 
 68 | #Create a function random_place(board, player) that places a marker for the current player at random 
 69 | #among all the available positions (those currently set to 0).
 70 | 
 71 | import random 
 72 | 
 73 | def random_place(board, player):
 74 |     selections = possibilities(board)
 75 |     if len(selections) > 0:
 76 |         selection = random.choice(selections)
 77 |         place(board, player, selection)
 78 |     return board
 79 |   
 80 |                          
 81 |                                
 82 | #board is already defined from previous exercises. Call random_place(board, player) to place a random 
 83 | #marker for Player 2, and store this as board to update its value.
 84 | 
 85 | random_place(board, 2)
 86 | 
 87 | #------------------------------------------------------------------------------
 88 | 
 89 | # Exercise 5
 90 | #-----------
 91 | 
 92 | #board is already defined from previous exercises. Use random_place(board, player) to place three pieces 
 93 | #on board each for players 1 and 2.
 94 | 
 95 | board = create_board()
 96 | for i in range(3):
 97 |     for player in [1, 2]:
 98 |         random_place(board, player)                                                                   
 99 |                                                                    
100 | #Print board to see your result
101 | 
102 | print(board)
103 | 
104 | #------------------------------------------------------------------------------
105 | 
106 | #Exercise 6
107 | #-----------
108 | 
109 | #Now that players may place their pieces, how will they know they've won? Make a function row_win(board, player) 
110 | #that takes the player (integer), and determines if any row consists of only their marker. 
111 | #Have it return True if this condition is met, and False otherwise.
112 | 
def check_row(row, player):
    """Return True when every marker in `row` equals `player`.

    An empty `row` yields True because no marker disagrees.
    """
    return all(marker == player for marker in row)
118 | 
def row_win(board, player):
    """Return True when at least one row of `board` holds only `player`."""
    return any(check_row(row, player) for row in board)
124 |                                                                                                  
125 | #board is already defined from previous exercises. Call row_win to check if Player 1 has a complete row.
126 | 
127 | row_win(board, 1)
128 | #------------------------------------------------------------------------------
129 | 
130 | #Exercise 7
131 | #-----------
132 | 
133 | #Create a similar function col_win(board, player) that takes the player (integer), 
134 | #and determines if any column consists of only their marker. Have it return True if this condition is met, 
135 | #and False otherwise.
136 |         
def col_win(board, player):
    """Return True when at least one column of `board` holds only `player`.

    The columns are visited as the rows of the transposed board.
    """
    return any(check_row(column, player) for column in board.T)
142 |                                   
143 |                                   
144 | #board is already defined from previous exercises. Call col_win to check if Player 1 has a complete column.
145 | 
146 | col_win(board, 1)
147 | #------------------------------------------------------------------------------
148 | 
149 | #Exercise 8
150 | #-----------
151 | 
152 | #Finally, create a function diag_win(board, player) that tests if either diagonal of the board 
153 | #consists of only their marker. Have it return True if this condition is met, and False otherwise.
154 | 
def diag_win(board, player):
    """Return True when either diagonal of `board` holds only `player`."""
    # Mirroring the board left-to-right turns the anti-diagonal into the
    # main diagonal, so both can be read with np.diagonal.
    diagonals = (np.diagonal(board), np.diagonal(np.fliplr(board)))
    return any(check_row(diagonal, player) for diagonal in diagonals)
159 | 
160 | 
161 | #board is already defined from previous exercises. Call diag_win to check if Player 1 has a complete diagonal.
162 | 
163 | diag_win(board, 1)
164 | 
165 | #------------------------------------------------------------------------------
166 | 
167 | #Exercise 9
168 | #-----------
169 | 
170 | #Create a function evaluate(board) that uses row_win, col_win, and diag_win functions for both players. 
171 | #If one of them has won, return that player's number. If the board is full but no one has won, return -1.
172 | #Otherwise, return 0.
173 | 
def evaluate(board):
    """Report the state of the game on `board`.

    Returns:
        1 or 2 if that player has completed a row, diagonal or column
        (player 1 is checked first), -1 if nobody has won and no cell is
        0, and 0 otherwise.
    """
    for player in (1, 2):
        if any(has_won(board, player) for has_won in (row_win, diag_win, col_win)):
            return player
    # No winner: distinguish a full board (draw) from a game in progress.
    return -1 if np.all(board != 0) else 0
183 |     
184 |                            
185 | #board is already defined from previous exercises. Call evaluate to see if either player has won the game yet.
186 | 
187 | evaluate(board)
188 | #------------------------------------------------------------------------------
189 | 
190 | #Exercise 10
191 | #-----------
192 | 
193 | #create_board(), random_place(board, player), and evaluate(board) have been created from previous exercises. 
194 | #Create a function play_game() that creates a board, alternates between two players 
195 | #(beginning with Player 1), and evaluates the board for a winner after every placement. 
196 | #Play the game until one player wins (returning 1 or 2 to reflect the winning player), 
197 | #or the game is a draw (returning -1).
198 | 
def play_game():
    """Play one game in which both players place their markers at random.

    Player 1 moves first and the board is evaluated after every placement.

    Returns:
        The first non-zero value produced by evaluate(): 1 or 2 for the
        winning player, or -1 for a draw.
    """
    board = create_board()
    outcome = 0
    while outcome == 0:
        for player in (1, 2):
            random_place(board, player)
            outcome = evaluate(board)
            if outcome != 0:
                break
    return outcome
207 |              
208 | #Call play_game once.
209 | 
210 | play_game()
211 | 
212 | #------------------------------------------------------------------------------
213 | 
214 | #Exercise 11
215 | #-----------
216 | 
217 | #Use the play_game() function to play 1,000 random games, where Player 1 always goes first.
218 | #When doing this, import and use the time library to call the time function both before and after 
219 | #playing all 1,000 games in order to evaluate how long this takes per game. Print your answer.
220 | 
221 | import time 
222 | 
223 | start_time = time.time()
224 |  
225 | ITERATIONS = 1000
226 | result = []
227 | for i in range(ITERATIONS):
228 |     result.append(play_game())                
229 | 
230 | end_time = time.time()
231 | 
232 | print(end_time - start_time)
233 | 
234 | #The library matplotlib.pyplot has already been stored as plt. Use plt.hist and plt.show to plot a 
235 | #histogram of the results. Does Player 1 win more than Player 2? 
236 | #Does either player win more than each player draws?
237 | 
238 | import matplotlib.pyplot as plt
239 | 
240 | plt.hist(result) 
241 | plt.savefig('tic_tac_toe_Hist.pdf')
242 | plt.show()
243 | 
244 | 
245 | '''
246 | #Result:
247 | #------
248 | We see (from the histogram plot) that Player 1 wins more than Player 2, and the game sometimes ends in draws. 
249 | The total amount of time taken is about a few seconds, but will vary from machine to machine.
250 | '''
251 | #------------------------------------------------------------------------------
252 | 
253 | #Exercise 12
254 | #-----------
255 | 
256 | #This result is expected --- when guessing at random, it's better to go first. 
257 | #Let's see if Player 1 can improve their strategy. create_board(), random_place(board, player), 
258 | #and evaluate(board) have been created from previous exercises. Create a function play_strategic_game(), 
259 | #where Player 1 always starts with the middle square, and otherwise both players place their markers randomly.
260 | 
def play_strategic_game():
    """Play one game in which Player 1 opens on the middle square.

    After that fixed opening both players place their markers at random,
    starting with Player 2, and the board is evaluated after every move.

    Returns:
        The first non-zero value produced by evaluate(): 1 or 2 for the
        winning player, or -1 for a draw.
    """
    board = create_board()
    board[1, 1] = 1  # Player 1's fixed opening move
    while True:
        # Player 1 has already moved, so each round starts with Player 2.
        for player in (2, 1):
            board = random_place(board, player)
            outcome = evaluate(board)
            if outcome != 0:
                return outcome
271 | 
272 | #Call play_strategic_game once.
273 | 
274 | play_strategic_game() 
275 | #------------------------------------------------------------------------------
276 | 
277 | #Exercise 13
278 | #-----------
279 | 
280 | #The results from Exercise 12 have been stored. Use the play_strategic_game() function to play 1,000 random games.
281 | #Use the time library to evaluate how long all these games take.
282 | 
283 | start_time = time.time()
284 | 
285 | ITERATIONS = 1000
286 | result2 = []
287 | for i in range(ITERATIONS):
288 |     result2.append(play_strategic_game()) 
289 |                                                                            
290 | end_time = time.time()
291 | 
292 | print(end_time - start_time)
293 | 
294 | 
295 | 
296 | #The library matplotlib.pyplot has already been stored as plt. Use plt.hist and plt.show to plot your results. 
297 | #Did Player 1's performance improve? Does either player win more than each player draws?
298 | 
299 | plt.hist(result2)
300 | plt.savefig('tic_tac_toe_Hist_2.pdf')
301 | plt.show()
302 | 
303 | 
304 | '''
305 | #Result:
306 | #------
307 | Yes, starting in the middle square is a large advantage when play is otherwise random. 
308 | Also, each game takes less time to play, because each victory is decided earlier. 
309 | Player 1 wins much more than Player 2, and draws are less common.
310 | '''


--------------------------------------------------------------------------------