├── .gitignore
├── P0 Setup Numpy Exercises Tutorial on Common Packages.txt
├── P0
    ├── resources_project0.tar.gz
    ├── resources_project0
    │   └── project0
    │   │   ├── debug.py
    │   │   ├── main.py
    │   │   └── test.py
    ├── resources_project0_Abel.rar
    └── resources_project0_solution.tar.gz
├── P1 Automatic Review Analyzer.txt
├── P1
    ├── resources_sentiment_analysis.tar.gz
    ├── resources_sentiment_analysis
    │   ├── 200.txt
    │   ├── 4000.txt
    │   ├── NN.py
    │   ├── __pycache__
    │   │   ├── project1.cpython-36.pyc
    │   │   ├── project1.cpython-37.pyc
    │   │   └── utils.cpython-36.pyc
    │   ├── main.py
    │   ├── project1.py
    │   ├── reviews_submit.tsv
    │   ├── reviews_test.tsv
    │   ├── reviews_train.tsv
    │   ├── reviews_val.tsv
    │   ├── stopwords.txt
    │   ├── test.py
    │   ├── toy_data.tsv
    │   └── utils.py
    ├── resources_sentiment_analysis_Abel.rar
    └── resources_sentiment_analysis_solution.tar.gz
├── P2P3 Digit Recognition.txt
├── P2P3
    ├── resources_mnist.tar.gz
    ├── resources_mnist
    │   └── mnist
    │   │   ├── ._.DS_Store
    │   │   ├── Datasets
    │   │       ├── mnist.pkl.gz
    │   │       ├── test_labels_mini.txt.gz
    │   │       ├── test_multi_digit_mini.pkl.gz
    │   │       ├── train_labels_mini.txt.gz
    │   │       └── train_multi_digit_mini.pkl.gz
    │   │   ├── __pycache__
    │   │       └── utils.cpython-36.pyc
    │   │   ├── part1
    │   │       ├── __pycache__
    │   │       │   ├── features.cpython-36.pyc
    │   │       │   ├── kernel.cpython-36.pyc
    │   │       │   ├── linear_regression.cpython-36.pyc
    │   │       │   ├── softmax.cpython-36.pyc
    │   │       │   ├── svm.cpython-36.pyc
    │   │       │   └── utils.cpython-36.pyc
    │   │       ├── cubic_features_checker.py
    │   │       ├── features.py
    │   │       ├── kernel.py
    │   │       ├── linear_regression.py
    │   │       ├── main.py
    │   │       ├── softmax.py
    │   │       ├── svm.py
    │   │       ├── test.py
    │   │       └── theta.pkl.gz
    │   │   ├── part2-mnist
    │   │       ├── __pycache__
    │   │       │   └── train_utils.cpython-36.pyc
    │   │       ├── mnist_model_fully_connected.pt
    │   │       ├── nnet_cnn.py
    │   │       ├── nnet_fc.py
    │   │       └── train_utils.py
    │   │   ├── part2-nn
    │   │       └── neural_nets.py
    │   │   ├── part2-twodigit
    │   │       ├── __pycache__
    │   │       │   ├── train_utils.cpython-36.pyc
    │   │       │   └── utils_multiMNIST.cpython-36.pyc
    │   │       ├── conv.py
    │   │       ├── mlp.py
    │   │       ├── sample_images
    │   │       │   ├── img20000.jpg
    │   │       │   ├── img20001.jpg
    │   │       │   ├── img20002.jpg
    │   │       │   ├── img20003.jpg
    │   │       │   ├── img20004.jpg
    │   │       │   ├── img20005.jpg
    │   │       │   ├── img20006.jpg
    │   │       │   ├── img20007.jpg
    │   │       │   ├── img20008.jpg
    │   │       │   ├── img20009.jpg
    │   │       │   ├── img20010.jpg
    │   │       │   ├── img20011.jpg
    │   │       │   ├── img20012.jpg
    │   │       │   ├── img20013.jpg
    │   │       │   ├── img20014.jpg
    │   │       │   ├── img20015.jpg
    │   │       │   ├── img20016.jpg
    │   │       │   ├── img20017.jpg
    │   │       │   ├── img20018.jpg
    │   │       │   ├── img20019.jpg
    │   │       │   ├── img20020.jpg
    │   │       │   ├── img20021.jpg
    │   │       │   ├── img20022.jpg
    │   │       │   ├── img20023.jpg
    │   │       │   ├── img20024.jpg
    │   │       │   ├── img20025.jpg
    │   │       │   ├── img20026.jpg
    │   │       │   ├── img20027.jpg
    │   │       │   ├── img20028.jpg
    │   │       │   ├── img20029.jpg
    │   │       │   ├── img20030.jpg
    │   │       │   ├── img20031.jpg
    │   │       │   ├── img20032.jpg
    │   │       │   ├── img20033.jpg
    │   │       │   ├── img20034.jpg
    │   │       │   ├── img20035.jpg
    │   │       │   ├── img20036.jpg
    │   │       │   ├── img20037.jpg
    │   │       │   ├── img20038.jpg
    │   │       │   ├── img20039.jpg
    │   │       │   ├── img20040.jpg
    │   │       │   ├── img20041.jpg
    │   │       │   ├── img20042.jpg
    │   │       │   ├── img20043.jpg
    │   │       │   ├── img20044.jpg
    │   │       │   ├── img20045.jpg
    │   │       │   ├── img20046.jpg
    │   │       │   ├── img20047.jpg
    │   │       │   ├── img20048.jpg
    │   │       │   ├── img20049.jpg
    │   │       │   ├── img20050.jpg
    │   │       │   ├── img20051.jpg
    │   │       │   ├── img20052.jpg
    │   │       │   ├── img20053.jpg
    │   │       │   ├── img20054.jpg
    │   │       │   ├── img20055.jpg
    │   │       │   ├── img20056.jpg
    │   │       │   ├── img20057.jpg
    │   │       │   ├── img20058.jpg
    │   │       │   ├── img20059.jpg
    │   │       │   ├── img20060.jpg
    │   │       │   ├── img20061.jpg
    │   │       │   ├── img20062.jpg
    │   │       │   ├── img20063.jpg
    │   │       │   ├── img20064.jpg
    │   │       │   ├── img20065.jpg
    │   │       │   ├── img20066.jpg
    │   │       │   ├── img20067.jpg
    │   │       │   ├── img20068.jpg
    │   │       │   ├── img20069.jpg
    │   │       │   ├── img20070.jpg
    │   │       │   ├── img20071.jpg
    │   │       │   ├── img20072.jpg
    │   │       │   ├── img20073.jpg
    │   │       │   ├── img20074.jpg
    │   │       │   ├── img20075.jpg
    │   │       │   ├── img20076.jpg
    │   │       │   ├── img20077.jpg
    │   │       │   ├── img20078.jpg
    │   │       │   ├── img20079.jpg
    │   │       │   ├── img20080.jpg
    │   │       │   ├── img20081.jpg
    │   │       │   ├── img20082.jpg
    │   │       │   ├── img20083.jpg
    │   │       │   ├── img20084.jpg
    │   │       │   ├── img20085.jpg
    │   │       │   ├── img20086.jpg
    │   │       │   ├── img20087.jpg
    │   │       │   ├── img20088.jpg
    │   │       │   ├── img20089.jpg
    │   │       │   ├── img20090.jpg
    │   │       │   ├── img20091.jpg
    │   │       │   ├── img20092.jpg
    │   │       │   ├── img20093.jpg
    │   │       │   ├── img20094.jpg
    │   │       │   ├── img20095.jpg
    │   │       │   ├── img20096.jpg
    │   │       │   ├── img20097.jpg
    │   │       │   ├── img20098.jpg
    │   │       │   └── img20099.jpg
    │   │       ├── train_utils.py
    │   │       └── utils_multiMNIST.py
    │   │   └── utils.py
    ├── resources_mnist_Abel.rar
    └── resources_mnist_solution.tar.gz
├── P4 Collaborative Filtering via Gaussian Mixtures.txt
├── P4
    ├── resources_netflix.tar.gz
    ├── resources_netflix
    │   └── netflix
    │   │   ├── __pycache__
    │   │       ├── common.cpython-36.pyc
    │   │       ├── em.cpython-36.pyc
    │   │       ├── kmeans.cpython-36.pyc
    │   │       └── naive_em.cpython-36.pyc
    │   │   ├── common.py
    │   │   ├── em.py
    │   │   ├── kmeans.py
    │   │   ├── main.py
    │   │   ├── naive_em.py
    │   │   ├── netflix_complete.txt
    │   │   ├── netflix_incomplete.txt
    │   │   ├── test.py
    │   │   ├── test_complete.txt
    │   │   ├── test_incomplete.txt
    │   │   ├── test_solutions.txt
    │   │   └── toy_data.txt
    ├── resources_netflix_Abel.rar
    └── resources_netflix_solution.tar.gz
├── P5 Text-Based Game.txt
├── P5
    ├── resources_rl.tar.gz
    ├── resources_rl
    │   └── rl
    │   │   ├── __pycache__
    │   │       ├── framework.cpython-36.pyc
    │   │       └── utils.cpython-36.pyc
    │   │   ├── agent_dqn.py
    │   │   ├── agent_linear.py
    │   │   ├── agent_tabular_ql.py
    │   │   ├── framework.py
    │   │   ├── game.tsv
    │   │   └── utils.py
    ├── resources_rl_Abel.rar
    └── resources_rl_solution.tar.gz
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
1 | # *.png
2 | # *.pdf
3 | /edx archived
4 | 
5 | # *.gz
6 | # *.rar
7 | 
8 | 
9 | 


--------------------------------------------------------------------------------
/P0 Setup Numpy Exercises Tutorial on Common Packages.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P0 Setup Numpy Exercises Tutorial on Common Packages.txt


--------------------------------------------------------------------------------
/P0/resources_project0.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P0/resources_project0.tar.gz


--------------------------------------------------------------------------------
/P0/resources_project0/project0/debug.py:
--------------------------------------------------------------------------------
 1 | def get_sum_metrics(predictions, metrics=[]):
 2 |     for i in range(3):
 3 |         metrics.append(lambda x: x + i)
 4 | 
 5 |     sum_metrics = 0
 6 |     for metric in metrics:
 7 |         sum_metrics += metric(predictions)
 8 | 
 9 |     return sum_metrics
10 | 
11 | 
12 | def main():
13 |     print(get_sum_metrics(0))  # Should be (0 + 0) + (0 + 1) + (0 + 2) = 3
14 |     print(get_sum_metrics(1))  # Should be (1 + 0) + (1 + 1) + (1 + 2) = 6
15 |     print(get_sum_metrics(2))  # Should be (2 + 0) + (2 + 1) + (2 + 2) = 9
16 |     print(get_sum_metrics(3, [lambda x: x]))  # Should be (3) + (3 + 0) + (3 + 1) + (3 + 2) = 15
17 |     print(get_sum_metrics(0))  # Should be (0 + 0) + (0 + 1) + (0 + 2) = 3
18 |     print(get_sum_metrics(1))  # Should be (1 + 0) + (1 + 1) + (1 + 2) = 6
19 |     print(get_sum_metrics(2))  # Should be (2 + 0) + (2 + 1) + (2 + 2) = 9
20 | 
21 | if __name__ == "__main__":
22 |     main()
23 | 


--------------------------------------------------------------------------------
/P0/resources_project0/project0/main.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | def randomization(n):
 4 |     """Exercise stub: should build a randomly-generated n x 1 NumPy array.
 5 | 
 6 |     Arg:     n - an integer
 7 |     Returns: A - a randomly-generated nx1 Numpy array.
 8 |     Raises:  NotImplementedError, until the body below is written.
 9 |     """
10 |     # Your code here
11 |     raise NotImplementedError
12 | 
13 | def operations(h, w):
14 |     """
15 |     Exercise stub: should make two random h x w NumPy arrays A and B and
16 |     return A, B and their sum s. Raises NotImplementedError until written.
17 | 
18 |     Args:
19 |       h - an integer describing the height of A and B
20 |       w - an integer describing the width of A and B
21 |     Returns (in this order):
22 |       A - a randomly-generated h x w NumPy array.
23 |       B - a randomly-generated h x w NumPy array.
24 |       s - the sum of A and B.
25 |     """
26 |     # Your code here
27 |     raise NotImplementedError
28 | 
29 | 
30 | def norm(A, B):
31 |     """
32 |     Exercise stub: should return the L2 norm of the sum of two NumPy column
33 |     arrays, A and B. Raises NotImplementedError until written.
34 | 
35 |     Args:
36 |       A - a NumPy array
37 |       B - a NumPy array
38 |     Returns:
39 |       s - the L2 norm of A+B.
40 |     """
41 |     # Your code here
42 |     raise NotImplementedError
43 | 
44 | 
45 | def neural_network(inputs, weights):
46 |     """
47 |     Exercise stub: should run an input vector through a 1-layer neural
48 |     network with the given weights. Raises NotImplementedError until written.
49 | 
50 |     Args:
51 |       inputs - 2 x 1 NumPy array
52 |       weights - 2 x 1 NumPy array
53 |     Returns (in this order):
54 |       out - a 1 x 1 NumPy array, representing the output of the neural network
55 |     """
56 |     # Your code here
57 |     raise NotImplementedError
58 | 


--------------------------------------------------------------------------------
/P0/resources_project0/project0/test.py:
--------------------------------------------------------------------------------
 1 | import traceback
 2 | import torch.nn as nn
 3 | 
 4 | 
 5 | def green(s):
 6 |     return '\033[1;32m%s\033[m' % s
 7 | 
 8 | 
 9 | def yellow(s):
10 |     return '\033[1;33m%s\033[m' % s
11 | 
12 | 
13 | def red(s):
14 |     return '\033[1;31m%s\033[m' % s
15 | 
16 | 
17 | def log(*m):
18 |     print(" ".join(map(str, m)))
19 | 
20 | 
21 | def log_exit(*m):
22 |     log(red("ERROR:"), *m)
23 |     exit(1)
24 | 
25 | 
26 | def check_numpy():
27 |     try:
28 |         import numpy
29 |         log(green("PASS"), "NumPy installed")
30 |     except ModuleNotFoundError:
31 |         log(red("FAIL"), "NumPy not installed")
32 | 
33 | 
34 | def check_scipy():
35 |     try:
36 |         import scipy
37 |         log(green("PASS"), "SciPy installed")
38 |     except ModuleNotFoundError:
39 |         log(red("FAIL"), "SciPy not installed")
40 | 
41 | 
42 | def check_matplotlib():
43 |     try:
44 |         import matplotlib
45 |         log(green("PASS"), "matplotlib installed")
46 |     except ModuleNotFoundError:
47 |         log(red("FAIL"), "matplotlib not installed")
48 | 
49 | 
50 | def check_torch():
51 |     try:
52 |         import torch
53 |         log(green("PASS"), "PyTorch installed")
54 |     except ModuleNotFoundError:
55 |         log(red("FAIL"), "PyTorch not installed")
56 | 
57 | 
58 | def check_tqdm():
59 |     try:
60 |         import tqdm
61 |         log(green("PASS"), "tqdm installed")
62 |     except ModuleNotFoundError:
63 |         log(red("FAIL"), "tqdm not installed")
64 | 
65 | 
66 | def main():
67 |     try:
68 |         check_numpy()
69 |         check_scipy()
70 |         check_matplotlib()
71 |         check_torch()
72 |         check_tqdm()
73 |     except Exception:
74 |         log_exit(traceback.format_exc())
75 | 
76 | 
77 | if __name__ == "__main__":
78 |     main()
79 | 


--------------------------------------------------------------------------------
/P0/resources_project0_Abel.rar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P0/resources_project0_Abel.rar


--------------------------------------------------------------------------------
/P0/resources_project0_solution.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P0/resources_project0_solution.tar.gz


--------------------------------------------------------------------------------
/P1 Automatic Review Analyzer.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P1 Automatic Review Analyzer.txt


--------------------------------------------------------------------------------
/P1/resources_sentiment_analysis.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P1/resources_sentiment_analysis.tar.gz


--------------------------------------------------------------------------------
/P1/resources_sentiment_analysis/200.txt:
--------------------------------------------------------------------------------
1 | 131,181,22,172,144,92,97,187,58,93,6,70,106,68,153,168,179,199,29,46,9,142,134,88,193,110,26,32,117,112,17,39,166,13,94,138,109,147,51,101,59,188,116,5,170,99,100,167,180,146,65,1,104,43,38,184,123,171,137,162,71,44,95,174,12,7,54,152,21,47,28,176,34,2,132,118,42,189,150,14,165,41,192,45,82,128,63,57,197,160,53,75,108,135,121,159,183,67,169,50,87,69,89,196,115,19,148,96,86,11,8,60,33,173,78,4,119,105,182,127,177,30,186,40,49,178,76,157,161,73,164,151,31,74,191,27,125,198,81,20,155,114,139,36,61,56,145,48,16,83,62,85,126,0,102,23,3,140,15,195,133,113,190,141,52,163,156,80,111,90,175,143,120,84,18,25,79,37,154,136,64,158,24,185,72,35,129,55,149,91,122,77,103,124,130,66,10,107,194,98


--------------------------------------------------------------------------------
/P1/resources_sentiment_analysis/NN.py:
--------------------------------------------------------------------------------
 1 | # Scratch script: shows how NumPy broadcasting combines a (2, 2) and a (3, 2) array.
 2 | import project1 as p1
 3 | import utils
 4 | import numpy as np
 5 | 
 6 | # X has shape (2, 2); Y has shape (3, 2).
 7 | X = np.array([[0, 1],[4,5]])
 8 | Y = np.array([[1,1], [2,2], [3,3]])
 9 | # X[:, np.newaxis] has shape (2, 1, 2), so adding Y broadcasts to (2, 3, 2).
10 | Z = X[:,np.newaxis]+Y
11 | 
12 | print(X)
13 | print(Y)
14 | # Z[i, j] equals X[i] + Y[j]: every row of X paired with every row of Y.
15 | print(Z.shape)
16 | print(Z)


--------------------------------------------------------------------------------
/P1/resources_sentiment_analysis/__pycache__/project1.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P1/resources_sentiment_analysis/__pycache__/project1.cpython-36.pyc


--------------------------------------------------------------------------------
/P1/resources_sentiment_analysis/__pycache__/project1.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P1/resources_sentiment_analysis/__pycache__/project1.cpython-37.pyc


--------------------------------------------------------------------------------
/P1/resources_sentiment_analysis/__pycache__/utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P1/resources_sentiment_analysis/__pycache__/utils.cpython-36.pyc


--------------------------------------------------------------------------------
/P1/resources_sentiment_analysis/main.py:
--------------------------------------------------------------------------------
  1 | import project1 as p1
  2 | import utils
  3 | import numpy as np
  4 | 
  5 | #-------------------------------------------------------------------------------
  6 | # Data loading. There is no need to edit code in this section.
  7 | #-------------------------------------------------------------------------------
  8 | 
  9 | train_data = utils.load_data('reviews_train.tsv')
 10 | val_data = utils.load_data('reviews_val.tsv')
 11 | test_data = utils.load_data('reviews_test.tsv')
 12 | # Split each dataset into parallel tuples of review texts and sentiment labels.
 13 | train_texts, train_labels = zip(*((sample['text'], sample['sentiment']) for sample in train_data))
 14 | val_texts, val_labels = zip(*((sample['text'], sample['sentiment']) for sample in val_data))
 15 | test_texts, test_labels = zip(*((sample['text'], sample['sentiment']) for sample in test_data))
 16 | # Build the word dictionary from the training texts only.
 17 | dictionary = p1.bag_of_words(train_texts)
 18 | # Extract bag-of-words feature vectors for every split with that same dictionary.
 19 | train_bow_features = p1.extract_bow_feature_vectors(train_texts, dictionary)
 20 | val_bow_features = p1.extract_bow_feature_vectors(val_texts, dictionary)
 21 | test_bow_features = p1.extract_bow_feature_vectors(test_texts, dictionary)
 22 | 
 23 | #-------------------------------------------------------------------------------
 24 | # Problem 5
 25 | #-------------------------------------------------------------------------------
 26 | 
 27 | # toy_features, toy_labels = toy_data = utils.load_toy_data('toy_data.tsv')
 28 | 
 29 | # T = 2000
 30 | # L = 0.2
 31 | 
 32 | # thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
 33 | # thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
 34 | # thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)
 35 | 
 36 | # def plot_toy_results(algo_name, thetas):
 37 | #     print('theta for', algo_name, 'is', ', '.join(map(str,list(thetas[0]))))
 38 | #     print('theta_0 for', algo_name, 'is', str(thetas[1]))
 39 | #     utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas)
 40 | 
 41 | # plot_toy_results('Perceptron', thetas_perceptron)
 42 | # plot_toy_results('Average Perceptron', thetas_avg_perceptron)
 43 | # plot_toy_results('Pegasos', thetas_pegasos)
 44 | 
 45 | #-------------------------------------------------------------------------------
 46 | # Problem 7
 47 | #-------------------------------------------------------------------------------
 48 | 
 49 | # T = 10
 50 | # L = 0.01
 51 | 
 52 | # pct_train_accuracy, pct_val_accuracy = \
 53 | #    p1.classifier_accuracy(p1.perceptron, train_bow_features,val_bow_features,train_labels,val_labels,T=T)
 54 | # print("{:35} {:.4f}".format("Training accuracy for perceptron:", pct_train_accuracy))
 55 | # print("{:35} {:.4f}".format("Validation accuracy for perceptron:", pct_val_accuracy))
 56 | 
 57 | # avg_pct_train_accuracy, avg_pct_val_accuracy = \
 58 | #    p1.classifier_accuracy(p1.average_perceptron, train_bow_features,val_bow_features,train_labels,val_labels,T=T)
 59 | # print("{:43} {:.4f}".format("Training accuracy for average perceptron:", avg_pct_train_accuracy))
 60 | # print("{:43} {:.4f}".format("Validation accuracy for average perceptron:", avg_pct_val_accuracy))
 61 | 
 62 | # avg_peg_train_accuracy, avg_peg_val_accuracy = \
 63 | #    p1.classifier_accuracy(p1.pegasos, train_bow_features,val_bow_features,train_labels,val_labels,T=T,L=L)
 64 | # print("{:50} {:.4f}".format("Training accuracy for Pegasos:", avg_peg_train_accuracy))
 65 | # print("{:50} {:.4f}".format("Validation accuracy for Pegasos:", avg_peg_val_accuracy))
 66 | 
 67 | #-------------------------------------------------------------------------------
 68 | # Problem 8
 69 | #-------------------------------------------------------------------------------
 70 | 
 71 | # data = (train_bow_features, train_labels, val_bow_features, val_labels)
 72 | 
 73 | # # values of T and lambda to try
 74 | # Ts = [1, 5, 10, 15, 25, 50]
 75 | # Ls = [0.001, 0.01, 0.1, 1, 10]
 76 | 
 77 | # pct_tune_results = utils.tune_perceptron(Ts, *data)
 78 | # print('perceptron valid:', list(zip(Ts, pct_tune_results[1])))
 79 | # print('best = {:.4f}, T={:.4f}'.format(np.max(pct_tune_results[1]), Ts[np.argmax(pct_tune_results[1])]))
 80 | 
 81 | # avg_pct_tune_results = utils.tune_avg_perceptron(Ts, *data)
 82 | # print('avg perceptron valid:', list(zip(Ts, avg_pct_tune_results[1])))
 83 | # print('best = {:.4f}, T={:.4f}'.format(np.max(avg_pct_tune_results[1]), Ts[np.argmax(avg_pct_tune_results[1])]))
 84 | 
 85 | # # fix values for L and T while tuning Pegasos T and L, respectively
 86 | # fix_L = 0.01
 87 | # peg_tune_results_T = utils.tune_pegasos_T(fix_L, Ts, *data)
 88 | # print('Pegasos valid: tune T', list(zip(Ts, peg_tune_results_T[1])))
 89 | # print('best = {:.4f}, T={:.4f}'.format(np.max(peg_tune_results_T[1]), Ts[np.argmax(peg_tune_results_T[1])]))
 90 | 
 91 | # fix_T = Ts[np.argmax(peg_tune_results_T[1])]
 92 | # peg_tune_results_L = utils.tune_pegasos_L(fix_T, Ls, *data)
 93 | # print('Pegasos valid: tune L', list(zip(Ls, peg_tune_results_L[1])))
 94 | # print('best = {:.4f}, L={:.4f}'.format(np.max(peg_tune_results_L[1]), Ls[np.argmax(peg_tune_results_L[1])]))
 95 | 
 96 | # utils.plot_tune_results('Perceptron', 'T', Ts, *pct_tune_results)
 97 | # utils.plot_tune_results('Avg Perceptron', 'T', Ts, *avg_pct_tune_results)
 98 | # utils.plot_tune_results('Pegasos', 'T', Ts, *peg_tune_results_T)
 99 | # utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L)
100 | 
101 | #-------------------------------------------------------------------------------
102 | # Use the best method (perceptron, average perceptron or Pegasos) along with
103 | # the optimal hyperparameters according to validation accuracies to test
104 | # against the test dataset. The test data has been provided as
105 | # test_bow_features and test_labels.
106 | #-------------------------------------------------------------------------------
107 | 
108 | # Earlier tuning already showed Pegasos to be the best algorithm for this data,
109 | # so evaluate it on the test set with the hyperparameters chosen there.
110 | 
111 | T = 25
112 | L = 0.01
113 | # Same call as in Problem 7, with the test split in place of the validation split.
114 | avg_peg_train_accuracy, avg_peg_test_accuracy = \
115 |    p1.classifier_accuracy(p1.pegasos, train_bow_features, test_bow_features, train_labels, test_labels, T=T, L=L)
116 | print("{:50} {:.4f}".format("Training accuracy for Pegasos:", avg_peg_train_accuracy))
117 | print("{:50} {:.4f}".format("Test accuracy for Pegasos:", avg_peg_test_accuracy))
118 | 
119 | #-------------------------------------------------------------------------------
120 | # Assign to best_theta, the weights (and not the bias!) learned by your most
121 | # accurate algorithm with the optimal choice of hyperparameters.
122 | #-------------------------------------------------------------------------------
123 | 
124 | # # parameters are from tuning
125 | # T = 25
126 | # L = 0.01
127 | 
128 | # # extract the best theta (without the bias) from the selected algorithm, Pegasos
129 | # TT = p1.pegasos(train_bow_features, train_labels, T,L)
130 | 
131 | # best_theta = TT[0] # Your code here
132 | # wordlist   = [word for (idx, word) in sorted(zip(dictionary.values(), dictionary.keys()))]
133 | # sorted_word_features = utils.most_explanatory_word(best_theta, wordlist)
134 | # print("Most Explanatory Word Features")
135 | # print(sorted_word_features[:10])
136 | 


--------------------------------------------------------------------------------
/P1/resources_sentiment_analysis/reviews_submit.tsv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P1/resources_sentiment_analysis/reviews_submit.tsv


--------------------------------------------------------------------------------
/P1/resources_sentiment_analysis/reviews_test.tsv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P1/resources_sentiment_analysis/reviews_test.tsv


--------------------------------------------------------------------------------
/P1/resources_sentiment_analysis/reviews_train.tsv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P1/resources_sentiment_analysis/reviews_train.tsv


--------------------------------------------------------------------------------
/P1/resources_sentiment_analysis/reviews_val.tsv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P1/resources_sentiment_analysis/reviews_val.tsv


--------------------------------------------------------------------------------
/P1/resources_sentiment_analysis/stopwords.txt:
--------------------------------------------------------------------------------
  1 | i
  2 | me
  3 | my
  4 | myself
  5 | we
  6 | our
  7 | ours
  8 | ourselves
  9 | you
 10 | your
 11 | yours
 12 | yourself
 13 | yourselves
 14 | he
 15 | him
 16 | his
 17 | himself
 18 | she
 19 | her
 20 | hers
 21 | herself
 22 | it
 23 | its
 24 | itself
 25 | they
 26 | them
 27 | their
 28 | theirs
 29 | themselves
 30 | what
 31 | which
 32 | who
 33 | whom
 34 | this
 35 | that
 36 | these
 37 | those
 38 | am
 39 | is
 40 | are
 41 | was
 42 | were
 43 | be
 44 | been
 45 | being
 46 | have
 47 | has
 48 | had
 49 | having
 50 | do
 51 | does
 52 | did
 53 | doing
 54 | a
 55 | an
 56 | the
 57 | and
 58 | but
 59 | if
 60 | or
 61 | because
 62 | as
 63 | until
 64 | while
 65 | of
 66 | at
 67 | by
 68 | for
 69 | with
 70 | about
 71 | against
 72 | between
 73 | into
 74 | through
 75 | during
 76 | before
 77 | after
 78 | above
 79 | below
 80 | to
 81 | from
 82 | up
 83 | down
 84 | in
 85 | out
 86 | on
 87 | off
 88 | over
 89 | under
 90 | again
 91 | further
 92 | then
 93 | once
 94 | here
 95 | there
 96 | when
 97 | where
 98 | why
 99 | how
100 | all
101 | any
102 | both
103 | each
104 | few
105 | more
106 | most
107 | other
108 | some
109 | such
110 | no
111 | nor
112 | not
113 | only
114 | own
115 | same
116 | so
117 | than
118 | too
119 | very
120 | s
121 | t
122 | can
123 | will
124 | just
125 | don
126 | should
127 | now
128 | 


--------------------------------------------------------------------------------
/P1/resources_sentiment_analysis/toy_data.tsv:
--------------------------------------------------------------------------------
  1 | -1	1.7600	0.4000
  2 | -1	0.9790	2.2400
  3 | -1	1.8700	-0.9770
  4 | -1	0.9500	-0.1510
  5 | -1	-0.1030	0.4110
  6 | -1	0.1440	1.4500
  7 | -1	0.7610	0.1220
  8 | -1	0.4440	0.3340
  9 | -1	1.4900	-0.2050
 10 | -1	0.3130	-0.8540
 11 | -1	-2.5500	0.6540
 12 | -1	0.8640	-0.7420
 13 | -1	2.2700	-1.4500
 14 | -1	0.0458	-0.1870
 15 | -1	1.5300	1.4700
 16 | -1	0.1550	0.3780
 17 | -1	-0.8878	-1.9808
 18 | -1	-0.3480	0.1560
 19 | -1	1.2300	1.2000
 20 | -1	-0.3873	-0.3023
 21 | -1	-1.0486	-1.4200
 22 | -1	-1.7100	1.9500
 23 | -1	-0.5097	-0.4381
 24 | -1	-1.2500	0.7770
 25 | -1	-1.6139	-0.2127
 26 | -1	-0.8950	0.3870
 27 | -1	-0.5108	-1.1806
 28 | -1	-0.0282	0.4280
 29 | -1	0.0665	0.3020
 30 | -1	-0.6343	-0.3627
 31 | -1	-0.6725	-0.3596
 32 | -1	-0.8131	-1.7263
 33 | -1	0.1770	-0.4020
 34 | -1	-1.6300	0.4630
 35 | -1	-0.9070	0.0519
 36 | -1	0.7290	0.1290
 37 | -1	1.1400	-1.2300
 38 | -1	0.4020	-0.6850
 39 | -1	-0.8708	-0.5788
 40 | -1	-0.3120	0.0562
 41 | -1	-1.1700	0.9010
 42 | -1	0.4660	-1.5400
 43 | -1	1.4900	1.9000
 44 | -1	1.1800	-0.1800
 45 | -1	-1.0700	1.0500
 46 | -1	-0.4030	1.2200
 47 | -1	0.2080	0.9770
 48 | -1	0.3560	0.7070
 49 | -1	0.0105	1.7900
 50 | -1	0.1270	0.4020
 51 | -1	1.8800	-1.3500
 52 | -1	-1.2700	0.9690
 53 | -1	-1.1700	1.9400
 54 | -1	-0.4136	-0.7475
 55 | -1	1.9200	1.4800
 56 | -1	1.8700	0.9060
 57 | -1	-0.8610	1.9100
 58 | -1	-0.2680	0.8020
 59 | -1	0.9470	-0.1550
 60 | -1	0.6140	0.9220
 61 | -1	0.3760	-1.1000
 62 | -1	0.2980	1.3300
 63 | -1	-0.6946	-0.1496
 64 | -1	-0.4350	1.8500
 65 | -1	0.6720	0.4070
 66 | -1	-0.7700	0.5390
 67 | -1	-0.6740	0.0318
 68 | -1	-0.6360	0.6760
 69 | -1	0.5770	-0.2080
 70 | -1	0.3960	-1.0900
 71 | -1	-1.4900	0.4390
 72 | -1	0.1670	0.6350
 73 | -1	2.3800	0.9440
 74 | -1	-0.9130	1.1200
 75 | -1	-1.3159	-0.4616
 76 | -1	-0.0682	1.7100
 77 | -1	-0.7448	-0.8264
 78 | -1	-0.0985	-0.6635
 79 | -1	1.1300	-1.0800
 80 | -1	-1.1475	-0.4378
 81 | -1	-0.4980	1.9300
 82 | -1	0.9490	0.0876
 83 | -1	-1.2300	0.8440
 84 | -1	-1.0002	-1.5448
 85 | -1	1.1900	0.3170
 86 | -1	0.9210	0.3190
 87 | -1	0.8570	-0.6510
 88 | -1	-1.0300	0.6820
 89 | -1	-0.8034	-0.6895
 90 | -1	-0.4560	0.0175
 91 | -1	-0.3540	-1.3750
 92 | -1	-0.6436	-2.2234
 93 | -1	0.6250	-1.6000
 94 | -1	-1.1000	0.0522
 95 | -1	-0.7400	1.5400
 96 | -1	-1.2900	0.2670
 97 | -1	-0.0393	-1.1681
 98 | -1	0.5230	-0.1720
 99 | -1	0.7720	0.8240
100 | -1	2.1600	1.3400
101 | 1	1.6300	1.7600
102 | 1	3.1000	2.6600
103 | 1	2.6400	0.3830
104 | 1	1.9800	1.2600
105 | 1	2.2800	1.9000
106 | 1	2.9100	2.3200
107 | 1	2.7900	1.5300
108 | 1	1.0600	1.5900
109 | 1	1.9800	2.3800
110 | 1	4.2600	1.9600
111 | 1	1.0400	1.6500
112 | 1	1.5400	2.4800
113 | 1	0.4590	2.0600
114 | 1	2.1600	2.2300
115 | 1	1.4000	1.7600
116 | 1	0.5760	1.5100
117 | 1	1.4600	2.4200
118 | 1	0.8440	2.7800
119 | 1	3.4900	-0.0700
120 | 1	2.4300	2.6800
121 | 1	1.3600	1.6000
122 | 1	1.8700	1.7000
123 | 1	1.6900	0.3240
124 | 1	3.1500	3.0800
125 | 1	1.1900	0.5340
126 | 1	2.5200	1.4200
127 | 1	2.1400	1.6800
128 | 1	2.6900	2.6900
129 | 1	1.2700	0.6170
130 | 1	0.4170	2.6100
131 | 1	0.8110	1.4900
132 | 1	1.4000	1.9500
133 | 1	0.0637	2.1900
134 | 1	2.5200	2.0900
135 | 1	1.6900	2.1000
136 | 1	2.4000	-0.7730
137 | 1	3.9600	2.3900
138 | 1	1.3500	1.6100
139 | 1	2.4900	1.8800
140 | 1	-0.0307	4.0600
141 | 1	1.8900	3.0200
142 | 1	1.3100	3.5400
143 | 1	2.2900	2.6100
144 | 1	0.9550	3.2100
145 | 1	2.6900	3.3000
146 | 1	1.3700	1.5200
147 | 1	4.3000	0.9400
148 | 1	1.8600	3.1400
149 | 1	2.1000	2.5800
150 | 1	1.6000	2.3700
151 | 1	0.6930	3.6600
152 | 1	1.8800	1.3200
153 | 1	2.6700	1.5400
154 | 1	0.6660	0.6530
155 | 1	2.6900	1.8400
156 | 1	1.8700	3.0800
157 | 1	0.8730	1.2700
158 | 1	1.6200	2.0900
159 | 1	1.9600	1.7100
160 | 1	1.9400	1.8900
161 | 1	1.2800	1.1900
162 | 1	2.2700	1.1100
163 | 1	0.8430	1.6900
164 | 1	1.8400	4.2600
165 | 1	1.3000	2.9400
166 | 1	2.7500	0.8110
167 | 1	2.7700	0.8160
168 | 1	-0.6590	2.6100
169 | 1	0.2440	2.4500
170 | 1	1.3200	3.6600
171 | 1	3.0700	1.5500
172 | 1	1.3100	0.7860
173 | 1	1.5600	1.7200
174 | 1	1.6400	2.1600
175 | 1	2.5800	2.3500
176 | 1	1.2400	0.5620
177 | 1	3.3600	1.3100
178 | 1	1.3500	1.4800
179 | 1	0.1570	1.5200
180 | 1	1.5200	2.6200
181 | 1	2.7000	2.0000
182 | 1	2.9300	2.3400
183 | 1	1.9800	2.1600
184 | 1	1.8100	1.6100
185 | 1	1.7300	0.8720
186 | 1	2.2800	1.0100
187 | 1	2.8400	1.7500
188 | 1	2.0500	2.4900
189 | 1	2.6400	0.4290
190 | 1	1.7900	2.8800
191 | 1	0.3020	2.3900
192 | 1	-0.2560	0.9770
193 | 1	2.0400	0.3430
194 | 1	1.0100	0.5280
195 | 1	3.6500	2.1600
196 | 1	2.5700	1.7800
197 | 1	1.6500	0.3840
198 | 1	1.7100	1.2400
199 | 1	2.8600	3.1400
200 | 1	3.4700	2.8500


--------------------------------------------------------------------------------
/P1/resources_sentiment_analysis/utils.py:
--------------------------------------------------------------------------------
  1 | import csv
  2 | import numpy as np
  3 | import matplotlib.pyplot as plt
  4 | 
  5 | import project1 as p1
  6 | import sys
  7 | 
  8 | if sys.version_info[0] < 3:
  9 |     PYTHON3 = False
 10 | else:
 11 |     PYTHON3 = True
 12 | 
 13 | def load_toy_data(path_toy_data):
 14 |     """
 15 |     Loads the 2D toy dataset as numpy arrays.
 16 |     Returns the tuple (features, labels) in which features is an Nx2 numpy matrix and
 17 |     labels is a length-N vector of +1/-1 labels.
 18 |     """
 19 |     labels, xs, ys = np.loadtxt(path_toy_data, delimiter='\t', unpack=True)
 20 |     return np.vstack((xs, ys)).T, labels
 21 | 
 22 | def load_data(path_data, extras=False):
 23 |     """
 24 |     Returns a list of dict with keys:
 25 |     * sentiment: +1 or -1 if the review was positive or negative, respectively
 26 |     * text: the text of the review
 27 | 
 28 |     Additionally, if the `extras` argument is True, each dict will also include the
 29 |     following information:
 30 |     * productId: a string that uniquely identifies each product
 31 |     * userId: a string that uniquely identifies each user
 32 |     * summary: the title of the review
 33 |     * helpfulY: the number of users who thought this review was helpful
 34 |     * helpfulN: the number of users who thought this review was NOT helpful
 35 |     """
 36 | 
 37 |     global PYTHON3
 38 | 
 39 |     basic_fields = {'sentiment', 'text'}
 40 |     numeric_fields = {'sentiment', 'helpfulY', 'helpfulN'}
 41 | 
 42 |     data = []
 43 |     if PYTHON3:
 44 |         f_data = open(path_data, encoding="latin1")
 45 |     else:
 46 |         f_data = open(path_data)
 47 | 
 48 |     for datum in csv.DictReader(f_data, delimiter='\t'):
 49 |         for field in list(datum.keys()):
 50 |             if not extras and field not in basic_fields:
 51 |                 del datum[field]
 52 |             elif field in numeric_fields and datum[field]:
 53 |                 datum[field] = int(datum[field])
 54 | 
 55 |         data.append(datum)
 56 | 
 57 |     f_data.close()
 58 | 
 59 |     return data
 60 | 
 61 | def write_predictions(path_submit_data, preds):
 62 |     if PYTHON3:
 63 |         f_data = open(path_submit_data, encoding="latin1")
 64 |     else:
 65 |         f_data = open(path_submit_data)
 66 | 
 67 |     reader = csv.DictReader(f_data, delimiter='\t')
 68 |     data = list(reader)
 69 | 
 70 |     assert len(preds) == len(data), \
 71 |            'Expected {} predictions but {} were given.'.format(len(data), len(preds))
 72 | 
 73 |     for pred, datum in zip(preds.astype(int), data):
 74 |         assert pred == 1 or pred == -1, 'Invalid prediction: {}.'.format(pred)
 75 |         datum['sentiment'] = pred
 76 |     f_data.close()
 77 | 
 78 |     if PYTHON3:
 79 |         f_out = open(path_submit_data, 'w')
 80 |     else:
 81 |         f_out = open(path_submit_data, 'wb')
 82 | 
 83 |     writer = csv.DictWriter(f_out, delimiter='\t', fieldnames=reader.fieldnames)
 84 |     writer.writeheader()
 85 |     for datum in data:
 86 |         writer.writerow(datum)
 87 |     f_out.close()
 88 | 
 89 | def plot_toy_data(algo_name, features, labels, thetas):
 90 |     """
 91 |     Plots the toy data in 2D.
 92 |     Arguments:
 93 |     * features - an Nx2 ndarray of features (points)
 94 |     * labels - a length-N vector of +1/-1 labels
 95 |     * thetas - the tuple (theta, theta_0) that is the output of the learning algorithm
 96 |     * algorithm - the string name of the learning algorithm used
 97 |     """
 98 |     # plot the points with labels represented as colors
 99 |     plt.subplots()
100 |     colors = ['b' if label == 1 else 'r' for label in labels]
101 |     plt.scatter(features[:, 0], features[:, 1], s=40, c=colors)
102 |     xmin, xmax = plt.axis()[:2]
103 | 
104 |     # plot the decision boundary
105 |     theta, theta_0 = thetas
106 |     xs = np.linspace(xmin, xmax)
107 |     ys = -(theta[0]*xs + theta_0) / (theta[1] + 1e-16)
108 |     plt.plot(xs, ys, 'k-')
109 | 
110 |     # show the plot
111 |     algo_name = ' '.join((word.capitalize() for word in algo_name.split(' ')))
112 |     plt.suptitle('Classified Toy Data ({})'.format(algo_name))
113 |     plt.show()
114 | 
def plot_tune_results(algo_name, param_name, param_vals, acc_train, acc_val):
    """
    Plots training and validation accuracy against the values tried for one
    hyperparameter.

    Arguments:
    * algo_name - name of the learning algorithm (shown in the title)
    * param_name - name of the hyperparameter (title and x-axis label)
    * param_vals - the hyperparameter values, used as x coordinates
    * acc_train, acc_val - accuracies aligned with param_vals
    """
    # one line per partition, training first so it matches the legend order
    plt.subplots()
    for accuracies in (acc_train, acc_val):
        plt.plot(param_vals, accuracies, '-o')

    # titles, legend and axis labels
    capitalized_words = [word.capitalize() for word in algo_name.split(' ')]
    algo_name = ' '.join(capitalized_words)
    param_name = param_name.capitalize()
    title = 'Classification Accuracy vs {} ({})'.format(param_name, algo_name)
    plt.suptitle(title)
    plt.legend(['train','val'], loc='upper right', title='Partition')
    plt.xlabel(param_name)
    plt.ylabel('Accuracy (%)')
    plt.show()
133 | 
def tune(train_fn, param_vals, train_feats, train_labels, val_feats, val_labels):
    """
    Trains one model per hyperparameter value and scores each of them.

    Arguments:
    * train_fn - callable (features, labels, value) -> (theta, theta_0)
    * param_vals - sequence of hyperparameter values to try
    * train_feats, train_labels - data used for training and training accuracy
    * val_feats, val_labels - data used for validation accuracy

    Returns the tuple (train_accs, val_accs) of numpy arrays, each holding one
    accuracy per entry of param_vals, in the same order.
    """
    n_settings = len(param_vals)
    train_accs = np.empty(n_settings)
    val_accs = np.empty(n_settings)

    def score(feats, labels, theta, theta_0):
        # classify with the trained parameters, then compare against the labels
        return p1.accuracy(p1.classify(feats, theta, theta_0), labels)

    for idx, setting in enumerate(param_vals):
        theta, theta_0 = train_fn(train_feats, train_labels, setting)
        train_accs[idx] = score(train_feats, train_labels, theta, theta_0)
        val_accs[idx] = score(val_feats, val_labels, theta, theta_0)

    return train_accs, val_accs
148 | 
def tune_perceptron(*args):
    """
    Runs tune() with p1.perceptron as the training function.

    All positional arguments are forwarded unchanged to tune() after the
    training function, and tune()'s result is returned as-is.
    """
    train_fn = p1.perceptron
    return tune(train_fn, *args)
151 | 
def tune_avg_perceptron(*args):
    """
    Runs tune() with p1.average_perceptron as the training function.

    All positional arguments are forwarded unchanged to tune() after the
    training function, and tune()'s result is returned as-is.
    """
    train_fn = p1.average_perceptron
    return tune(train_fn, *args)
154 | 
def tune_pegasos_T(best_L, *args):
    """
    Runs tune() over values of pegasos' third argument (T) while its fourth
    argument is held fixed at best_L.

    The remaining positional arguments are forwarded unchanged to tune().
    """
    return tune(
        lambda features, labels, T: p1.pegasos(features, labels, T, best_L),
        *args
    )
159 | 
def tune_pegasos_L(best_T, *args):
    """
    Runs tune() over values of pegasos' fourth argument (L) while its third
    argument is held fixed at best_T.

    The remaining positional arguments are forwarded unchanged to tune().
    """
    return tune(
        lambda features, labels, L: p1.pegasos(features, labels, best_T, L),
        *args
    )
164 | 
def most_explanatory_word(theta, wordlist):
    """
    Returns every word of wordlist ordered by descending weight in theta.

    theta[i] is the weight paired with wordlist[i]; the first element of the
    returned list is the word with the largest weight. Equal weights are
    ordered by comparing the words themselves.
    """
    ascending_pairs = sorted(zip(theta, wordlist))
    return [pair[1] for pair in reversed(ascending_pairs)]
168 | 


--------------------------------------------------------------------------------
/P1/resources_sentiment_analysis_Abel.rar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P1/resources_sentiment_analysis_Abel.rar


--------------------------------------------------------------------------------
/P1/resources_sentiment_analysis_solution.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P1/resources_sentiment_analysis_solution.tar.gz


--------------------------------------------------------------------------------
/P2P3 Digit Recognition.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3 Digit Recognition.txt


--------------------------------------------------------------------------------
/P2P3/resources_mnist.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist.tar.gz


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/._.DS_Store:
--------------------------------------------------------------------------------
1 |     Mac OS X            	   2   F      x                                     ATTR       x   x                    


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/Datasets/mnist.pkl.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/Datasets/mnist.pkl.gz


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/Datasets/test_labels_mini.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/Datasets/test_labels_mini.txt.gz


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/Datasets/test_multi_digit_mini.pkl.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/Datasets/test_multi_digit_mini.pkl.gz


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/Datasets/train_labels_mini.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/Datasets/train_labels_mini.txt.gz


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/Datasets/train_multi_digit_mini.pkl.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/Datasets/train_multi_digit_mini.pkl.gz


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/__pycache__/utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/__pycache__/utils.cpython-36.pyc


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part1/__pycache__/features.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part1/__pycache__/features.cpython-36.pyc


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part1/__pycache__/kernel.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part1/__pycache__/kernel.cpython-36.pyc


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part1/__pycache__/linear_regression.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part1/__pycache__/linear_regression.cpython-36.pyc


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part1/__pycache__/softmax.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part1/__pycache__/softmax.cpython-36.pyc


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part1/__pycache__/svm.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part1/__pycache__/svm.cpython-36.pyc


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part1/__pycache__/utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part1/__pycache__/utils.cpython-36.pyc


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part1/cubic_features_checker.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | sys.path.append("..")
 3 | import utils
 4 | from utils import *
 5 | import numpy as np
 6 | from features import cubic_features
 7 | 
 8 | def verify_cubic_features1D():
 9 |     X=np.array([[np.sqrt(3)],[0]])
10 |     X_cube=np.sort(cubic_features(X))
11 |     X_correct = np.array([[ 1., np.sqrt(9), np.sqrt(27), np.sqrt(27)],[0., 0., 0., 1.]]);
12 |     
13 |     if np.all(np.absolute(X_cube-X_correct) < 1.0e-6):
14 |         print ("Verifying cubic features of 1 dimension: Passed")
15 |     else:
16 |         print ("Verifying cubic features of 1 dimension: Failed")
17 | 
18 |     
def verify_cubic_features2D():
    """
    Checks cubic_features on two 2-dimensional samples and prints the verdict.

    Each output row is sorted before the comparison, so the order in which
    cubic_features emits its columns does not matter. Entries must match the
    reference to within 1e-6.
    """
    samples = np.array([[np.sqrt(3), np.sqrt(3)], [0, 0]])
    actual = np.sort(cubic_features(samples))
    expected_first = [1., 3., 3., 5.19615242, 5.19615242, 5.19615242, 5.19615242, 7.34846923, 9., 9.]
    expected_second = [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]
    expected = np.array([expected_first, expected_second])

    within_tolerance = np.all(np.absolute(actual - expected) < 1.0e-6)
    if not within_tolerance:
        print ("Verifying cubic features of 2 dimensions: Failed")
        return
    print ("Verifying cubic features of 2 dimensions: Passed")
29 |         
30 | 
def verify_cubic_features2D2():
    """
    Checks cubic_features on two 2-dimensional samples that are mirror images
    of each other ((sqrt(3), 0) and (0, sqrt(3))) and prints the verdict.

    Each output row is sorted before the comparison, so both samples share the
    same reference row. Entries must match the reference to within 1e-6.
    """
    samples = np.array([[np.sqrt(3), 0], [0, np.sqrt(3)]])
    actual = np.sort(cubic_features(samples))
    expected_row = [0., 0., 0., 0., 0., 0., 1., 3., 5.19615242, 5.19615242]
    expected = np.array([expected_row, expected_row])

    within_tolerance = np.all(np.absolute(actual - expected) < 1.0e-6)
    if not within_tolerance:
        print ("Verifying cubic features of 2 dimensions asymmetric vectors: Failed")
        return
    print ("Verifying cubic features of 2 dimensions asymmetric vectors: Passed")
41 | 
42 | verify_cubic_features1D()
43 | verify_cubic_features2D()
44 | verify_cubic_features2D2()
45 | 


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part1/features.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import matplotlib.pyplot as plt
  3 | 
  4 | # pragma: coderesponse template
  5 | def project_onto_PC(X, pcs, n_components):
  6 |     """
  7 |     Given principal component vectors pcs = principal_components(X)
  8 |     this function returns a new data array in which each sample in X
  9 |     has been projected onto the first n_components principcal components.
 10 |     """
 11 |     # TODO: first center data using the centerData() function.
 12 |     # TODO: Return the projection of the centered dataset
 13 |     #       on the first n_components principal components.
 14 |     #       This should be an array with dimensions: n x n_components.
 15 |     # Hint: these principal components = first n_components columns
 16 |     #       of the eigenvectors returned by principal_components().
 17 |     #       Note that each eigenvector is already be a unit-vector,
 18 |     #       so the projection may be done using matrix multiplication.
 19 | 
 20 |     # X (n, d) 
 21 |     # V (d, k) k eigen-vectors, d >= k
 22 |     # pcs (d, d) full components of eigen-vector 
 23 |     # P (n, n)
 24 | 
 25 |     P = np.matmul(center_data(X), pcs)
 26 | 
 27 |     # keep the n dim and select only n dim from k features 
 28 |     # only hold for d > n
 29 |     return P[:,0:n_components]
 30 | 
 31 | 
 32 | # pragma: coderesponse end
 33 | 
 34 | 
 35 | ### Functions which are already complete, for you to use ###
 36 | 
def cubic_features(X):
    """
    Returns a new dataset with features given by the mapping
    which corresponds to the cubic kernel.

    Args:
        X - (n, d) NumPy array of n data points, each with d features

    Returns:
        (n, (d + 1)(d + 2)(d + 3) / 6) NumPy array. Every output column is a
        degree-3 product of entries of the sample extended with a trailing 1,
        scaled by 1, sqrt(3) or sqrt(6).
        NOTE(review): the scale factors look like square roots of the
        multinomial coefficients of (<x, y> + 1)**3 - confirm against the
        kernel definition used by the course.
    """
    n, d = X.shape  # dataset size, input dimension
    # Copy of X with an extra last column of ones (used by the second pass).
    X_withones = np.ones((n, d + 1))
    X_withones[:, :-1] = X
    new_d = 0  # dimension of output
    new_d = int((d + 1) * (d + 2) * (d + 3) / 6)

    new_data = np.zeros((n, new_d))
    # Number of leading columns filled by the first pass; stays 0 when d <= 2.
    col_index = 0
    # First pass: products x_a * x_b * x_c of three distinct original features
    # (a < b < c), scaled by sqrt(6). Only possible when d > 2.
    for x_i in range(n):
        X_i = X[x_i]
        X_i = X_i.reshape(1, X_i.size)

        if d > 2:
            # (d, d) matrix of all pairwise products x_a * x_b.
            comb_2 = np.matmul(np.transpose(X_i), X_i)

            # Keep the strictly upper-triangular entries, i.e. pairs with a < b.
            unique_2 = comb_2[np.triu_indices(d, 1)]
            unique_2 = unique_2.reshape(unique_2.size, 1)
            # Row = pair (a, b), column = c: every product x_a * x_b * x_c.
            comb_3 = np.matmul(unique_2, X_i)
            # Mask marking the entries of comb_3 to keep.
            keep_m = np.zeros(comb_3.shape)
            # First row of comb_3 belonging to pairs whose smaller index is i.
            index = 0
            for i in range(d - 1):
                # Writes zeros into an already-zero mask (no effect on the result).
                keep_m[index + np.arange(d - 1 - i), i] = 0

                tri_keep = np.triu_indices(d - 1 - i, 1)

                # Shift the local triangle indices to the rows of pairs (i, b)
                # and to columns c > b.
                correct_0 = tri_keep[0] + index
                correct_1 = tri_keep[1] + i + 1

                keep_m[correct_0, correct_1] = 1
                index += d - 1 - i

            unique_3 = np.sqrt(6) * comb_3[np.nonzero(keep_m)]

            new_data[x_i, np.arange(unique_3.size)] = unique_3
            col_index = unique_3.size

    # Second pass: remaining terms, written after the first-pass columns and
    # computed on the sample extended with the trailing 1.
    for i in range(n):
        newdata_colindex = col_index
        for j in range(d + 1):
            # Pure cube of entry j.
            new_data[i, newdata_colindex] = X_withones[i, j]**3
            newdata_colindex += 1
            for k in range(j + 1, d + 1):
                # Squared-times-linear terms for the pair (j, k), both ways round.
                new_data[i, newdata_colindex] = X_withones[i, j]**2 * X_withones[i, k] * (3**(0.5))
                newdata_colindex += 1

                new_data[i, newdata_colindex] = X_withones[i, j] * X_withones[i, k]**2 * (3**(0.5))
                newdata_colindex += 1

                # Product of two original features (k < d excludes the ones column).
                if k < d:
                    new_data[i, newdata_colindex] = X_withones[i, j] * X_withones[i, k] * (6**(0.5))
                    newdata_colindex += 1

    return new_data
 95 | 
 96 | 
 97 | def center_data(X):
 98 |     """
 99 |     Returns a centered version of the data, where each feature now has mean = 0
100 | 
101 |     Args:
102 |         X - n x d NumPy array of n data points, each with d features
103 | 
104 |     Returns:
105 |         n x d NumPy array X' where for each i = 1, ..., n and j = 1, ..., d:
106 |         X'[i][j] = X[i][j] - means[j]
107 |     """
108 |     feature_means = X.mean(axis=0)
109 |     return(X - feature_means)
110 | 
111 | 
def principal_components(X):
    """
    Returns the principal component vectors of the data, sorted in decreasing order
    of eigenvalue. This function first calculates the scatter matrix of the
    centered data and then finds its eigenvectors.

    Args:
        X - n x d NumPy array of n data points, each with d features

    Returns:
        d x d NumPy array whose columns are the principal component directions sorted
        in descending order by the amount of variation along each direction (these are
        the d eigenvectors of the scatter matrix sorted in descending
        order of eigenvalues, so the first column corresponds to the eigenvector with
        the largest eigenvalue). Each direction is determined only up to sign.
    """
    centered_data = center_data(X)  # first center data
    scatter_matrix = np.dot(centered_data.transpose(), centered_data)
    # The scatter matrix is symmetric, so use the symmetric solver: unlike the
    # general eig routine it always yields real eigenvalues and eigenvectors,
    # even when eigenvalues are repeated or close to zero.
    eigen_values, eigen_vectors = np.linalg.eigh(scatter_matrix)
    # Re-order eigenvectors from largest to smallest eigenvalue:
    order = np.argsort(eigen_values)[::-1]
    return eigen_vectors[:, order]
136 | 
137 | 
def plot_PC(X, pcs, labels):
    """
    Scatter-plots the samples of X in the plane of the first two principal
    components, drawing each point as the text of its label.

    X = the data to project.
    pcs = matrix whose columns are the principal component vectors.
    labels = a numpy array containing the digits corresponding to each image in X.
    """
    projected = project_onto_PC(X, pcs, n_components=2)
    first_pc, second_pc = projected[:, 0], projected[:, 1]
    text_labels = [str(digit) for digit in labels.tolist()]

    fig, ax = plt.subplots()
    # Fully transparent markers: the scatter call only sets the axis limits,
    # the visible marks are the annotations added below.
    ax.scatter(first_pc, second_pc, alpha=0, marker=".")
    for idx, txt in enumerate(text_labels):
        ax.annotate(txt, (first_pc[idx], second_pc[idx]))
    ax.set_xlabel('PC 1')
    ax.set_ylabel('PC 2')
    plt.show()
155 | 
156 | 
def reconstruct_PC(x_pca, pcs, n_components, X):
    """
    Given the principal component vectors as the columns of matrix pcs,
    this function reconstructs a single image from its principal component
    representation, x_pca.

    Args:
        x_pca - length-n_components vector of coordinates along the leading
            principal components
        pcs - NumPy array whose columns are the principal component directions
        n_components - number of leading components that x_pca refers to
        X - the original (n, d) data to which PCA was applied to get pcs;
            only its per-feature means are used

    Returns:
        length-d NumPy array: the combination of the leading components
        weighted by x_pca, shifted back by the feature means of X
    """
    # Take the column means directly instead of recovering them from
    # X - center_data(X), which builds an n x d temporary and adds round-off.
    feature_means = X.mean(axis=0)
    leading_pcs = pcs[:, :n_components]
    x_reconstructed = np.dot(x_pca, leading_pcs.T) + feature_means
    return x_reconstructed
168 | 
169 |     


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part1/kernel.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | ### Functions for you to fill in ###
 4 | 
 5 | # pragma: coderesponse template
 6 | 
 7 | 
 8 | def polynomial_kernel(X, Y, c, p):
 9 |     """
10 |         Compute the polynomial kernel between two matrices X and Y::
11 |             K(x, y) = (<x, y> + c)^p
12 |         for each pair of rows x in X and y in Y.
13 | 
14 |         Args:
15 |             X - (n, d) NumPy array (n datapoints each with d features)
16 |             Y - (m, d) NumPy array (m datapoints each with d features)
17 |             c - a coefficient to trade off high-order and low-order terms (scalar)
18 |             p - the degree of the polynomial kernel
19 | 
20 |         Returns:
21 |             kernel_matrix - (n, m) Numpy array containing the kernel matrix
22 |     """
23 |     # YOUR CODE HERE
24 |     # operator ** is the power of each element in matrix 
25 |     # np.linalg.matrix_power is multiple matrix multiplication 
26 |     kernel_matrix = (np.matmul(X, np.transpose(Y)) + c)**p
27 | 
28 |     return kernel_matrix
29 | 
30 | # pragma: coderesponse end
31 | 
32 | # pragma: coderesponse template
33 | 
34 | 
def rbf_kernel(X, Y, gamma):
    """
        Compute the Gaussian RBF kernel between two matrices X and Y::
            K(x, y) = exp(-gamma ||x-y||^2)
        for each pair of rows x in X and y in Y.

        Args:
            X - (n, d) NumPy array (n datapoints each with d features)
            Y - (m, d) NumPy array (m datapoints each with d features)
            gamma - the gamma parameter of gaussian function (scalar)

        Returns:
            kernel_matrix - (n, m) Numpy array containing the kernel matrix
    """
    X = np.asarray(X)
    Y = np.asarray(Y)

    # Expand ||x - y||^2 = ||x||^2 - 2<x, y> + ||y||^2 so that only (n, m)
    # arrays are allocated; subtracting X[:, np.newaxis] - Y instead would
    # materialize an (n, m, d) temporary.
    x_sq_norms = np.sum(np.square(X), axis=1)[:, np.newaxis]  # (n, 1)
    y_sq_norms = np.sum(np.square(Y), axis=1)[np.newaxis, :]  # (1, m)
    sq_dists = x_sq_norms - 2 * np.matmul(X, np.transpose(Y)) + y_sq_norms

    # Round-off in the expansion can leave tiny negative values where the true
    # distance is zero; a squared distance is never negative.
    np.maximum(sq_dists, 0, out=sq_dists)

    return np.exp(-gamma * sq_dists)
86 | 
87 | 
88 | # pragma: coderesponse end
89 | 
90 | 
91 | 
92 | 
93 | 
94 | 


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part1/linear_regression.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | ### Functions for you to fill in ###
 4 | 
 5 | #pragma: coderesponse template
 6 | def closed_form(X, Y, lambda_factor):
 7 |     """
 8 |     Computes the closed form solution of linear regression with L2 regularization
 9 | 
10 |     Args:
11 |         X - (n, d + 1) NumPy array (n datapoints each with d features plus the bias feature in the first dimension)
12 |         Y - (n, ) NumPy array containing the labels (a number from 0-9) for each
13 |             data point
14 |         lambda_factor - the regularization constant (scalar)
15 |     Returns:
16 |         theta - (d + 1, ) NumPy array containing the weights of linear regression. Note that theta[0]
17 |         represents the y-axis intercept of the model and therefore X[0] = 1
18 |     """
19 |     # YOUR CODE HERE
20 |     [n,d] = np.shape(X)
21 |     d = d-1
22 | 
23 |     A = np.matmul(np.transpose(X),X) + lambda_factor*np.identity(d+1)
24 |     B = np.matmul(np.transpose(X),Y)
25 |     theta = np.matmul(np.linalg.inv(A),B)
26 | 
27 |     return theta
28 | 
29 | 
30 | #pragma: coderesponse end
31 | 
32 | ### Functions which are already complete, for you to use ###
33 | 
def compute_test_error_linear(test_x, Y, theta):
    """
    Returns the fraction of test points whose predicted digit differs from Y.

    The linear prediction test_x . theta is rounded to the nearest integer and
    limited to the range 0..9 before being compared with the labels.
    """
    rounded = np.round(np.dot(test_x, theta))
    predicted_digits = np.clip(rounded, 0, 9)
    return 1 - np.mean(predicted_digits == Y)
39 | 


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part1/softmax.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | sys.path.append("..")
  3 | import utils
  4 | from utils import *
  5 | import numpy as np
  6 | import matplotlib.pyplot as plt
  7 | import scipy.sparse as sparse
  8 | 
  9 | 
 10 | def augment_feature_vector(X):
 11 |     """
 12 |     Adds the x[i][0] = 1 feature for each data point x[i].
 13 | 
 14 |     Args:
 15 |         X - a NumPy matrix of n data points, each with d - 1 features
 16 | 
 17 |     Returns: X_augment, an (n, d) NumPy array with the added feature for each datapoint
 18 |     """
 19 |     column_of_ones = np.zeros([len(X), 1]) + 1
 20 |     return np.hstack((column_of_ones, X))
 21 | 
 22 | #pragma: coderesponse template
 23 | def compute_probabilities(X, theta, temp_parameter):
 24 |     """
 25 |     Computes, for each datapoint X[i], the probability that X[i] is labeled as j
 26 |     for j = 0, 1, ..., k-1
 27 | 
 28 |     Args:
 29 |         X - (n, d) NumPy array (n datapoints each with d features)
 30 |         theta - (k, d) NumPy array, where row j represents the parameters of our model for label j
 31 |         temp_parameter - the temperature parameter of softmax function (scalar)
 32 |     Returns:
 33 |         H - (k, n) NumPy array, where each entry H[j][i] is the probability that X[i] is labeled as j
 34 |     """
 35 |     #YOUR CODE HERE
 36 |     # for any vector and matrix manipulation always recall np 
 37 |     # exponential function is also within np 
 38 |     D = np.matmul(theta, np.transpose(X))/temp_parameter
 39 |     C = np.max(D, axis=0)
 40 | 
 41 |     E = np.exp(D-C)
 42 |     S = np.sum(E, axis=0)
 43 | 
 44 |     return E/S
 45 | #pragma: coderesponse end
 46 | 
 47 | 
 48 | #pragma: coderesponse template
 49 | def compute_cost_function(X, Y, theta, lambda_factor, temp_parameter):
 50 |     """
 51 |     Computes the total cost over every datapoint.
 52 | 
 53 |     Args:
 54 |         X - (n, d) NumPy array (n datapoints each with d features)
 55 |         Y - (n, ) NumPy array containing the labels (a number from 0-9) for each
 56 |             data point
 57 |         theta - (k, d) NumPy array, where row j represents the parameters of our
 58 |                 model for label j
 59 |         lambda_factor - the regularization constant (scalar)
 60 |         temp_parameter - the temperature parameter of softmax function (scalar)
 61 | 
 62 |     Returns
 63 |         c - the cost value (scalar)
 64 |     """
 65 |     #YOUR CODE HERE
 66 |     n = len(Y)
 67 |     [k,d] = np.shape(theta)
 68 | 
 69 |     # simply recall function with c correction 
 70 |     L = np.log(compute_probabilities(X,theta,temp_parameter)) # k x n = (k, n)
 71 | 
 72 |     J = 0
 73 | 
 74 |     for i in range(n):
 75 |         for j in range(k):
 76 |             if Y[i] == j:
 77 |                 J += L[j,i]
 78 | 
 79 |     return -J/n + lambda_factor/2*np.sum(np.square(theta))
 80 | 
 81 | 
 82 | 
 83 | #pragma: coderesponse end
 84 | 
 85 | #pragma: coderesponse template
 86 | def run_gradient_descent_iteration(X, Y, theta, alpha, lambda_factor, temp_parameter):
 87 |     """
 88 |     Runs one step of batch gradient descent
 89 | 
 90 |     Args:
 91 |         X - (n, d) NumPy array (n datapoints each with d features)
 92 |         Y - (n, ) NumPy array containing the labels (a number from 0-9) for each
 93 |             data point
 94 |         theta - (k, d) NumPy array, where row j represents the parameters of our
 95 |                 model for label j
 96 |         alpha - the learning rate (scalar)
 97 |         lambda_factor - the regularization constant (scalar)
 98 |         temp_parameter - the temperature parameter of softmax function (scalar)
 99 | 
100 |     Returns:
101 |         theta - (k, d) NumPy array that is the final value of parameters theta
102 |     """
103 |     #YOUR CODE HERE
104 |     # use sparse matrix to fit np.shape(H) which is (k, n)
105 |     n = len(Y)
106 |     [k,d] = np.shape(theta)
107 |     
108 |     # recall probability 
109 |     H = compute_probabilities(X, theta, temp_parameter)
110 |     # recall sparse matrix {0,1} for indices ij matching 
111 |     M = sparse.coo_matrix(([1]*n, (Y, range(n))), shape=(k,n)).toarray()
112 |     
113 |     # X (n, d)
114 |     # H (k, n)
115 |     # theta (k, d)
116 | 
117 |     # SGD in matrix form 
118 |     G = -np.matmul(M-H, X)/(temp_parameter*n) + lambda_factor*theta
119 | 
120 |     # SGD update
121 |     return theta - alpha*G
122 |     
123 | #pragma: coderesponse end
124 | 
125 | 
126 | 
127 | #pragma: coderesponse template
def update_y(train_y, test_y):
    """
    Maps the digit labels of the training and test sets to their value mod 3.

    Args:
        train_y - (n, ) NumPy array containing the labels (a number between 0-9)
                 for each datapoint in the training set
        test_y - (n, ) NumPy array containing the labels (a number between 0-9)
                for each datapoint in the test set

    Returns:
        train_y_mod3 - (n, ) NumPy array containing the new labels (a number between 0-2)
                     for each datapoint in the training set
        test_y_mod3 - (n, ) NumPy array containing the new labels (a number between 0-2)
                    for each datapoint in the test set
    """
    train_y_mod3 = np.remainder(train_y, 3)
    test_y_mod3 = np.remainder(test_y, 3)
    return train_y_mod3, test_y_mod3
147 | 
148 | #pragma: coderesponse end
149 | 
150 | #pragma: coderesponse template
def compute_test_error_mod3(X, Y, theta, temp_parameter):
    """
    Returns the error rate obtained when both the predicted digits and the
    given labels are compared mod 3.

    Args:
        X - (n, d - 1) NumPy array (n datapoints each with d - 1 features)
        Y - (n, ) NumPy array containing the labels (a number from 0-2) for each
            data point
        theta - (k, d) NumPy array, where row j represents the parameters of our
                model for label j
        temp_parameter - the temperature parameter of softmax function (scalar)

    Returns:
        test_error - the error rate of the classifier (scalar)
    """
    predicted_mod3 = np.mod(get_classification(X, theta, temp_parameter), 3)
    actual_mod3 = np.mod(Y, 3)
    accuracy = np.mean(predicted_mod3 == actual_mod3)
    return 1 - accuracy
170 | 
171 | #pragma: coderesponse end
172 | 
def softmax_regression(X, Y, temp_parameter, alpha, lambda_factor, k, num_iterations):
    """
    Trains softmax regression with batch gradient descent, starting from an
    all-zeros theta. theta is a k by d NumPy array where row j represents the
    parameters of our model for label j, for j = 0, 1, ..., k-1

    Args:
        X - (n, d - 1) NumPy array (n data points, each with d-1 features)
        Y - (n, ) NumPy array containing the labels (a number from 0-9) for each
            data point
        temp_parameter - the temperature parameter of softmax function (scalar)
        alpha - the learning rate (scalar)
        lambda_factor - the regularization constant (scalar)
        k - the number of labels (scalar)
        num_iterations - the number of iterations to run gradient descent (scalar)

    Returns:
        theta - (k, d) NumPy array that is the final value of parameters theta
        cost_function_progression - a Python list containing the cost calculated at each step of gradient descent
    """
    X = augment_feature_vector(X)
    num_features = X.shape[1]
    theta = np.zeros([k, num_features])
    cost_function_progression = []
    for _ in range(num_iterations):
        # The cost is recorded before the update, so entry t is the cost of
        # the parameters used at the start of iteration t.
        current_cost = compute_cost_function(X, Y, theta, lambda_factor, temp_parameter)
        cost_function_progression.append(current_cost)
        theta = run_gradient_descent_iteration(X, Y, theta, alpha, lambda_factor, temp_parameter)
    return theta, cost_function_progression
201 | 
def get_classification(X, theta, temp_parameter):
    """
    Classifies each datapoint as the label with the highest predicted probability

    Args:
        X - (n, d - 1) NumPy array (n data points, each with d - 1 features)
        theta - (k, d) NumPy array where row j represents the parameters of our model for
                label j
        temp_parameter - the temperature parameter of softmax function (scalar)

    Returns:
        Y - (n, ) NumPy array, containing the predicted label (a number between 0-9) for
            each data point
    """
    augmented = augment_feature_vector(X)
    label_probabilities = compute_probabilities(augmented, theta, temp_parameter)
    # Rows index labels, so the arg-max along axis 0 picks a label per datapoint.
    return np.argmax(label_probabilities, axis=0)
219 | 
def plot_cost_function_over_time(cost_function_history):
    """Plots the recorded cost values against their iteration index and shows the figure."""
    iterations = range(len(cost_function_history))
    plt.plot(iterations, cost_function_history)
    plt.xlabel('Iteration number')
    plt.ylabel('Cost Function')
    plt.show()
225 | 
def compute_test_error(X, Y, theta, temp_parameter):
    """
    Returns the fraction of datapoints whose predicted label differs from Y.

    Args:
        X - (n, d - 1) NumPy array (n datapoints each with d - 1 features)
        Y - (n, ) NumPy array containing the true label of each data point
        theta - (k, d) NumPy array, where row j represents the parameters of our
                model for label j
        temp_parameter - the temperature parameter of softmax function (scalar)

    Returns:
        test_error - the error rate of the classifier (scalar)
    """
    assigned_labels = get_classification(X, theta, temp_parameter)
    return 1 - np.mean(assigned_labels == Y)
230 | 


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part1/svm.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from sklearn.svm import LinearSVC
 3 | 
 4 | 
 5 | ### Functions for you to fill in ###
 6 | 
 7 | #pragma: coderesponse template
 8 | def one_vs_rest_svm(train_x, train_y, test_x):
 9 |     """
10 |     Trains a linear SVM for binary classifciation
11 | 
12 |     Args:
13 |         train_x - (n, d) NumPy array (n datapoints each with d features)
14 |         train_y - (n, ) NumPy array containing the labels (0 or 1) for each training data point
15 |         test_x - (m, d) NumPy array (m datapoints each with d features)
16 |     Returns:
17 |         pred_test_y - (m,) NumPy array containing the labels (0 or 1) for each test data point
18 |     """
19 |     
20 |     clf = LinearSVC(C=0.1, random_state=0)
21 |     clf.fit(train_x, train_y)
22 | 
23 |     return clf.predict(test_x)
24 | #pragma: coderesponse end
25 |     
26 |     
27 | 
28 | 
29 | #pragma: coderesponse template
def multi_class_svm(train_x, train_y, test_x):
    """
    Fits a linear SVM for multiclass classification using a one-vs-rest strategy
    and labels the test points

    Args:
        train_x - (n, d) NumPy array (n datapoints each with d features)
        train_y - (n, ) NumPy array containing the labels (int) for each training data point
        test_x - (m, d) NumPy array (m datapoints each with d features)
    Returns:
        pred_test_y - (m,) NumPy array containing the labels (int) for each test data point
    """
    # random_state is fixed so repeated runs give the same classifier.
    classifier = LinearSVC(C=0.1, random_state=0).fit(train_x, train_y)
    pred_test_y = classifier.predict(test_x)
    return pred_test_y
46 | #pragma: coderesponse end
47 | 
48 | 
def compute_test_error_svm(test_y, pred_test_y):
    """Returns the fraction of predictions in pred_test_y that differ from test_y."""
    accuracy = np.mean(test_y == pred_test_y)
    return 1 - accuracy
52 | 


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part1/test.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import time
  4 | import traceback
  5 | import numpy as np
  6 | import linear_regression
  7 | import svm
  8 | import softmax
  9 | import features
 10 | import kernel
 11 | 
 12 | sys.path.append("..")
 13 | import utils
 14 | 
 15 | verbose = False
 16 | 
 17 | epsilon = 1e-6
 18 | 
 19 | def green(s):
 20 |     return '\033[1;32m%s\033[m' % s
 21 | 
 22 | def yellow(s):
 23 |     return '\033[1;33m%s\033[m' % s
 24 | 
 25 | def red(s):
 26 |     return '\033[1;31m%s\033[m' % s
 27 | 
 28 | def log(*m):
 29 |     print(" ".join(map(str, m)))
 30 | 
 31 | def log_exit(*m):
 32 |     log(red("ERROR:"), *m)
 33 |     exit(1)
 34 | 
 35 | 
 36 | def check_real(ex_name, f, exp_res, *args):
 37 |     try:
 38 |         res = f(*args)
 39 |     except NotImplementedError:
 40 |         log(red("FAIL"), ex_name, ": not implemented")
 41 |         return True
 42 |     if not np.isreal(res):
 43 |         log(red("FAIL"), ex_name, ": does not return a real number, type: ", type(res))
 44 |         return True
 45 |     if not -epsilon < res - exp_res < epsilon:
 46 |         log(red("FAIL"), ex_name, ": incorrect answer. Expected", exp_res, ", got: ", res)
 47 |         return True
 48 | 
 49 | 
 50 | def equals(x, y):
 51 |     if type(y) == np.ndarray:
 52 |         return (np.abs(x - y) < epsilon).all()
 53 |     return -epsilon < x - y < epsilon
 54 | 
 55 | def check_tuple(ex_name, f, exp_res, *args, **kwargs):
 56 |     try:
 57 |         res = f(*args, **kwargs)
 58 |     except NotImplementedError:
 59 |         log(red("FAIL"), ex_name, ": not implemented")
 60 |         return True
 61 |     if not type(res) == tuple:
 62 |         log(red("FAIL"), ex_name, ": does not return a tuple, type: ", type(res))
 63 |         return True
 64 |     if not len(res) == len(exp_res):
 65 |         log(red("FAIL"), ex_name, ": expected a tuple of size ", len(exp_res), " but got tuple of size", len(res))
 66 |         return True
 67 |     if not all(equals(x, y) for x, y in zip(res, exp_res)):
 68 |         log(red("FAIL"), ex_name, ": incorrect answer. Expected", exp_res, ", got: ", res)
 69 |         return True
 70 | 
 71 | def check_array(ex_name, f, exp_res, *args):
 72 |     try:
 73 |         res = f(*args)
 74 |     except NotImplementedError:
 75 |         log(red("FAIL"), ex_name, ": not implemented")
 76 |         return True
 77 |     if not type(res) == np.ndarray:
 78 |         log(red("FAIL"), ex_name, ": does not return a numpy array, type: ", type(res))
 79 |         return True
 80 |     if not len(res) == len(exp_res):
 81 |         log(red("FAIL"), ex_name, ": expected an array of shape ", exp_res.shape, " but got array of shape", res.shape)
 82 |         return True
 83 |     if not equals(res, exp_res):
 84 |         log(red("FAIL"), ex_name, ": incorrect answer. Expected", exp_res, ", got: ", res)
 85 | 
 86 |         return True
 87 | 
 88 | def check_list(ex_name, f, exp_res, *args):
 89 |     try:
 90 |         res = f(*args)
 91 |     except NotImplementedError:
 92 |         log(red("FAIL"), ex_name, ": not implemented")
 93 |         return True
 94 |     if not type(res) == list:
 95 |         log(red("FAIL"), ex_name, ": does not return a list, type: ", type(res))
 96 |         return True
 97 |     if not len(res) == len(exp_res):
 98 |         log(red("FAIL"), ex_name, ": expected a list of size ", len(exp_res), " but got list of size", len(res))
 99 |         return True
100 |     if not all(equals(x, y) for x, y in zip(res, exp_res)):
101 |         log(red("FAIL"), ex_name, ": incorrect answer. Expected", exp_res, ", got: ", res)
102 |         return True
103 | 
def check_get_mnist():
    """Loads the MNIST data through utils and logs PASS once it unpacks into four parts."""
    ex_name = "Get MNIST data"
    # The values are unused; unpacking only confirms that four items come back.
    _train_x, _train_y, _test_x, _test_y = utils.get_MNIST_data()
    log(green("PASS"), ex_name, "")
108 | 
109 | 
def check_closed_form():
    """Checks linear_regression.closed_form against a precomputed solution."""
    ex_name = "Closed form"
    features_matrix = np.arange(1, 16).reshape(3, 5)
    targets = np.arange(1, 4)
    lambda_factor = 0.5
    expected = np.array([-0.03411225,  0.00320187,  0.04051599,  0.07783012,  0.11514424])
    failed = check_array(
        ex_name, linear_regression.closed_form,
        expected, features_matrix, targets, lambda_factor)
    if not failed:
        log(green("PASS"), ex_name, "")
122 | 
def check_svm():
    """
    Checks svm.one_vs_rest_svm on random data where all but the last training
    point share one label, expecting that label for the first m points.
    """
    ex_name = "One vs rest SVM"
    n, m, d = 5, 3, 7
    train_x = np.random.random((n, d))
    test_x = train_x[:m]

    # Run the check once with 0 as the majority label and once with 1.
    for majority, minority in ((0, 1), (1, 0)):
        train_y = np.full(n, majority, dtype=float)
        train_y[-1] = minority
        exp_res = np.full(m, majority, dtype=float)
        if check_array(
                ex_name, svm.one_vs_rest_svm,
                exp_res, train_x, train_y, test_x):
            return

    log(green("PASS"), ex_name, "")
147 | 
148 | 
def check_compute_probabilities():
    """
    Checks softmax.compute_probabilities on two cases: an all-zeros theta
    (expected uniform probabilities) and an increasing theta (expected all
    probability on the last label).
    """
    ex_name = "Compute probabilities"
    n, d, k = 3, 5, 7
    X = np.arange(0, n * d).reshape(n, d)
    zeros = np.zeros((k, d))
    temp = 0.2
    exp_res = np.ones((k, n)) / k
    if check_array(
            ex_name, softmax.compute_probabilities,
            exp_res, X, zeros, temp):
        return

    # The scores grow so quickly with the label index that the last label is
    # expected to receive probability 1 to within the tolerance.
    theta = np.arange(0, k * d).reshape(k, d)
    exp_res = np.zeros((k, n))
    exp_res[-1] = 1
    if check_array(
            ex_name, softmax.compute_probabilities,
            exp_res, X, theta, temp):
        return

    log(green("PASS"), ex_name, "")
171 | 
def check_compute_cost_function():
    """Checks softmax.compute_cost_function for an all-zeros theta against a precomputed cost."""
    ex_name = "Compute cost function"
    n, d, k = 3, 5, 7
    data = np.arange(0, n * d).reshape(n, d)
    labels = np.arange(0, n)
    theta = np.zeros((k, d))
    temp = 0.2
    lambda_factor = 0.5
    expected_cost = 1.9459101490553135
    failed = check_real(
        ex_name, softmax.compute_cost_function,
        expected_cost, data, labels, theta, lambda_factor, temp)
    if not failed:
        log(green("PASS"), ex_name, "")
186 | 
def check_run_gradient_descent_iteration():
    """
    Checks one step of softmax.run_gradient_descent_iteration, starting from an
    all-zeros theta, against a precomputed result.
    """
    ex_name = "Run gradient descent iteration"
    n, d, k = 3, 5, 7
    X = np.arange(0, n * d).reshape(n, d)
    Y = np.arange(0, n)
    zeros = np.zeros((k, d))
    alpha = 2
    temp = 0.2
    lambda_factor = 0.5
    exp_res = np.array([
       [ -7.14285714,  -5.23809524,  -3.33333333,  -1.42857143, 0.47619048],
       [  9.52380952,  11.42857143,  13.33333333,  15.23809524, 17.14285714],
       [ 26.19047619,  28.0952381 ,  30.        ,  31.9047619 , 33.80952381],
       [ -7.14285714,  -8.57142857, -10.        , -11.42857143, -12.85714286],
       [ -7.14285714,  -8.57142857, -10.        , -11.42857143, -12.85714286],
       [ -7.14285714,  -8.57142857, -10.        , -11.42857143, -12.85714286],
       [ -7.14285714,  -8.57142857, -10.        , -11.42857143, -12.85714286]
    ])

    if check_array(
            ex_name, softmax.run_gradient_descent_iteration,
            exp_res, X, Y, zeros, alpha, lambda_factor, temp):
        return
    log(green("PASS"), ex_name, "")
213 | 
def check_update_y():
    """Checks softmax.update_y on the digits 0-9 in ascending and descending order."""
    ex_name = "Update y"
    ascending = np.arange(0, 10)
    descending = np.arange(9, -1, -1)
    expected = (
        np.array([0, 1, 2, 0, 1, 2, 0, 1, 2, 0]),
        np.array([0, 2, 1, 0, 2, 1, 0, 2, 1, 0]),
    )
    failed = check_tuple(
        ex_name, softmax.update_y,
        expected, ascending, descending)
    if not failed:
        log(green("PASS"), ex_name, "")
227 | 
228 | 
def check_project_onto_PC():
    """
    Checks features.project_onto_PC on a small matrix, using the principal
    components returned by features.principal_components.
    """
    ex_name = "Project onto PC"
    data = np.array([
        [0, 1, 2, 3],
        [1, 2, 3, 4],
        [2, 3, 4, 5],
    ])
    components = features.principal_components(data)
    expected = np.array([
        [-2, 0, 0],
        [0, 0, 0],
        [2, 0, 0],
    ])
    n_components = 3
    failed = check_array(
        ex_name, features.project_onto_PC,
        expected, data, components, n_components)
    if not failed:
        log(green("PASS"), ex_name, "")
248 | 
249 | 
def check_polynomial_kernel():
    """
    Checks kernel.polynomial_kernel entry by entry against (x . y + c) ** p
    on random inputs.

    Logs FAIL and returns True at the first mismatch (or when the kernel is
    not implemented); logs PASS only when every entry matches.
    """
    ex_name = "Polynomial kernel"
    n, m, d = 3, 5, 7
    c = 1
    p = 2
    X = np.random.random((n, d))
    Y = np.random.random((m, d))
    try:
        # The declared degree p is the exponent; the feature dimension d was
        # previously passed here by mistake, leaving p unused.
        K = kernel.polynomial_kernel(X, Y, c, p)
    except NotImplementedError:
        log(red("FAIL"), ex_name, ": not implemented")
        return True
    for i in range(n):
        for j in range(m):
            expected = (X[i] @ Y[j] + c) ** p
            got = K[i][j]
            if not equals(expected, got):
                log(
                    red("FAIL"), ex_name,
                    ": values at ({}, {}) do not match. Expected {}, got {}"
                    .format(i, j, expected, got)
                )
                # Stop here so a failed check is never followed by PASS.
                return True
    log(green("PASS"), ex_name, "")
273 | 
274 | 
275 | 
def check_rbf_kernel():
    """
    Checks kernel.rbf_kernel entry by entry against exp(-gamma * ||x - y||^2)
    on random inputs.

    Logs FAIL and returns True at the first mismatch (or when the kernel is
    not implemented); logs PASS only when every entry matches.
    """
    ex_name = "RBF kernel"
    n, m, d = 3, 5, 7
    gamma = 0.5
    X = np.random.random((n, d))
    Y = np.random.random((m, d))
    try:
        K = kernel.rbf_kernel(X, Y, gamma)
    except NotImplementedError:
        log(red("FAIL"), ex_name, ": not implemented")
        return True
    for i in range(n):
        for j in range(m):
            expected = np.exp(-gamma * (np.linalg.norm(X[i] - Y[j]) ** 2))
            got = K[i][j]
            if not equals(expected, got):
                log(
                    red("FAIL"), ex_name,
                    ": values at ({}, {}) do not match. Expected {}, got {}"
                    .format(i, j, expected, got)
                )
                # Stop here so a failed check is never followed by PASS.
                return True
    log(green("PASS"), ex_name, "")
298 | 
299 | 
300 | 
def main():
    """Runs every check in order; an unexpected exception is logged with its traceback and ends the run."""
    log(green("PASS"), "Import mnist project")
    try:
        checks = (
            check_get_mnist,
            check_closed_form,
            check_svm,
            check_compute_probabilities,
            check_compute_cost_function,
            check_run_gradient_descent_iteration,
            check_update_y,
            check_project_onto_PC,
            check_polynomial_kernel,
            check_rbf_kernel,
        )
        for run_check in checks:
            run_check()
    except Exception:
        log_exit(traceback.format_exc())
316 | 
317 | if __name__ == "__main__":
318 |     main()
319 | 


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part1/theta.pkl.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part1/theta.pkl.gz


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-mnist/__pycache__/train_utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-mnist/__pycache__/train_utils.cpython-36.pyc


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-mnist/mnist_model_fully_connected.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-mnist/mnist_model_fully_connected.pt


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-mnist/nnet_cnn.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python
 2 | 
 3 | import _pickle as c_pickle, gzip
 4 | import numpy as np
 5 | from tqdm import tqdm
 6 | import torch
 7 | import torch.autograd as autograd
 8 | import torch.nn.functional as F
 9 | import torch.nn as nn
10 | import sys
11 | sys.path.append("..")
12 | import utils
13 | from utils import *
14 | from train_utils import batchify_data, run_epoch, train_model, Flatten
15 | 
def main():
    """Trains the convolutional network on MNIST and prints its loss and accuracy on the test set."""
    # Load the dataset
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # The convolutional layers take 1x28x28 images, so reshape the data back into that form
    image_shape = (1, 28, 28)
    X_train = np.reshape(X_train, (X_train.shape[0],) + image_shape)
    X_test = np.reshape(X_test, (X_test.shape[0],) + image_shape)

    # Hold out the last tenth of the training data as the dev set
    dev_split_index = int(9 * len(X_train) / 10)
    X_train, X_dev = X_train[:dev_split_index], X_train[dev_split_index:]
    y_train, y_dev = y_train[:dev_split_index], y_train[dev_split_index:]

    # Shuffle the remaining training examples (features and labels together)
    permutation = np.arange(len(X_train))
    np.random.shuffle(permutation)
    X_train = [X_train[i] for i in permutation]
    y_train = [y_train[i] for i in permutation]

    # Split dataset into batches
    batch_size = 32
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    #################################
    ## Model specification TODO
#pragma: coderesponse template name="pytorchcnn" dedent="true"
    model = nn.Sequential(
              nn.Conv2d(1, 32, (3, 3)),
              nn.ReLU(),
              nn.MaxPool2d((2, 2)),

              nn.Conv2d(32, 64, (3, 3)),
              nn.ReLU(),
              nn.MaxPool2d((2, 2)),

              Flatten(),

              nn.Linear(1600, 128),
              nn.Dropout(p=0.5),
              nn.Linear(128, num_classes)
            )
#pragma: coderesponse end
    ##################################

    train_model(train_batches, dev_batches, model, nesterov=True)

    ## Evaluate the model on test data
    loss, accuracy = run_epoch(test_batches, model.eval(), None)

    summary = "Loss on test set:" + str(loss) + " Accuracy on test set: " + str(accuracy)
    print(summary)
70 | 
71 | 
72 | if __name__ == '__main__':
73 |     # Specify seed for deterministic behavior, then shuffle. Do not change seed for official submissions to edx
74 |     np.random.seed(12321)  # for reproducibility
75 |     torch.manual_seed(12321)
76 |     main()
77 | 


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-mnist/nnet_fc.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python
 2 | 
 3 | import _pickle as cPickle, gzip
 4 | import numpy as np
 5 | from tqdm import tqdm
 6 | import torch
 7 | import torch.autograd as autograd
 8 | import torch.nn.functional as F
 9 | import torch.nn as nn
10 | import sys
11 | sys.path.append("..")
12 | import utils
13 | from utils import *
14 | from train_utils import batchify_data, run_epoch, train_model
15 | 
def main():
    """Trains the fully connected network on MNIST and prints its loss and accuracy on the test set."""
    # Load the dataset
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Hold out the last tenth of the training data as the dev set
    dev_split_index = int(9 * len(X_train) / 10)
    X_train, X_dev = X_train[:dev_split_index], X_train[dev_split_index:]
    y_train, y_dev = y_train[:dev_split_index], y_train[dev_split_index:]

    # Shuffle the remaining training examples (features and labels together)
    permutation = np.arange(len(X_train))
    np.random.shuffle(permutation)
    X_train = [X_train[i] for i in permutation]
    y_train = [y_train[i] for i in permutation]

    # Split dataset into batches
    batch_size = 32
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    #################################
    ## Model specification TODO
    model = nn.Sequential(
              nn.Linear(784, 10),
              nn.LeakyReLU(),
              nn.Linear(10, num_classes),
            )
    lr = 0.1
    momentum = 0
    ##################################

    train_model(train_batches, dev_batches, model, lr=lr, momentum=momentum)

    ## Evaluate the model on test data
    loss, accuracy = run_epoch(test_batches, model.eval(), None)

    summary = "Loss on test set:" + str(loss) + " Accuracy on test set: " + str(accuracy)
    print(summary)
57 | 
58 | 
59 | if __name__ == '__main__':
60 |     # Specify seed for deterministic behavior, then shuffle. Do not change seed for official submissions to edx
61 |     np.random.seed(12321)  # for reproducibility
62 |     torch.manual_seed(12321)  # for reproducibility
63 |     main()
64 | 


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-mnist/train_utils.py:
--------------------------------------------------------------------------------
 1 | """Training utilities."""
 2 | 
 3 | from tqdm import tqdm
 4 | import numpy as np
 5 | import torch
 6 | import torch.nn.functional as F
 7 | import torch.nn as nn
 8 | 
 9 | 
class Flatten(nn.Module):
    """A custom layer that collapses every dimension after the first into one."""

    def forward(self, input):
        leading_size = input.size(0)
        return input.view(leading_size, -1)
15 | 
16 | # Helpers
def batchify_data(x_data, y_data, batch_size):
    """
    Takes a set of data points and labels and groups them into batches.

    Only full batches are produced: trailing examples that do not fill a
    batch of batch_size are dropped.

    Args:
        x_data - sequence (list or array) of data points
        y_data - sequence (list or array) of labels, aligned with x_data
        batch_size - number of examples per batch

    Returns:
        A list of dicts, each with key 'x' (float32 tensor of data points) and
        key 'y' (long tensor of labels).
    """
    num_batches = len(x_data) // batch_size

    # Convert to a single ndarray once up front: building a tensor from a list
    # of ndarrays goes through a slow element-by-element path that PyTorch
    # warns about. Tensors are left as they are.
    if not torch.is_tensor(x_data):
        x_data = np.asarray(x_data)
    if not torch.is_tensor(y_data):
        y_data = np.asarray(y_data)

    batches = []
    for batch_index in range(num_batches):
        start = batch_index * batch_size
        stop = start + batch_size
        batches.append({
            'x': torch.tensor(x_data[start:stop], dtype=torch.float32),
            'y': torch.tensor(y_data[start:stop], dtype=torch.long),
        })
    return batches
28 | 
def compute_accuracy(predictions, y):
    """Returns the fraction of entries in predictions that equal the gold labels, y."""
    matches = predictions.numpy() == y.numpy()
    return np.mean(matches)
32 | 
33 | 
34 | # Training Procedure
def train_model(train_data, dev_data, model, lr=0.01, momentum=0.9, nesterov=False, n_epochs=10):
    """
    Train a model for n_epochs epochs given data and hyper-params.

    After every epoch the model is evaluated on dev_data and saved to
    'mnist_model_fully_connected.pt'.

    Args:
        train_data - list of training batches (dicts with keys 'x' and 'y')
        dev_data - list of validation batches in the same format
        model - the torch module to train
        lr, momentum, nesterov - settings passed to torch.optim.SGD
        n_epochs - number of passes over train_data. The loop used to ignore
                   this argument and always ran 10 epochs, so the default is
                   10 to keep existing callers' behavior unchanged.

    Returns:
        The validation accuracy of the last epoch, or None if n_epochs < 1.
    """
    # We optimize with SGD
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum, nesterov=nesterov)

    # Defined up front so the return below is valid even when no epoch runs.
    val_acc = None

    for epoch in range(1, n_epochs + 1):
        print("-------------\nEpoch {}:\n".format(epoch))

        # Run **training***
        loss, acc = run_epoch(train_data, model.train(), optimizer)
        print('Train loss: {:.6f} | Train accuracy: {:.6f}'.format(loss, acc))

        # Run **validation** (the model is in eval mode, so run_epoch makes no updates)
        val_loss, val_acc = run_epoch(dev_data, model.eval(), optimizer)
        print('Val loss:   {:.6f} | Val accuracy:   {:.6f}'.format(val_loss, val_acc))
        # Save model
        torch.save(model, 'mnist_model_fully_connected.pt')
    return val_acc
54 | 
def run_epoch(data, model, optimizer):
    """
    Runs the model over every batch in data and returns the mean loss and
    mean accuracy across batches.

    The optimizer is used to update the model only when the model is in
    training mode.
    """
    update_weights = model.training
    epoch_losses = []
    epoch_accuracies = []

    for batch in tqdm(data):
        inputs = batch['x']
        targets = batch['y']

        # Forward pass
        logits = model(inputs)

        # Accuracy of the arg-max prediction on this batch
        predicted = torch.argmax(logits, dim=1)
        epoch_accuracies.append(compute_accuracy(predicted, targets))

        # Cross-entropy loss on this batch
        batch_loss = F.cross_entropy(logits, targets)
        epoch_losses.append(batch_loss.data.item())

        # Parameter update, in training mode only
        if update_weights:
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

    return np.mean(epoch_losses), np.mean(epoch_accuracies)
90 | 


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-nn/neural_nets.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import math
  3 | 
  4 | """
  5 |  ==================================
  6 |  Problem 3: Neural Network Basics
  7 |  ==================================
  8 |     Generates a neural network with the following architecture:
  9 |         Fully connected neural network.
 10 |         Input vector takes in two features.
 11 |         One hidden layer with three neurons whose activation function is ReLU.
 12 |         One output neuron whose activation function is the identity function.
 13 | """
 14 | 
 15 | 
 16 | #pragma: coderesponse template
 17 | def rectified_linear_unit(x):
 18 |     """ Returns the ReLU of x, or the maximum between 0 and x."""
 19 |     if x >= 0:
 20 |         return x
 21 |     else:
 22 |         return 0
 23 | #pragma: coderesponse end
 24 | 
 25 | #pragma: coderesponse template
 26 | def rectified_linear_unit_derivative(x):
 27 |     """ Returns the derivative of ReLU."""
 28 |     if x > 0:
 29 |         return 1
 30 |     else:
 31 |         return 0
 32 | 
 33 | #pragma: coderesponse end
 34 | 
 35 | def output_layer_activation(x):
 36 |     """ Linear function, returns input as is. """
 37 |     # z = f(x) = x
 38 |     return x
 39 | 
 40 | def output_layer_activation_derivative(x):
 41 |     """ Returns the derivative of a linear function: 1. """
 42 |     # dL/dL = 1 
 43 |     # a general setup of derivative 
 44 |     return 1
 45 | 
 46 | class NeuralNetwork():
 47 |     """
 48 |         Contains the following functions:
 49 |             -train: tunes parameters of the neural network based on error obtained from forward propagation.
 50 |             -predict: predicts the label of a feature vector based on the class's parameters.
 51 |             -train_neural_network: trains a neural network over all the data points for the specified number of epochs during initialization of the class.
 52 |             -test_neural_network: uses the parameters specified at the time in order to test that the neural network classifies the points given in testing_points within a margin of error.
 53 |     """
 54 | 
 55 |     def __init__(self):
 56 | 
 57 |         # DO NOT CHANGE PARAMETERS
 58 |         self.input_to_hidden_weights = np.matrix('1 1; 1 1; 1 1')
 59 |         self.hidden_to_output_weights = np.matrix('1 1 1')
 60 |         self.biases = np.matrix('0; 0; 0')
 61 |         self.learning_rate = .001
 62 |         self.epochs_to_train = 10
 63 |         self.training_points = [((2,1), 10), ((3,3), 21), ((4,5), 32), ((6, 6), 42)]
 64 |         self.testing_points = [(1,1), (2,2), (3,3), (5,5), (10,10)]
 65 | 
 66 | #pragma: coderesponse template prefix="class NeuralNetwork(NeuralNetworkBase):\n\n"
 67 |     def train(self, x1, x2, y):
 68 | 
 69 |         ### Forward propagation ###
 70 |         input_values = np.matrix([[x1],[x2]]) # (2,1) (2 by 1)
 71 | 
 72 |         # Calculate the input and activation of the hidden layer
 73 |         hidden_layer_weighted_input = np.dot(self.input_to_hidden_weights, input_values) + self.biases # TODO (3,1) z
 74 | 
 75 |         # vectorize function that only take scalar
 76 |         hidden_layer_activation = np.vectorize(rectified_linear_unit)(hidden_layer_weighted_input) # TODO (3,1) f(z) = ReLU(z)
 77 | 
 78 |         # keep matrix manipulation
 79 |         # no need to convert to array
 80 |         output =  np.dot(self.hidden_to_output_weights, hidden_layer_activation) # TODO (1,1) u1 = f(z1)*V11 + f(z1)*V21 + f(z1)*V31    
 81 |         activated_output = np.vectorize(output_layer_activation)(output) # TODO (1,1) f(u1) = o1
 82 | 
 83 |         ### Backpropagation ###
 84 | 
 85 |         # Compute gradients
 86 |         # error is defined as gradient at each layer before activation 
 87 | 
 88 |         # dL/du1 = dL/do1 * do1/du1 = dL/do1 * df(u1)/du1
 89 |         output_layer_error = (y - activated_output) * np.vectorize(output_layer_activation_derivative)(output) # TODO (1,1) 
 90 |         # dL/dz = dL/du1 * du1/df(z) * df(z)/dz) 
 91 |         hidden_layer_error = np.multiply(np.vectorize(rectified_linear_unit_derivative)(hidden_layer_weighted_input), self.hidden_to_output_weights.transpose()) * output_layer_error # TODO (3,1)  
 92 |         
 93 |         # dL/db = dL/dz * dz/db
 94 |         bias_gradients = hidden_layer_error # TODO (3,1) 
 95 |         # dL/dw = dL/dz * dz/dw
 96 |         input_to_hidden_weight_gradients = hidden_layer_error * input_values.transpose() # TODO (3,2)
 97 |         # dL/dV = dL/du1 * du1/dV
 98 |         hidden_to_output_weight_gradients = output_layer_error * hidden_layer_activation.transpose() # TODO (1,3)        
 99 | 
100 |         # Use gradients to adjust weights and biases using gradient descent
101 |         self.biases += self.learning_rate * bias_gradients # TODO (3,1)
102 |         self.input_to_hidden_weights +=  self.learning_rate * input_to_hidden_weight_gradients # TODO (3,2)
103 |         self.hidden_to_output_weights += self.learning_rate * hidden_to_output_weight_gradients # TODO (1,3)
104 | 
105 | #pragma: coderesponse end
106 | 
107 | 
108 | #pragma: coderesponse template prefix="class NeuralNetwork(NeuralNetworkBase):\n\n"
109 |     def predict(self, x1, x2):
110 | 
111 |         input_values = np.matrix([[x1],[x2]])
112 | 
113 |         # Compute output for a single input(should be same as the forward propagation in training)
114 |         hidden_layer_weighted_input = np.dot(self.input_to_hidden_weights, input_values) + self.biases # TODO
115 |         hidden_layer_activation = np.vectorize(rectified_linear_unit)(hidden_layer_weighted_input) # TODO
116 |         output = np.dot(self.hidden_to_output_weights, hidden_layer_activation) # TODO
117 |         activated_output = np.vectorize(output_layer_activation)(output) # TODO
118 | 
119 |         return activated_output.item()
120 | 
121 | 
122 | #pragma: coderesponse end
123 | 
124 |     # Run this to train your neural network once you complete the train method
125 |     def train_neural_network(self):
126 | 
127 |         for epoch in range(self.epochs_to_train):
128 |             for x,y in self.training_points:
129 |                 self.train(x[0], x[1], y)
130 | 
131 |     # Run this to test your neural network implementation for correctness after it is trained
132 |     def test_neural_network(self):
133 | 
134 |         for point in self.testing_points:
135 |             print("Point,", point, "Prediction,", self.predict(point[0], point[1]))
136 |             if abs(self.predict(point[0], point[1]) - 7*point[0]) < 0.1:
137 |                 print("Test Passed")
138 |             else:
139 |                 print("Point ", point[0], point[1], " failed to be predicted correctly.")
140 |                 return
141 | 
142 | x = NeuralNetwork()
143 | x.train
144 | 
145 | # UNCOMMENT THE LINE BELOW TO TEST YOUR NEURAL NETWORK
146 | x.test_neural_network()
147 | 


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/__pycache__/train_utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/__pycache__/train_utils.cpython-36.pyc


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/__pycache__/utils_multiMNIST.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/__pycache__/utils_multiMNIST.cpython-36.pyc


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/conv.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import torch
 3 | import torch.nn as nn
 4 | import torch.nn.functional as F
 5 | from train_utils import batchify_data, run_epoch, train_model, Flatten
 6 | import utils_multiMNIST as U
path_to_data_dir = '../Datasets/'  # handed to U.get_data() in main()
use_mini_dataset = True  # handed to U.get_data() in main()

batch_size = 64  # handed to batchify_data() for the train, dev and test splits
nb_classes = 10  # not referenced anywhere else in this file
nb_epoch = 30  # not referenced anywhere else in this file
num_classes = 10  # not referenced anywhere else in this file
img_rows, img_cols = 42, 28 # input image dimensions; main() passes their product to CNN
15 | 
16 | 
17 | 
18 | #pragma: coderesponse template name="cnn"
class CNN(nn.Module):
    """Convolutional classifier producing 20 scores per example, 10 for each of the two digits.

    Pipeline: conv(1->32, 3x3) -> ReLU -> maxpool(2x2) -> conv(32->64, 3x3) -> ReLU
    -> maxpool(2x2) -> Flatten -> linear(2880->64) -> dropout(0.5) -> linear(64->20).
    """

    def __init__(self, input_dimension):
        # input_dimension is accepted but never read: the first dense layer is fixed at
        # 2880 inputs (64 channels x 9 x 5, assuming 1x42x28 input images - TODO confirm).
        super().__init__()

        # Convolutional feature extractor; one ReLU / pooling module is shared by both stages.
        self.conv2d_1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=(3, 3))
        self.relu = nn.ReLU()
        self.maxpool2d = nn.MaxPool2d(kernel_size=(2, 2))
        self.conv2d_2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(3, 3))

        # Dense head.
        self.flatten = Flatten()
        self.linear1 = nn.Linear(in_features=2880, out_features=64)
        self.dropout = nn.Dropout(p=0.5)
        self.linear2 = nn.Linear(in_features=64, out_features=20)

    def forward(self, x):
        """Return (scores_first_digit, scores_second_digit), each the 10-column half of the output."""
        stage_one = self.maxpool2d(self.relu(self.conv2d_1(x)))
        stage_two = self.maxpool2d(self.relu(self.conv2d_2(stage_one)))

        hidden = self.dropout(self.linear1(self.flatten(stage_two)))
        scores = self.linear2(hidden)

        # Columns 0-9 belong to the first digit, columns 10-19 to the second.
        return scores[:, :10], scores[:, 10:]
55 | #pragma: coderesponse end
56 | 
def main():
    """Load the data, train a CNN on a shuffled 90% split and print test loss/accuracy per digit."""
    X_train, y_train, X_test, y_test = U.get_data(path_to_data_dir, use_mini_dataset)

    # The last tenth of the training data becomes the dev set.
    cut = int(9 * len(X_train) / 10)
    X_dev, X_train = X_train[cut:], X_train[:cut]
    y_dev = [y_train[digit][cut:] for digit in (0, 1)]
    y_train = [y_train[digit][:cut] for digit in (0, 1)]

    # Shuffle examples and both label lists with one shared random order.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[i] for i in order]
    y_train = [[labels[i] for i in order] for labels in y_train]

    # Split dataset into batches
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # Build the model and train it
    model = CNN(img_rows * img_cols)
    train_model(train_batches, dev_batches, model)

    # Evaluate on the test batches with the model in eval mode; None is passed as
    # run_epoch's third argument (presumably the optimizer - verify in train_utils).
    loss, acc = run_epoch(test_batches, model.eval(), None)
    print('Test loss1: {:.6f}  accuracy1: {:.6f}  loss2: {:.6f}   accuracy2: {:.6f}'.format(loss[0], acc[0], loss[1], acc[1]))
87 | 
if __name__ == '__main__':
    # Specify seed for deterministic behavior, then shuffle. Do not change seed for official submissions to edx
    # Both the NumPy RNG (used by the shuffle in main) and the PyTorch RNG are seeded before main() runs.
    np.random.seed(12321)  # for reproducibility
    torch.manual_seed(12321)  # for reproducibility
    main()
93 | 


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/mlp.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import torch
 3 | import torch.nn as nn
 4 | import torch.nn.functional as F
 5 | from train_utils import batchify_data, run_epoch, train_model, Flatten
 6 | import utils_multiMNIST as U
path_to_data_dir = '../Datasets/'  # handed to U.get_data() in main()
use_mini_dataset = True  # handed to U.get_data() in main()

batch_size = 64  # handed to batchify_data() for the train, dev and test splits
nb_classes = 10  # not referenced anywhere else in this file
nb_epoch = 30  # not referenced anywhere else in this file
num_classes = 10  # not referenced anywhere else in this file
img_rows, img_cols = 42, 28 # input image dimensions; main() passes their product to MLP
15 | 
16 | #pragma: coderesponse template name="mlp"
class MLP(nn.Module):
    """Flatten -> linear(input_dimension->64) -> linear(64->20), split into two 10-score heads.

    forward() returns the raw outputs of the last linear layer; no softmax and no
    non-linearity between the two linear layers is applied.
    """

    def __init__(self, input_dimension):
        """
        Args:
            input_dimension: number of input features of the first linear layer
                (main() passes img_rows * img_cols).
        """
        super(MLP, self).__init__()
        self.flatten = Flatten()
        self.linear1 = nn.Linear(input_dimension, 64)
        self.linear2 = nn.Linear(64, 20)
        # Not used by forward(); kept only so the attribute stays available to existing callers.
        self.softmax = nn.Softmax()

    def forward(self, x):
        """Return (scores_first_digit, scores_second_digit), each the 10-column half of the output."""
        xf = self.flatten(x)

        # A single pass through the layers: the earlier version also computed a softmax-ed
        # copy of both heads and then overwrote it, doubling the work for no effect.
        xl1 = self.linear1(xf)
        xl2 = self.linear2(xl1)

        # Columns 0-9 belong to the first digit, columns 10-19 to the second.
        out_first_digit = xl2[:, :10]
        out_second_digit = xl2[:, 10:]

        return out_first_digit, out_second_digit
46 |         
47 | #pragma: coderesponse end
48 | 
def main():
    """Load the data, train an MLP on a shuffled 90% split and print test loss/accuracy per digit."""
    X_train, y_train, X_test, y_test = U.get_data(path_to_data_dir, use_mini_dataset)

    # The last tenth of the training data becomes the dev set.
    cut = int(9 * len(X_train) / 10)
    X_dev, X_train = X_train[cut:], X_train[:cut]
    y_dev = [y_train[digit][cut:] for digit in (0, 1)]
    y_train = [y_train[digit][:cut] for digit in (0, 1)]

    # Shuffle examples and both label lists with one shared random order.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[i] for i in order]
    y_train = [[labels[i] for i in order] for labels in y_train]

    # Split dataset into batches
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # Build the model and train it
    model = MLP(img_rows * img_cols)
    train_model(train_batches, dev_batches, model)

    # Evaluate on the test batches with the model in eval mode; None is passed as
    # run_epoch's third argument (presumably the optimizer - verify in train_utils).
    loss, acc = run_epoch(test_batches, model.eval(), None)
    print('Test loss1: {:.6f}  accuracy1: {:.6f}  loss2: {:.6f}   accuracy2: {:.6f}'.format(loss[0], acc[0], loss[1], acc[1]))
79 | 
if __name__ == '__main__':
    # Specify seed for deterministic behavior, then shuffle. Do not change seed for official submissions to edx
    # Both the NumPy RNG (used by the shuffle in main) and the PyTorch RNG are seeded before main() runs.
    np.random.seed(12321)  # for reproducibility
    torch.manual_seed(12321)  # for reproducibility
    main()
85 | 


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20000.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20000.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20001.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20001.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20002.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20002.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20003.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20003.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20004.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20004.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20005.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20005.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20006.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20006.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20007.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20007.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20008.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20008.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20009.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20009.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20010.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20010.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20011.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20011.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20012.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20012.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20013.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20013.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20014.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20014.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20015.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20015.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20016.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20016.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20017.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20017.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20018.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20018.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20019.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20019.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20020.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20020.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20021.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20021.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20022.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20022.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20023.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20023.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20024.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20024.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20025.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20025.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20026.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20026.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20027.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20027.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20028.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20028.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20029.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20029.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20030.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20030.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20031.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20031.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20032.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20032.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20033.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20033.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20034.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20034.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20035.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20035.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20036.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20036.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20037.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20037.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20038.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20038.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20039.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20039.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20040.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20040.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20041.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20041.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20042.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20042.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20043.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20043.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20044.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20044.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20045.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20045.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20046.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20046.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20047.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20047.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20048.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20048.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20049.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20049.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20050.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20050.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20051.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20051.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20052.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20052.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20053.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20053.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20054.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20054.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20055.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20055.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20056.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20056.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20057.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20057.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20058.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20058.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20059.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20059.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20060.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20060.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20061.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20061.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20062.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20062.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20063.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20063.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20064.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20064.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20065.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20065.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20066.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20066.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20067.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20067.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20068.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20068.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20069.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20069.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20070.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20070.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20071.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20071.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20072.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20072.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20073.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20073.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20074.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20074.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20075.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20075.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20076.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20076.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20077.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20077.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20078.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20078.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20079.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20079.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20080.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20080.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20081.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20081.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20082.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20082.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20083.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20083.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20084.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20084.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20085.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20085.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20086.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20086.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20087.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20087.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20088.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20088.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20089.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20089.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20090.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20090.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20091.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20091.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20092.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20092.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20093.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20093.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20094.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20094.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20095.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20095.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20096.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20096.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20097.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20097.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20098.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20098.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20099.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist/mnist/part2-twodigit/sample_images/img20099.jpg


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/train_utils.py:
--------------------------------------------------------------------------------
  1 | """Training utilities."""
  2 | 
  3 | from tqdm import tqdm
  4 | import numpy as np
  5 | import torch
  6 | import torch.nn.functional as F
  7 | import torch.nn as nn
  8 | 
  9 | 
 10 | class Flatten(nn.Module):
 11 |     """A custom layer that views an input as 1D."""
 12 |     
 13 |     def forward(self, input):
 14 |         return input.view(input.size(0), -1)
 15 | 
 16 | 
 17 | def batchify_data(x_data, y_data, batch_size):
 18 |     """Takes a set of data points and labels and groups them into batches."""
 19 |     # Only take batch_size chunks (i.e. drop the remainder)
 20 |     N = int(len(x_data) / batch_size) * batch_size
 21 |     batches = []
 22 |     for i in range(0, N, batch_size):
 23 |         batches.append({
 24 |             'x': torch.tensor(x_data[i:i + batch_size],
 25 |                               dtype=torch.float32),
 26 |             'y': torch.tensor([y_data[0][i:i + batch_size],
 27 |                                y_data[1][i:i + batch_size]],
 28 |                                dtype=torch.int64)
 29 |         })
 30 |     return batches
 31 | 
 32 | 
 33 | def compute_accuracy(predictions, y):
 34 |     """Computes the accuracy of predictions against the gold labels, y."""
 35 |     return np.mean(np.equal(predictions.numpy(), y.numpy()))
 36 | 
 37 | 
 38 | def train_model(train_data, dev_data, model, lr=0.01, momentum=0.9, nesterov=False, n_epochs=30):
 39 |     """Train a model for N epochs given data and hyper-params."""
 40 |     # We optimize with SGD
 41 |     optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum, nesterov=nesterov)
 42 | 
 43 |     for epoch in range(1, n_epochs + 1):
 44 |         print("-------------\nEpoch {}:\n".format(epoch))
 45 | 
 46 |         # Run **training***
 47 |         loss, acc = run_epoch(train_data, model.train(), optimizer)
 48 |         print('Train | loss1: {:.6f}  accuracy1: {:.6f} | loss2: {:.6f}  accuracy2: {:.6f}'.format(loss[0], acc[0], loss[1], acc[1]))
 49 | 
 50 |         # Run **validation**
 51 |         val_loss, val_acc = run_epoch(dev_data, model.eval(), optimizer)
 52 |         print('Valid | loss1: {:.6f}  accuracy1: {:.6f} | loss2: {:.6f}  accuracy2: {:.6f}'.format(val_loss[0], val_acc[0], val_loss[1], val_acc[1]))
 53 | 
 54 |         # Save model
 55 |         torch.save(model, 'mnist_model_fully_connected.pt')
 56 | 
 57 | 
 58 | def run_epoch(data, model, optimizer):
 59 |     """Train model for one pass of train data, and return loss, acccuracy"""
 60 |     # Gather losses
 61 |     losses_first_label = []
 62 |     losses_second_label = []
 63 |     batch_accuracies_first = []
 64 |     batch_accuracies_second = []
 65 | 
 66 |     # If model is in train mode, use optimizer.
 67 |     is_training = model.training
 68 | 
 69 |     # Iterate through batches
 70 |     for batch in tqdm(data):
 71 |         # Grab x and y
 72 |         x, y = batch['x'], batch['y']
 73 | 
 74 |         # Get output predictions for both the upper and lower numbers
 75 |         out1, out2 = model(x)
 76 | 
 77 |         # Predict and store accuracy
 78 |         predictions_first_label = torch.argmax(out1, dim=1)
 79 |         predictions_second_label = torch.argmax(out2, dim=1)
 80 |         batch_accuracies_first.append(compute_accuracy(predictions_first_label, y[0]))
 81 |         batch_accuracies_second.append(compute_accuracy(predictions_second_label, y[1]))
 82 | 
 83 |         # Compute both losses
 84 |         loss1 = F.cross_entropy(out1, y[0])
 85 |         loss2 = F.cross_entropy(out2, y[1])
 86 |         losses_first_label.append(loss1.data.item())
 87 |         losses_second_label.append(loss2.data.item())
 88 | 
 89 |         # If training, do an update.
 90 |         if is_training:
 91 |             optimizer.zero_grad()
 92 |             joint_loss = 0.5 * (loss1 + loss2)
 93 |             joint_loss.backward()
 94 |             optimizer.step()
 95 | 
 96 |     # Calculate epoch level scores
 97 |     avg_loss = np.mean(losses_first_label), np.mean(losses_second_label)
 98 |     avg_accuracy = np.mean(batch_accuracies_first), np.mean(batch_accuracies_second)
 99 |     return avg_loss, avg_accuracy
100 | 


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/part2-twodigit/utils_multiMNIST.py:
--------------------------------------------------------------------------------
 1 | import gzip, _pickle, numpy as np
# Number of label classes; presumably the ten digits 0-9 (not referenced elsewhere in this file).
num_classes = 10
# Height and width get_data uses when reshaping each sample to (1, img_rows, img_cols).
img_rows, img_cols = 42, 28
 4 | 
 5 | def get_data(path_to_data_dir, use_mini_dataset):
 6 | 	if use_mini_dataset:
 7 | 		exten = '_mini'
 8 | 	else:
 9 | 		exten = ''
10 | 	f = gzip.open(path_to_data_dir + 'train_multi_digit' + exten + '.pkl.gz', 'rb')
11 | 	X_train = _pickle.load(f, encoding='latin1')
12 | 	f.close()
13 | 	X_train =  np.reshape(X_train, (len(X_train), 1, img_rows, img_cols))
14 | 	f = gzip.open(path_to_data_dir + 'test_multi_digit' + exten +'.pkl.gz', 'rb')
15 | 	X_test = _pickle.load(f, encoding='latin1')
16 | 	f.close()
17 | 	X_test =  np.reshape(X_test, (len(X_test),1, img_rows, img_cols))
18 | 	f = gzip.open(path_to_data_dir + 'train_labels' + exten +'.txt.gz', 'rb')
19 | 	y_train = np.loadtxt(f)
20 | 	f.close()
21 | 	f = gzip.open(path_to_data_dir +'test_labels' + exten + '.txt.gz', 'rb')
22 | 	y_test = np.loadtxt(f)
23 | 	f.close()
24 | 	return X_train, y_train, X_test, y_test


--------------------------------------------------------------------------------
/P2P3/resources_mnist/mnist/utils.py:
--------------------------------------------------------------------------------
 1 | import pickle, gzip, numpy as np
 2 | import matplotlib.pyplot as plt
 3 | import matplotlib.cm as cm
 4 | import math
 5 | 
 6 | 
 7 | def plot_images(X):
 8 |     if X.ndim == 1:
 9 |         X = np.array([X])
10 |     num_images = X.shape[0]
11 |     num_rows = math.floor(math.sqrt(num_images))
12 |     num_cols = math.ceil(num_images/num_rows)
13 |     for i in range(num_images):
14 |         reshaped_image = X[i,:].reshape(28,28)
15 |         plt.subplot(num_rows, num_cols, i+1)
16 |         plt.imshow(reshaped_image, cmap = cm.Greys_r)
17 |         plt.axis('off')
18 |     plt.show()
19 | 
20 | 
def pick_examples_of(X, Y, labels, total_count):
    """Select the first ``total_count`` examples whose label is in ``labels``.

    Args:
        X: NumPy array of examples, indexed along its first axis.
        Y: NumPy array of labels aligned with ``X``.
        labels: Iterable of label values to keep.
        total_count: Maximum number of examples to return.

    Returns:
        ``(filtered_x, filtered_y)``: the matching rows of ``X`` and entries
        of ``Y`` in their original order, truncated to ``total_count``.
        Both are empty when ``labels`` is empty.
    """
    # np.isin gives an all-False mask for empty ``labels``. The earlier
    # hand-built mask stayed None in that case, and X[None] returned every
    # example with an extra leading axis instead of an empty selection.
    keep = np.isin(Y, list(labels))
    return (X[keep][:total_count], Y[keep][:total_count])
32 | 
33 | 
def extract_training_and_test_examples_with_labels(train_x, train_y, test_x, test_y, labels, training_count, test_count):
    """Filter both the training and the test split down to ``labels``.

    Each split is passed through ``pick_examples_of`` with its own size cap
    (``training_count`` / ``test_count``).

    Returns:
        ``(train_x, train_y, test_x, test_y)`` after filtering.
    """
    train_subset = pick_examples_of(train_x, train_y, labels, training_count)
    test_subset = pick_examples_of(test_x, test_y, labels, test_count)
    return (train_subset[0], train_subset[1], test_subset[0], test_subset[1])
38 | 
def write_pickle_data(data, file_name):
    """Pickle ``data`` into the gzip-compressed file ``file_name``.

    Args:
        data: Any picklable object.
        file_name: Path of the file to create or overwrite.
    """
    # The context manager closes the handle even if pickling raises.
    with gzip.open(file_name, 'wb') as f:
        pickle.dump(data, f)
43 | 
def read_pickle_data(file_name):
    """Load and return the object pickled in the gzip file ``file_name``.

    The pickle is decoded with ``encoding='latin1'``.

    NOTE(review): unpickling can execute arbitrary code embedded in the file;
    only use this on trusted files.
    """
    # The context manager closes the handle even if unpickling raises.
    with gzip.open(file_name, 'rb') as f:
        return pickle.load(f, encoding='latin1')
49 | 
def get_MNIST_data():
    """
    Loads the MNIST dataset from '../Datasets/mnist.pkl.gz'.

    The pickle holds (train, validation, test) splits, each an
    (images, labels) pair; the validation split is appended to the
    training split before returning.

    Returns:
        train_x - 2D Numpy array (n, d) where each row is an image
        train_y - 1D Numpy array (n, ) where each row is a label
        test_x  - 2D Numpy array (n, d) where each row is an image
        test_y  - 1D Numpy array (n, ) where each row is a label

    """
    splits = read_pickle_data('../Datasets/mnist.pkl.gz')
    (train_x, train_y), (valid_x, valid_y), (test_x, test_y) = splits

    # Fold the validation examples into the training set.
    merged_x = np.vstack((train_x, valid_x))
    merged_y = np.append(train_y, valid_y)
    return (merged_x, merged_y, test_x, test_y)
68 | 
def load_train_and_test_pickle(file_name):
    """Read a gzip pickle holding exactly (train_x, train_y, test_x, test_y).

    Unpacking enforces that the stored object has four elements.
    """
    contents = read_pickle_data(file_name)
    train_x, train_y, test_x, test_y = contents
    return train_x, train_y, test_x, test_y
72 | 
73 | # returns the feature set in a numpy ndarray
def load_CSV(filename):
    """Load a comma-separated numeric file into a NumPy array.

    Args:
        filename: path of the CSV file.

    Returns:
        np.ndarray with the parsed values.
    """
    # Open through a context manager so the handle is closed once parsing
    # finishes (the original left the file object open).
    with open(filename, 'rb') as csv_file:
        return np.asarray(np.loadtxt(csv_file, delimiter=','))
77 | 
78 | 
79 | ###############################
80 | 
# numpy is already imported at the top of this module; this second import is
# redundant but harmless.
import numpy as np
import scipy.io


# NOTE(review): the statements below run whenever this module is imported,
# not only when it is executed as a script: they load the MNIST pickle and
# write four .mat files into the current working directory. Consider moving
# them behind an `if __name__ == "__main__":` guard if importers do not rely
# on these module-level names.
[train_x, train_y, test_x, test_y] = get_MNIST_data()


# scipy.io.savemat('train_x.mat', dict(train_x = train_x))

# Export each array to its own MATLAB .mat file, stored under a variable
# name that matches the file name.
scipy.io.savemat('train_x.mat', {'train_x': train_x})
scipy.io.savemat('train_y.mat', {'train_y': train_y})
scipy.io.savemat('test_x.mat', {'test_x': test_x})
scipy.io.savemat('test_y.mat', {'test_y': test_y})
94 | 
95 | 
96 | 
97 | 
98 | 


--------------------------------------------------------------------------------
/P2P3/resources_mnist_Abel.rar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist_Abel.rar


--------------------------------------------------------------------------------
/P2P3/resources_mnist_solution.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P2P3/resources_mnist_solution.tar.gz


--------------------------------------------------------------------------------
/P4 Collaborative Filtering via Gaussian Mixtures.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P4 Collaborative Filtering via Gaussian Mixtures.txt


--------------------------------------------------------------------------------
/P4/resources_netflix.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P4/resources_netflix.tar.gz


--------------------------------------------------------------------------------
/P4/resources_netflix/netflix/__pycache__/common.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P4/resources_netflix/netflix/__pycache__/common.cpython-36.pyc


--------------------------------------------------------------------------------
/P4/resources_netflix/netflix/__pycache__/em.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P4/resources_netflix/netflix/__pycache__/em.cpython-36.pyc


--------------------------------------------------------------------------------
/P4/resources_netflix/netflix/__pycache__/kmeans.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P4/resources_netflix/netflix/__pycache__/kmeans.cpython-36.pyc


--------------------------------------------------------------------------------
/P4/resources_netflix/netflix/__pycache__/naive_em.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P4/resources_netflix/netflix/__pycache__/naive_em.cpython-36.pyc


--------------------------------------------------------------------------------
/P4/resources_netflix/netflix/common.py:
--------------------------------------------------------------------------------
  1 | """Mixture model for collaborative filtering"""
  2 | from typing import NamedTuple, Tuple
  3 | import numpy as np
  4 | from matplotlib import pyplot as plt
  5 | from matplotlib.patches import Circle, Arc
  6 | 
  7 | 
class GaussianMixture(NamedTuple):
    """Tuple holding the parameters of a mixture of K gaussians.

    Each component has a mean vector, a single scalar variance and a
    mixing weight.
    """
    mu: np.ndarray  # (K, d) array - row j is the mean of component j
    var: np.ndarray  # (K, ) array - entry j is the variance of component j
    p: np.ndarray  # (K, ) array - entry j is the mixing weight of component j
 13 | 
 14 | 
 15 | def init(X: np.ndarray, K: int,
 16 |          seed: int = 0) -> Tuple[GaussianMixture, np.ndarray]:
 17 |     """Initializes the mixture model with random points as initial
 18 |     means and uniform assingments
 19 | 
 20 |     Args:
 21 |         X: (n, d) array holding the data
 22 |         K: number of components
 23 |         seed: random seed
 24 | 
 25 |     Returns:
 26 |         mixture: the initialized gaussian mixture
 27 |         post: (n, K) array holding the soft counts
 28 |             for all components for all examples
 29 | 
 30 |     """
 31 |     np.random.seed(seed)
 32 |     n, _ = X.shape
 33 |     p = np.ones(K) / K
 34 | 
 35 |     # select K random points as initial means
 36 |     mu = X[np.random.choice(n, K, replace=False)]
 37 |     var = np.zeros(K)
 38 |     # Compute variance
 39 |     for j in range(K):
 40 |         var[j] = ((X - mu[j])**2).mean()
 41 | 
 42 |     mixture = GaussianMixture(mu, var, p)
 43 |     post = np.ones((n, K)) / K
 44 | 
 45 |     return mixture, post
 46 | 
 47 | 
 48 | def plot(X: np.ndarray, mixture: GaussianMixture, post: np.ndarray,
 49 |          title: str):
 50 |     """Plots the mixture model for 2D data"""
 51 |     _, K = post.shape
 52 | 
 53 |     percent = post / post.sum(axis=1).reshape(-1, 1)
 54 |     fig, ax = plt.subplots()
 55 |     ax.title.set_text(title)
 56 |     ax.set_xlim((-20, 20))
 57 |     ax.set_ylim((-20, 20))
 58 |     r = 0.25
 59 |     color = ["r", "b", "k", "y", "m", "c"]
 60 |     for i, point in enumerate(X):
 61 |         theta = 0
 62 |         for j in range(K):
 63 |             offset = percent[i, j] * 360
 64 |             arc = Arc(point,
 65 |                       r,
 66 |                       r,
 67 |                       0,
 68 |                       theta,
 69 |                       theta + offset,
 70 |                       edgecolor=color[j])
 71 |             ax.add_patch(arc)
 72 |             theta += offset
 73 |     for j in range(K):
 74 |         mu = mixture.mu[j]
 75 |         sigma = np.sqrt(mixture.var[j])
 76 |         circle = Circle(mu, sigma, color=color[j], fill=False)
 77 |         ax.add_patch(circle)
 78 |         legend = "mu = ({:0.2f}, {:0.2f})\n stdv = {:0.2f}".format(
 79 |             mu[0], mu[1], sigma)
 80 |         ax.text(mu[0], mu[1], legend)
 81 |     plt.axis('equal')
 82 |     plt.show()
 83 | 
 84 | def rmse(X, Y):
 85 |     return np.sqrt(np.mean((X - Y)**2))
 86 | 
 87 | def bic(X: np.ndarray, mixture: GaussianMixture,
 88 |         log_likelihood: float) -> float:
 89 |     """Computes the Bayesian Information Criterion for a
 90 |     mixture of gaussians
 91 | 
 92 |     Args:
 93 |         X: (n, d) array holding the data
 94 |         mixture: a mixture of spherical gaussian
 95 |         log_likelihood: the log-likelihood of the data
 96 | 
 97 |     Returns:
 98 |         float: the BIC for this mixture
 99 |     """
100 | 
101 |     K = len(mixture.p)
102 |     [n,d] = np.shape(X)
103 | 
104 |     p = K*(d+2)-1
105 | 
106 |     BIC = log_likelihood - 0.5*p*np.log(n) 
107 | 
108 |     return BIC 
109 |     


--------------------------------------------------------------------------------
/P4/resources_netflix/netflix/em.py:
--------------------------------------------------------------------------------
  1 | """Mixture model for matrix completion"""
  2 | from typing import Tuple
  3 | import numpy as np
  4 | from scipy.special import logsumexp
  5 | from common import GaussianMixture
  6 | 
  7 | 
  8 | def estep(X: np.ndarray, mixture: GaussianMixture) -> Tuple[np.ndarray, float]:
  9 |     """E-step: Softly assigns each datapoint to a gaussian component
 10 | 
 11 |     Args:
 12 |         X: (n, d) array holding the data, with incomplete entries (set to 0)
 13 |         mixture: the current gaussian mixture
 14 | 
 15 |     Returns:
 16 |         np.ndarray: (n, K) array holding the soft counts
 17 |             for all components for all examples
 18 |         float: log-likelihood of the assignment
 19 | 
 20 |     """
 21 |     
 22 |     [n,d] = np.shape(X)
 23 |     k = len(mixture.p)
 24 |     NK = np.zeros([n,k])
 25 | 
 26 |     ########################### cartisian domain 
 27 | 
 28 |     # for i in range(n):
 29 |     #     Cu = np.where(X[i] != 0)[0] # return tuple so need [0]
 30 |     #     Hu = np.where(X[i] == 0)[0]
 31 |     #     d = len(Cu) # dimension decided by non-zero features 
 32 | 
 33 |     #     for j in range(k):
 34 | 
 35 |     #         A = np.power(2*np.pi*mixture.var[j], -d/2) # (1,1) -> ()        
 36 |     #         B = np.linalg.norm(X[i,Cu] - mixture.mu[j,Cu]) # (1,1) -> ()
 37 |     #         C = np.exp(-1/2/mixture.var[j]*B**2) # (1,1) -> ()
 38 | 
 39 |     #         # K-class Gaussian before mixture  
 40 |     #         NK[i,j] = A*C # (n,k)
 41 | 
 42 |     # # apply weighting to perform Gaussian mixture  
 43 |     # N_post = np.multiply(NK, mixture.p) # (n,k)
 44 |     # N_post_mix = np.sum(N_post, axis=1) # (n,1) -> (n,)
 45 | 
 46 |     # # log-likelihood
 47 |     # # normalized posterior 
 48 |     # L = np.sum(np.log(N_post_mix))
 49 |     # N_post_norm = N_post / N_post_mix[:,None]
 50 | 
 51 |     # return N_post_norm, L
 52 | 
 53 |     ############################ log domain
 54 | 
 55 |     for i in range(n):
 56 |         Cu = np.where(X[i] != 0)[0] # return tuple so need [0]
 57 |         Hu = np.where(X[i] == 0)[0]
 58 |         d = len(Cu) # dimension decided by non-zero features 
 59 | 
 60 |         for j in range(k):
 61 | 
 62 |             A = -d/2*np.log(2*np.pi*mixture.var[j]) # (1,1) -> ()        
 63 |             B = np.linalg.norm(X[i,Cu] - mixture.mu[j,Cu]) # (1,1) -> ()
 64 |             C = -1/2/mixture.var[j] * B**2 # (1,1) -> ()
 65 | 
 66 |             # K-class Gaussian before mixture  
 67 |             NK[i,j] = A+C # (n,k)
 68 | 
 69 |     # apply weighting to perform Gaussian mixture  
 70 |     N_post = NK + np.log(mixture.p) # (n,k)
 71 |     N_post_mix = logsumexp(N_post, axis=1) # (n,1) -> (n,)
 72 | 
 73 |     # log-likelihood
 74 |     # normalized posterior 
 75 |     L = np.sum(N_post_mix)
 76 |     N_post_norm = N_post - N_post_mix[:,None] 
 77 | 
 78 |     return np.exp(N_post_norm), L
 79 | 
 80 | 
 81 | 
 82 | def mstep(X: np.ndarray, post: np.ndarray, mixture: GaussianMixture,
 83 |           min_variance: float = .25) -> GaussianMixture:
 84 |     """M-step: Updates the gaussian mixture by maximizing the log-likelihood
 85 |     of the weighted dataset
 86 | 
 87 |     Args:
 88 |         X: (n, d) array holding the data, with incomplete entries (set to 0)
 89 |         post: (n, K) array holding the soft counts
 90 |             for all components for all examples
 91 |         mixture: the current gaussian mixture
 92 |         min_variance: the minimum variance for each gaussian
 93 | 
 94 |     Returns:
 95 |         GaussianMixture: the new gaussian mixture
 96 |     """
 97 | 
 98 |     [n,d] = np.shape(X)
 99 |     k = len(mixture.p)
100 | 
101 |     n_k = np.sum(post, axis=0) # (1,k) -> (k,)
102 |     p_k = n_k/n # (1,k) -> (k,)
103 | 
104 |     # allocation array
105 |     mu_k = np.zeros([k,d])
106 |     var_k = np.zeros(k)
107 | 
108 |     # allocate list to append 
109 |     non_zero_length = []
110 | 
111 |     # non-zero length for each sample 
112 |     for i in range(n):
113 | 
114 |         non_zero_index = np.where(X[i] != 0)[0]
115 |         non_zero_length.append(len(non_zero_index)) # list, (1,n)
116 | 
117 |     non_zero_length = np.asarray(non_zero_length) # (n,1) -> (n,)
118 |     
119 |     
120 |     # mean estimation, (k,d) 
121 |     for i in range(k):
122 | 
123 |         for j in range(d):
124 |             index = np.where(X[:,j] != 0)[0] # index where X not zero
125 |             
126 |             # update condition
127 |             if np.sum(post[index,i]) >= 1:
128 |                 mu_k[i,j] = np.inner(X[index,j], post[index,i]) / np.sum(post[index,i])
129 |             else:
130 |                 mu_k[i,j] = mixture.mu[i,j]
131 | 
132 |     # var estimation, (1,k) -> (k,)
133 |     B = np.zeros([n,k])
134 | 
135 |     for i in range(n):
136 | 
137 |         for j in range(k):
138 |             index = np.where(X[i] != 0)[0] # index where X not zero
139 | 
140 |             A = np.linalg.norm(X[i,index] - mu_k[j,index])
141 |             B[i,j] = post[i,j]*A**2
142 | 
143 |     var_k = np.sum(B, axis=0) / np.matmul(post.T, non_zero_length)
144 |     
145 |     # var criteria 
146 |     index = np.where(var_k <= min_variance)[0]
147 |     var_k[index] = min_variance
148 | 
149 |     return GaussianMixture(mu_k, var_k, p_k)
150 | 
151 |    
152 | 
def run(X: np.ndarray, mixture: GaussianMixture,
        post: np.ndarray) -> Tuple[GaussianMixture, np.ndarray, float]:
    """Runs the mixture model

    Alternates M-steps and E-steps until the log-likelihood improves by no
    more than 1e-6 times the magnitude of the new log-likelihood. At least
    one M-step is always performed.

    Args:
        X: (n, d) array holding the data
        mixture: the initial gaussian mixture
        post: (n, K) array holding the soft counts
            for all components for all examples (recomputed here; the
            incoming values are not used)

    Returns:
        GaussianMixture: the new gaussian mixture
        np.ndarray: (n, K) array holding the soft counts
            for all components for all examples
        float: log-likelihood of the current assignment
    """
    # Likelihood of the starting mixture, then one full EM iteration.
    post, previous_ll = estep(X, mixture)
    mixture = mstep(X, post, mixture)
    post, current_ll = estep(X, mixture)

    # Strict '>' matches naive_em.run and stops as soon as the improvement
    # is at or below the threshold (the old '>=' kept iterating on equality,
    # including the degenerate case where both sides are 0).
    while current_ll - previous_ll > 1e-6 * abs(current_ll):
        mixture = mstep(X, post, mixture)
        previous_ll = current_ll
        post, current_ll = estep(X, mixture)

    return mixture, post, current_ll
183 | 
184 | 
185 | 
def fill_matrix(X: np.ndarray, mixture: GaussianMixture) -> np.ndarray:
    """Fills an incomplete matrix according to a mixture model

    Each missing entry (stored as 0) is replaced by its expected value under
    the mixture: the posterior-weighted average of the component means for
    that row. Observed entries are returned unchanged.

    Args:
        X: (n, d) array of incomplete data (incomplete entries =0)
        mixture: a mixture of gaussians

    Returns
        np.ndarray: a (n, d) array with completed data
    """
    # Posterior responsibilities given only the observed entries.
    post, _ = estep(X, mixture)

    # Expected value of every coordinate for every row, (n, d).
    expected = post @ mixture.mu

    # np.where builds a new array, so X is never modified, and the result
    # takes a float dtype even if X holds integers (copying X and assigning
    # into it would silently truncate the predictions in that case).
    return np.where(X == 0, expected, X)
245 | 


--------------------------------------------------------------------------------
/P4/resources_netflix/netflix/kmeans.py:
--------------------------------------------------------------------------------
 1 | """Mixture model based on kmeans"""
 2 | from typing import Tuple
 3 | import numpy as np
 4 | from common import GaussianMixture
 5 | 
 6 | 
 7 | def estep(X: np.ndarray, mixture: GaussianMixture) -> np.ndarray:
 8 |     """E-step: Assigns each datapoint to the gaussian component with the
 9 |     closest mean
10 | 
11 |     Args:
12 |         X: (n, d) array holding the data
13 |         mixture: the current gaussian mixture
14 | 
15 |     Returns:
16 |         np.ndarray: (n, K) array holding the soft counts
17 |             for all components for all examples
18 | 
19 |         """
20 |     n, _ = X.shape
21 |     K, _ = mixture.mu.shape
22 |     post = np.zeros((n, K))
23 | 
24 |     for i in range(n):
25 |         tiled_vector = np.tile(X[i, :], (K, 1))
26 |         sse = ((tiled_vector - mixture.mu)**2).sum(axis=1)
27 |         j = np.argmin(sse)
28 |         post[i, j] = 1
29 | 
30 |     return post
31 | 
32 | 
def mstep(X: np.ndarray, post: np.ndarray) -> Tuple[GaussianMixture, float]:
    """M-step: Recomputes the gaussian mixture from the cluster
    assignments. Each cluster yields a component mean and variance.

    Args:
        X: (n, d) array holding the data
        post: (n, K) array holding the soft counts
            for all components for all examples

    Returns:
        GaussianMixture: the new gaussian mixture
        float: the distortion cost for the current assignment
    """
    n, d = X.shape
    _, K = post.shape

    cluster_sizes = post.sum(axis=0)
    weights = cluster_sizes / n

    means = np.zeros((K, d))
    variances = np.zeros(K)
    cost = 0

    for j in range(K):
        membership = post[:, j]
        size = cluster_sizes[j]

        # Weighted average of the points assigned to cluster j.
        means[j, :] = membership @ X / size

        # Sum of squared distances of the cluster's points to its mean.
        sse = ((means[j] - X)**2).sum(axis=1) @ membership
        cost += sse
        variances[j] = sse / (d * size)

    return GaussianMixture(means, variances, weights), cost
62 | 
63 | 
def run(X: np.ndarray, mixture: GaussianMixture,
        post: np.ndarray) -> Tuple[GaussianMixture, np.ndarray, float]:
    """Runs the mixture model

    Repeats E-step / M-step rounds until the distortion cost stops dropping
    by more than 1e-4 between consecutive rounds. At least two rounds are
    always executed, because the first comparison needs a previous cost.

    Args:
        X: (n, d) array holding the data
        mixture: the initial gaussian mixture
        post: (n, K) array holding the soft counts
            for all components for all examples (recomputed here)

    Returns:
        GaussianMixture: the new gaussian mixture
        np.ndarray: (n, K) array holding the soft counts
            for all components for all examples
        float: distortion cost of the current assignment
    """
    # First round: establishes the baseline cost.
    post = estep(X, mixture)
    mixture, cost = mstep(X, post)

    while True:
        previous_cost = cost
        post = estep(X, mixture)
        mixture, cost = mstep(X, post)
        if not (previous_cost - cost > 1e-4):
            break

    return mixture, post, cost
88 | 


--------------------------------------------------------------------------------
/P4/resources_netflix/netflix/main.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import kmeans
  3 | import common
  4 | import naive_em
  5 | import em
  6 | 
  7 | from matplotlib import pyplot as plt
  8 | 
  9 | from common import GaussianMixture
 10 | 
 11 | 
 12 | 
# Toy dataset used by the commented-out experiments (sections 2-5) below;
# X is reassigned before the collaborative-filtering run in section 8.
X = np.loadtxt("toy_data.txt")
 14 | 
 15 | # TODO: Your code here
 16 | 
 17 | ########################### 2. K-means
 18 | 
 19 | # cost_array = np.zeros([5,4])
 20 | # cost_array.fill(np.nan) # allocate non array
 21 | 
 22 | 
 23 | # for seed in range(5):
 24 | 
 25 | #     for K in range(1,5):
 26 | 
 27 | #         [mixture, post] = common.init(X,K,seed)
 28 | #         [mixture, post, cost] = kmeans.run(X, mixture, post)
 29 |         
 30 | #         cost_array[seed, K-1] = cost
 31 | #         cost_min = np.min(cost_array, axis=0) # min for each column
 32 | 
 33 | #         # common.plot(X, mixture, post, 'clustering')
 34 | 
 35 | # print(cost_array)
 36 | # print(cost_min)
 37 | 
 38 | 
 39 | ######################## 3. Expectation–maximization algorithm
 40 | 
 41 | 
 42 | # L_array = np.zeros([5,4])
 43 | # L_array.fill(np.nan) # allocate non array
 44 | 
 45 | 
 46 | # for seed in range(5):
 47 | 
 48 | #     for K in range(1,5):
 49 | 
 50 | #         [mixture, post] = common.init(X,K,seed)
 51 | #         [mixture, post, L] = naive_em.run(X, mixture, post)
 52 |         
 53 | #         L_array[seed, K-1] = L
 54 | #         L_min = np.min(L_array, axis=0) # min for each column
 55 | 
 56 | #         common.plot(X, mixture, post, 'clustering')
 57 | 
 58 | # print(L_array)
 59 | # print(L_min)
 60 | 
 61 | ############################ 4. Comparing K-means and EM
 62 | 
 63 | 
 64 | # K = 3
 65 | # seed = 0
 66 | 
 67 | # [mixture, post] = common.init(X,K,seed)
 68 | # [mixture, post, cost] = kmeans.run(X, mixture, post)
 69 | # common.plot(X, mixture, post, 'clustering')
 70 | 
 71 | # print(mixture)
 72 | 
 73 | # [mixture, post] = common.init(X,K,seed)
 74 | # [mixture, post, L] = naive_em.run(X, mixture, post)
 75 | # common.plot(X, mixture, post, 'clustering')
 76 | 
 77 | # print(mixture)
 78 | 
 79 | 
 80 | 
 81 | ######################### 5. Picking the best K
 82 | ######################### 5. Bayesian Information Criterion
 83 | 
 84 | # BIC_array = []
 85 | # seed = 0
 86 | 
 87 | # for K in range(1,5):
 88 | 
 89 | #     [mixture, post] = common.init(X,K,seed)
 90 | #     [mixture, post, L] = naive_em.run(X, mixture, post)
 91 | 
 92 | #     BIC = common.bic(X, mixture, L)
 93 | 
 94 | #     BIC_array.append(BIC)
 95 | 
 96 | 
 97 | # BIC_best = max(BIC_array) # least penalty BIC
 98 | # K_best = BIC_array.index(max(BIC_array))+1 # nest K index
 99 | 
100 | # print(BIC_best, K_best)
101 | 
102 | ######################### 8. Using the mixture model for collaborative filtering
103 | 
104 | # X = np.loadtxt("netflix_incomplete.txt")
105 | # X_gold = np.loadtxt('netflix_complete.txt')
106 |  
# Small incomplete/complete matrix pair used to exercise the EM pipeline.
X = np.loadtxt("test_incomplete.txt")
X_gold = np.loadtxt('test_complete.txt')

K, seed = 4, 0

mixture, post = common.init(X, K, seed)

# One manual E-step followed by one M-step, printed for inspection.
post, L = em.estep(X, mixture)
mixture = em.mstep(X, post, mixture)
print(post, L, mixture, sep="\n")

# Continue EM from that state until convergence.
mixture, post, L = em.run(X, mixture, post)
print(post, L, mixture, sep="\n")

# Complete the matrix and score it against the fully observed data.
X_prep = em.fill_matrix(X, mixture)
print(X_prep)

RMSE = common.rmse(X_gold, X_prep)
print(RMSE)


# Repeat the full run from several random initialisations, reporting the
# final log-likelihood and the reconstruction error for each seed.
K = 4

for seed in range(5):
    mixture, post = common.init(X, K, seed)
    mixture, post, L = em.run(X, mixture, post)
    print(L)

    X_prep = em.fill_matrix(X, mixture)
    RMSE = common.rmse(X_gold, X_prep)
    print(RMSE)
145 | 


--------------------------------------------------------------------------------
/P4/resources_netflix/netflix/naive_em.py:
--------------------------------------------------------------------------------
  1 | """Mixture model using EM"""
  2 | from typing import Tuple
  3 | import numpy as np
  4 | from common import GaussianMixture
  5 | 
  6 | 
  7 | def estep(X: np.ndarray, mixture: GaussianMixture) -> Tuple[np.ndarray, float]:
  8 |     """E-step: Softly assigns each datapoint to a gaussian component
  9 | 
 10 |     Args:
 11 |         X: (n, d) array holding the data
 12 |         mixture: the current gaussian mixture
 13 | 
 14 |     Returns:
 15 |         np.ndarray: (n, K) array holding the soft counts
 16 |             for all components for all examples
 17 |         float: log-likelihood of the assignment
 18 |     """
 19 | 
 20 |     [n,d] = np.shape(X)
 21 |     K = len(mixture.p)
 22 | 
 23 |     NK = []
 24 | 
 25 |     for i in range(K):
 26 | 
 27 |         A = np.power(2*np.pi*mixture.var[i], -d/2) # (1,1) -> ()
 28 |         B = np.linalg.norm(X-mixture.mu[i], axis=1) # (n,1) -> (1,n) -> (n,)
 29 |         C = np.exp(-1/2/mixture.var[i]*B**2) # (n,1) -> (1,n) -> (n,)
 30 | 
 31 |         NK.append(A*C) # pdf for each class before mixture NK -> (K,n)
 32 | 
 33 |     N = np.matmul(mixture.p, NK) # mixture pdf # (1,n) -> (n,)
 34 |     L = np.sum(np.log(N))
 35 | 
 36 |     N_weight = NK*mixture.p.reshape(K,1) # weighting each class -> (K,n)
 37 |     N_post = N_weight/N # posterior is normalized pdf for each class with weighting -> (K,n)
 38 | 
 39 |     return np.transpose(N_post), L
 40 | 
 41 | 
 42 | def mstep(X: np.ndarray, post: np.ndarray) -> GaussianMixture:
 43 |     """M-step: Updates the gaussian mixture by maximizing the log-likelihood
 44 |     of the weighted dataset
 45 | 
 46 |     Args:
 47 |         X: (n, d) array holding the data
 48 |         post: (n, K) array holding the soft counts
 49 |             for all components for all examples
 50 | 
 51 |     Returns:
 52 |         GaussianMixture: the new gaussian mixture
 53 |     """
 54 |     
 55 |     [n,d] = np.shape(X)
 56 |     [n,K] = np.shape(post)
 57 | 
 58 |     n_K = np.sum(post, axis=0) # (1,K) -> (K,)
 59 |     p_K = n_K/n # (1,K) -> (K,)
 60 | 
 61 |     mu_K = np.matmul(post.transpose(), X) # (K,d)
 62 |     mu_K = mu_K.transpose()/n_K
 63 |     mu_K = mu_K.transpose()
 64 | 
 65 |     var_K = [] # list
 66 | 
 67 |     for i in range(K):
 68 |         A = np.linalg.norm(X-mu_K[i], axis=1) # (n,1) -> (n,)
 69 |         B = np.matmul(A**2, post[:,i])
 70 |         var_K.append(B/d/n_K[i]) # (1,K) -> (K,)
 71 | 
 72 |     var_K = np.asarray(var_K) # convert to array
 73 | 
 74 |     return GaussianMixture(mu_K, var_K, p_K)
 75 | 
 76 | 
 77 | 
 78 | def run(X: np.ndarray, mixture: GaussianMixture,
 79 |         post: np.ndarray) -> Tuple[GaussianMixture, np.ndarray, float]:
 80 |     """Runs the mixture model
 81 | 
 82 |     Args:
 83 |         X: (n, d) array holding the data
 84 |         post: (n, K) array holding the soft counts
 85 |             for all components for all examples
 86 | 
 87 |     Returns:
 88 |         GaussianMixture: the new gaussian mixture
 89 |         np.ndarray: (n, K) array holding the soft counts
 90 |             for all components for all examples
 91 |         float: log-likelihood of the current assignment
 92 |     """
 93 | 
 94 |     # initial value 
 95 |     [post, L0] = estep(X, mixture)
 96 |     mixture = mstep(X, post)
 97 |     [post, L] = estep(X, mixture)
 98 |     
 99 |     while L-L0 > 1e-6*abs(L):
100 | 
101 |         mixture = mstep(X, post)
102 |         [post, L_update] = estep(X, mixture)
103 | 
104 |         L0 = L
105 |         L = L_update
106 |         
107 |     return mixture, post, L
108 | 


--------------------------------------------------------------------------------
/P4/resources_netflix/netflix/test.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import em
 3 | import common
 4 | 
 5 | #############################
 6 | 
 7 | from typing import Tuple
 8 | import numpy as np
 9 | from scipy.special import logsumexp
10 | from common import GaussianMixture
11 | import naive_em
12 | 
13 | ###############################
14 | 
# Small incomplete/complete pair for quick manual checks of the E-step.
X = np.loadtxt("test_incomplete.txt")
X_gold = np.loadtxt("test_complete.txt")

# X = np.loadtxt("netflix_incomplete.txt")
# X_gold = np.loadtxt("netflix_complete.txt")

n, d = X.shape
K, seed = 4, 0

# TODO: Your code here

mixture, post = common.init(X, K, seed)

# Print the data, the initial mixture, and the first E-step result
# (posterior matrix and log-likelihood).
print(X)
print(mixture)
print(em.estep(X, mixture))
44 | 
45 | # print(naive_em.estep(X, mixture))
46 | 


--------------------------------------------------------------------------------
/P4/resources_netflix/netflix/test_complete.txt:
--------------------------------------------------------------------------------
 1 | 2 5 3 4 3
 2 | 3 5 3 4 3
 3 | 2 4 3 3 1
 4 | 4 4 4 5 2
 5 | 3 4 4 4 4
 6 | 1 5 4 5 5
 7 | 2 5 4 5 1
 8 | 3 4 5 4 3
 9 | 3 5 3 3 3
10 | 2 5 3 3 3
11 | 3 4 3 3 3
12 | 1 5 3 5 1
13 | 4 5 3 4 3
14 | 1 4 3 5 2
15 | 1 5 3 3 5
16 | 3 5 3 4 3
17 | 3 5 4 4 2
18 | 3 5 3 5 1
19 | 2 4 5 5 3
20 | 2 5 4 4 2
21 | 


--------------------------------------------------------------------------------
/P4/resources_netflix/netflix/test_incomplete.txt:
--------------------------------------------------------------------------------
 1 | 2 5 3 0 0
 2 | 3 5 0 4 3
 3 | 2 0 3 3 1
 4 | 4 0 4 5 2
 5 | 3 4 0 0 4
 6 | 1 0 4 5 5
 7 | 2 5 0 0 1
 8 | 3 0 5 4 3
 9 | 0 5 3 3 3
10 | 2 0 0 3 3
11 | 3 4 3 3 3
12 | 1 5 3 0 1
13 | 4 5 3 4 3
14 | 1 4 0 5 2
15 | 1 5 3 3 5
16 | 3 5 3 4 3
17 | 3 0 0 4 2
18 | 3 5 3 5 1
19 | 2 4 5 5 0
20 | 2 5 4 4 2
21 | 


--------------------------------------------------------------------------------
/P4/resources_netflix/netflix/test_solutions.txt:
--------------------------------------------------------------------------------
  1 | Input:
  2 | X:
  3 | [[2. 5. 3. 0. 0.]
  4 |  [3. 5. 0. 4. 3.]
  5 |  [2. 0. 3. 3. 1.]
  6 |  [4. 0. 4. 5. 2.]
  7 |  [3. 4. 0. 0. 4.]
  8 |  [1. 0. 4. 5. 5.]
  9 |  [2. 5. 0. 0. 1.]
 10 |  [3. 0. 5. 4. 3.]
 11 |  [0. 5. 3. 3. 3.]
 12 |  [2. 0. 0. 3. 3.]
 13 |  [3. 4. 3. 3. 3.]
 14 |  [1. 5. 3. 0. 1.]
 15 |  [4. 5. 3. 4. 3.]
 16 |  [1. 4. 0. 5. 2.]
 17 |  [1. 5. 3. 3. 5.]
 18 |  [3. 5. 3. 4. 3.]
 19 |  [3. 0. 0. 4. 2.]
 20 |  [3. 5. 3. 5. 1.]
 21 |  [2. 4. 5. 5. 0.]
 22 |  [2. 5. 4. 4. 2.]]
 23 | K: 4
 24 | Mu:
 25 | [[2. 4. 5. 5. 0.]
 26 |  [3. 5. 0. 4. 3.]
 27 |  [2. 5. 4. 4. 2.]
 28 |  [0. 5. 3. 3. 3.]]
 29 | Var: [5.93 4.87 3.99 4.51]
 30 | P: [0.25 0.25 0.25 0.25]
 31 | 
 32 | After first E-step:
 33 | post:
 34 | [[0.17713577 0.12995693 0.43161668 0.26129062]
 35 |  [0.08790299 0.35848927 0.41566414 0.13794359]
 36 |  [0.15529703 0.10542632 0.5030648  0.23621184]
 37 |  [0.23290326 0.10485918 0.58720619 0.07503136]
 38 |  [0.09060401 0.41569201 0.32452345 0.16918054]
 39 |  [0.07639077 0.08473656 0.41423836 0.42463432]
 40 |  [0.21838413 0.20787523 0.41319756 0.16054307]
 41 |  [0.16534478 0.04759109 0.63399833 0.1530658 ]
 42 |  [0.05486073 0.13290982 0.37956674 0.43266271]
 43 |  [0.08779356 0.28748372 0.37049225 0.25423047]
 44 |  [0.07715067 0.18612696 0.50647898 0.23024339]
 45 |  [0.16678427 0.07789806 0.45643509 0.29888258]
 46 |  [0.08544132 0.24851049 0.53837544 0.12767275]
 47 |  [0.17773171 0.19578852 0.41091504 0.21556473]
 48 |  [0.02553529 0.1258932  0.29235844 0.55621307]
 49 |  [0.07604748 0.19032469 0.54189543 0.1917324 ]
 50 |  [0.15623582 0.31418901 0.41418177 0.1153934 ]
 51 |  [0.19275595 0.13517877 0.56734832 0.10471696]
 52 |  [0.33228594 0.02780214 0.50397264 0.13593928]
 53 |  [0.12546781 0.05835499 0.60962919 0.20654801]]
 54 | LL:-152.16319226209848
 55 | 
 56 | After first M-step:
 57 | Mu:
 58 | [[2.38279095 4.64102716 3.73583539 4.28989488 2.17237898]
 59 |  [2.56629755 4.6686168  3.24084599 3.88882023 2.72874336]
 60 |  [2.45674721 4.72686227 3.55798344 4.05614484 2.5030405 ]
 61 |  [2.00305536 4.7674522  3.37388115 3.7905181  2.97986269]]
 62 | Var: [0.71489705 0.64830186 0.73650336 0.85722393]
 63 | P: [0.13810266 0.17175435 0.46575794 0.22438505]
 64 | 
 65 | After a run
 66 | Mu:
 67 | [[2.00570178 4.99062403 3.13772745 4.00124767 1.16193276]
 68 |  [2.99396416 4.68350343 3.00527213 3.52422521 3.08969957]
 69 |  [2.54539306 4.20213487 4.56501823 4.55520636 2.31130827]
 70 |  [1.01534912 4.99975322 3.49251807 3.99998124 4.99986013]]
 71 | Var: [0.25       0.25       0.44961685 0.27930039]
 72 | P: [0.27660973 0.35431424 0.26752518 0.10155086]
 73 | post:
 74 | [[8.35114583e-01 1.26066023e-01 8.03346942e-03 3.07859243e-02]
 75 |  [2.29595284e-04 9.30406661e-01 6.93634633e-02 2.80840424e-07]
 76 |  [9.98723643e-01 1.34234094e-04 1.14212255e-03 1.65905887e-14]
 77 |  [1.85331147e-04 1.94115053e-03 9.97873518e-01 2.57285049e-14]
 78 |  [1.82091725e-08 8.82200084e-01 1.17730763e-01 6.91351811e-05]
 79 |  [2.13395201e-14 1.74763538e-08 1.23289877e-04 9.99876693e-01]
 80 |  [9.78452231e-01 2.41596929e-05 2.15236097e-02 2.05795060e-14]
 81 |  [1.95291523e-06 3.46537075e-03 9.96532634e-01 4.18625878e-08]
 82 |  [2.53995753e-04 9.99058306e-01 6.46220953e-04 4.14767958e-05]
 83 |  [1.39755279e-03 8.96199140e-01 1.02340131e-01 6.31761952e-05]
 84 |  [1.02964283e-05 9.98438589e-01 1.55110233e-03 1.18280899e-08]
 85 |  [9.99175360e-01 4.92298629e-07 8.24147990e-04 5.73816393e-13]
 86 |  [4.54696111e-06 9.96705586e-01 3.28986689e-03 1.91139775e-10]
 87 |  [4.13182467e-02 1.40457914e-05 9.58667653e-01 5.48560980e-08]
 88 |  [9.22358785e-14 4.78927600e-06 3.67220413e-07 9.99994844e-01]
 89 |  [2.36604822e-04 9.96136619e-01 3.62659186e-03 1.84275504e-07]
 90 |  [1.09042309e-01 2.42442342e-01 6.48515348e-01 8.68166867e-11]
 91 |  [9.62134995e-01 1.21159085e-04 3.77438456e-02 5.30337126e-16]
 92 |  [1.39885506e-04 2.34579872e-06 9.99672523e-01 1.85246074e-04]
 93 |  [6.05773445e-01 1.29236657e-02 3.81302856e-01 3.38895349e-08]]
 94 | LL: -84.98451993042474
 95 | X_gold:
 96 | [[2. 5. 3. 4. 3.]
 97 |  [3. 5. 3. 4. 3.]
 98 |  [2. 4. 3. 3. 1.]
 99 |  [4. 4. 4. 5. 2.]
100 |  [3. 4. 4. 4. 4.]
101 |  [1. 5. 4. 5. 5.]
102 |  [2. 5. 4. 5. 1.]
103 |  [3. 4. 5. 4. 3.]
104 |  [3. 5. 3. 3. 3.]
105 |  [2. 5. 3. 3. 3.]
106 |  [3. 4. 3. 3. 3.]
107 |  [1. 5. 3. 5. 1.]
108 |  [4. 5. 3. 4. 3.]
109 |  [1. 4. 3. 5. 2.]
110 |  [1. 5. 3. 3. 5.]
111 |  [3. 5. 3. 4. 3.]
112 |  [3. 5. 4. 4. 2.]
113 |  [3. 5. 3. 5. 1.]
114 |  [2. 4. 5. 5. 3.]
115 |  [2. 5. 4. 4. 2.]]
116 | X_pred:
117 | [[2.         5.         3.         3.94554203 1.53247395]
118 |  [3.         5.         3.11376    4.         3.        ]
119 |  [2.         4.98967752 3.         3.         1.        ]
120 |  [4.         4.20321354 4.         5.         2.        ]
121 |  [3.         4.         3.18859109 3.64540838 4.        ]
122 |  [1.         4.99965498 4.         5.         5.        ]
123 |  [2.         5.         3.16858887 4.01321529 1.        ]
124 |  [3.         4.20380457 5.         4.         3.        ]
125 |  [2.99334056 5.         3.         3.         3.        ]
126 |  [2.         4.63458935 3.16542905 3.         3.        ]
127 |  [3.         4.         3.         3.         3.        ]
128 |  [1.         5.         3.         4.00170707 1.        ]
129 |  [4.         5.         3.         4.         3.        ]
130 |  [1.         4.         4.50628741 5.         2.        ]
131 |  [1.         5.         3.         3.         5.        ]
132 |  [3.         5.         3.         4.         3.        ]
133 |  [3.         4.40437447 4.03220151 4.         2.        ]
134 |  [3.         5.         3.         5.         1.        ]
135 |  [2.         4.         5.         5.         2.3116484 ]
136 |  [2.         5.         4.         4.         2.        ]]
137 | RMSE: 0.3152301205749675
138 | 


--------------------------------------------------------------------------------
/P4/resources_netflix/netflix/toy_data.txt:
--------------------------------------------------------------------------------
  1 | -1.636 2.413
  2 | -2.957 2.296
  3 | -2.871 1.832
  4 | -2.696 1.327
  5 | -2.272 1.738
  6 | -2.393 1.729
  7 | -1.762 2.649
  8 | -2.598 1.345
  9 | -2.020 2.239
 10 | -2.398 2.449
 11 | -2.414 2.569
 12 | -2.545 1.940
 13 | -0.735 2.233
 14 | -2.379 1.711
 15 | -1.895 1.823
 16 | -1.849 2.499
 17 | -2.602 2.498
 18 | -3.130 2.749
 19 | -1.913 2.100
 20 | -2.433 2.104
 21 | -2.124 1.920
 22 | -1.072 2.302
 23 | -2.481 3.094
 24 | -1.844 1.635
 25 | -2.035 1.075
 26 | -1.865 1.608
 27 | -2.703 1.533
 28 | -1.537 1.423
 29 | -1.809 1.690
 30 | -1.176 1.403
 31 | -2.877 2.207
 32 | -1.834 1.168
 33 | -1.831 2.069
 34 | -1.917 2.175
 35 | -2.482 1.634
 36 | -1.917 1.738
 37 | -1.946 2.662
 38 | -1.765 2.648
 39 | -2.604 1.462
 40 | -1.998 0.881
 41 | -1.944 2.426
 42 | -2.156 1.595
 43 | -1.098 1.515
 44 | -3.709 3.208
 45 | -2.474 2.107
 46 | -2.571 1.991
 47 | -1.886 2.837
 48 | -2.014 1.867
 49 | -1.873 1.569
 50 | -1.937 2.120
 51 | -1.986 2.493
 52 | 0.307 -1.047
 53 | -4.123 -0.378
 54 | 3.704 0.539
 55 | -5.962 0.918
 56 | -4.240 -0.327
 57 | -0.608 0.512
 58 | -3.443 1.569
 59 | -0.804 1.247
 60 | -2.958 0.248
 61 | 0.924 -3.151
 62 | -2.115 0.805
 63 | -4.662 1.184
 64 | 1.019 1.455
 65 | -2.393 1.492
 66 | 1.123 -2.130
 67 | 0.168 0.846
 68 | -3.872 2.626
 69 | -2.089 0.381
 70 | -2.326 -1.748
 71 | -2.838 3.114
 72 | -2.223 0.857
 73 | 0.925 0.604
 74 | -0.706 5.280
 75 | 3.936 0.304
 76 | 0.160 1.607
 77 | -3.245 -0.569
 78 | -0.417 1.161
 79 | -2.833 -2.492
 80 | -3.010 -2.303
 81 | -3.356 2.212
 82 | -4.991 -2.235
 83 | 1.204 0.342
 84 | -2.079 1.078
 85 | -3.762 1.349
 86 | -1.082 -2.931
 87 | -0.823 2.491
 88 | -8.023 0.024
 89 | -1.760 0.168
 90 | -0.209 1.988
 91 | -2.356 -2.281
 92 | -0.734 -0.621
 93 | -3.045 0.969
 94 | -5.206 -0.443
 95 | 2.203 -1.010
 96 | 1.072 -1.978
 97 | -3.230 0.118
 98 | -3.674 -0.838
 99 | -3.255 -0.399
100 | -1.287 3.037
101 | -4.385 -2.846
102 | -2.053 1.211
103 | -2.713 0.836
104 | -4.073 -0.909
105 | -1.655 0.357
106 | 0.535 -0.393
107 | -0.460 1.790
108 | -2.315 2.848
109 | -0.658 -0.788
110 | -0.079 1.463
111 | -4.871 -0.099
112 | -4.295 1.703
113 | -2.091 -0.367
114 | -4.903 4.633
115 | -2.534 0.165
116 | -2.828 -2.721
117 | -1.212 -3.761
118 | -1.151 1.551
119 | -3.236 1.111
120 | -1.239 -0.034
121 | -1.724 -1.367
122 | 2.660 -4.041
123 | -1.818 0.661
124 | -5.944 2.558
125 | -4.394 2.244
126 | -2.540 0.806
127 | -4.883 -0.049
128 | -3.113 -0.457
129 | -4.103 2.683
130 | 0.283 3.226
131 | -0.130 -2.662
132 | 0.403 0.733
133 | -0.403 0.606
134 | -3.437 -6.852
135 | 0.404 3.359
136 | 0.251 -1.173
137 | -1.596 -2.166
138 | -1.011 -0.602
139 | -1.703 1.109
140 | -1.739 -4.371
141 | -1.746 0.817
142 | -4.544 -2.334
143 | -6.628 -1.493
144 | -4.571 -2.514
145 | -2.724 -0.013
146 | -2.473 1.586
147 | 0.030 1.018
148 | -2.421 0.238
149 | -2.342 -1.351
150 | -5.096 -0.911
151 | 5.527 3.552
152 | 10.888 1.802
153 | 5.429 0.757
154 | 3.231 -3.239
155 | 8.987 4.426
156 | 4.898 -1.611
157 | 4.626 0.283
158 | 5.537 0.983
159 | 7.987 -2.235
160 | 6.143 -0.569
161 | 6.666 0.271
162 | 5.999 1.284
163 | 9.017 -0.999
164 | 3.806 0.903
165 | 6.154 0.241
166 | 6.672 2.719
167 | 5.871 0.196
168 | 3.813 -1.135
169 | 7.881 1.362
170 | 9.100 2.771
171 | 3.387 1.612
172 | 8.083 0.210
173 | 3.750 2.651
174 | 5.369 -0.193
175 | 7.204 -0.956
176 | 8.016 -0.652
177 | 7.353 -1.839
178 | 4.238 1.050
179 | 4.381 3.427
180 | 4.572 -1.886
181 | 6.042 0.937
182 | 5.477 2.724
183 | 3.756 -0.198
184 | 7.936 -1.808
185 | 4.342 2.428
186 | 6.148 4.070
187 | 6.538 -0.435
188 | 6.053 2.881
189 | 3.857 -0.509
190 | 7.087 -1.040
191 | 7.807 0.818
192 | 6.928 -1.860
193 | 3.942 -1.397
194 | 4.344 0.454
195 | 8.254 0.110
196 | 2.058 0.728
197 | 10.822 1.194
198 | 3.415 -2.787
199 | 5.439 -1.172
200 | 1.760 -3.119
201 | 1.482 -1.594
202 | 1.563 3.877
203 | 3.382 1.668
204 | 5.983 2.140
205 | 5.351 2.078
206 | 3.195 -0.862
207 | 4.795 1.614
208 | 5.706 1.580
209 | 3.445 1.636
210 | 6.017 -0.076
211 | 10.823 0.710
212 | 7.026 0.657
213 | 5.780 0.131
214 | 2.183 0.358
215 | 6.830 -0.553
216 | 4.837 -2.774
217 | 8.678 -2.050
218 | 5.581 -2.388
219 | 4.828 1.030
220 | 8.406 -1.226
221 | 5.062 -3.970
222 | 5.406 3.626
223 | 5.479 1.345
224 | 4.330 -2.106
225 | 5.000 2.141
226 | 7.317 2.091
227 | 4.434 -1.446
228 | 6.710 -1.961
229 | 6.589 4.504
230 | 5.809 -2.593
231 | 8.715 -3.667
232 | 5.699 -1.188
233 | 1.925 -0.678
234 | 6.246 1.263
235 | 6.461 0.576
236 | 7.807 1.323
237 | 5.766 0.125
238 | 6.075 -1.896
239 | 6.340 1.511
240 | 3.371 4.829
241 | 5.120 1.404
242 | 2.831 0.480
243 | 6.816 0.654
244 | 5.559 -1.168
245 | 7.542 -2.702
246 | 8.661 0.265
247 | 4.604 -2.113
248 | 5.909 -0.765
249 | 5.419 3.515
250 | 5.866 -0.023
251 | 


--------------------------------------------------------------------------------
/P4/resources_netflix_Abel.rar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P4/resources_netflix_Abel.rar


--------------------------------------------------------------------------------
/P4/resources_netflix_solution.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P4/resources_netflix_solution.tar.gz


--------------------------------------------------------------------------------
/P5 Text-Based Game.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P5 Text-Based Game.txt


--------------------------------------------------------------------------------
/P5/resources_rl.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P5/resources_rl.tar.gz


--------------------------------------------------------------------------------
/P5/resources_rl/rl/__pycache__/framework.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P5/resources_rl/rl/__pycache__/framework.cpython-36.pyc


--------------------------------------------------------------------------------
/P5/resources_rl/rl/__pycache__/utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P5/resources_rl/rl/__pycache__/utils.cpython-36.pyc


--------------------------------------------------------------------------------
/P5/resources_rl/rl/agent_dqn.py:
--------------------------------------------------------------------------------
  1 | """Deep Q-network (DQN) agent"""
  2 | import torch
  3 | import torch.nn as nn
  4 | import torch.optim as optim
  5 | import torch.nn.functional as F
  6 | import numpy as np
  7 | import matplotlib.pyplot as plt
  8 | from tqdm import tqdm
  9 | import framework
 10 | import utils
 11 | 
 12 | DEBUG = False  # not read anywhere in this file
 13 | 
 14 | GAMMA = 0.5  # discount factor
 15 | TRAINING_EP = 0.5  # epsilon-greedy parameter for training
 16 | TESTING_EP = 0.05  # epsilon-greedy parameter for testing
 17 | NUM_RUNS = 10  # number of independent run() calls averaged in the final plot
 18 | NUM_EPOCHS = 300  # number of epochs in each run
 19 | NUM_EPIS_TRAIN = 25  # number of episodes for training at each epoch
 20 | NUM_EPIS_TEST = 50  # number of episodes for testing
 21 | ALPHA = 0.1  # learning rate for training
 22 | 
 23 | ACTIONS = framework.get_actions()
 24 | OBJECTS = framework.get_objects()
 25 | NUM_ACTIONS = len(ACTIONS)  # size of the action output of the DQN
 26 | NUM_OBJECTS = len(OBJECTS)  # size of the object output of the DQN
 27 | 
 28 | model = None  # the DQN; (re)created by run()
 29 | optimizer = None  # SGD optimizer over model's parameters; (re)created by run()
 30 | 
 31 | # pragma: coderesponse template
 32 | def epsilon_greedy(state_vector, epsilon):
 33 |     """Returns an action selected by an epsilon-greedy exploration policy
 34 | 
 35 |     Args:
 36 |         state_vector (torch.FloatTensor): extracted vector representation
 37 |         epsilon (float): the probability of choosing a random command
 38 | 
 39 |     Returns:
 40 |         (int, int): the indices describing the action/object to take
 41 |     """
 42 |     # TODO Your code here
 43 |     # recall model by state_vector 
 44 |     act_arr, obj_arr = model(state_vector)
 45 | 
 46 |     # random selection with assigned p
 47 |     res = np.random.choice(a = [0, 1], p = [epsilon, 1-epsilon])
 48 | 
 49 |     # optimal policy
 50 |     if res == 1:
 51 |         action_index = torch.argmax(act_arr)
 52 |         object_index = torch.argmax(obj_arr)
 53 |     # random select action-object 
 54 |     if res == 0:
 55 |         action_index = np.random.randint(0, NUM_ACTIONS)
 56 |         object_index = np.random.randint(0, NUM_OBJECTS)
 57 | 
 58 | 
 59 |     return (action_index, object_index)
 60 | 
 61 | # pragma: coderesponse end
 62 | 
 63 | 
 64 | class DQN(nn.Module):
 65 |     """A simple deep Q network implementation.
 66 |     Computes Q values for each (action, object) tuple given an input state vector
 67 |     """
 68 | 
 69 |     def __init__(self, state_dim, action_dim, object_dim, hidden_size=100):
 70 |         super(DQN, self).__init__()
 71 |         self.state_encoder = nn.Linear(state_dim, hidden_size)  # shared hidden layer
 72 |         self.state2action = nn.Linear(hidden_size, action_dim)  # head: one output per action
 73 |         self.state2object = nn.Linear(hidden_size, object_dim)  # head: one output per object
 74 | 
 75 |     def forward(self, x):
 76 |         state = F.relu(self.state_encoder(x))  # hidden features shared by both heads
 77 |         return self.state2action(state), self.state2object(state)  # (action outputs, object outputs)
 78 | 
 79 | 
 80 | # pragma: coderesponse template
 81 | def deep_q_learning(current_state_vector, action_index, object_index, reward,
 82 |                     next_state_vector, terminal):
 83 |     """Updates the weights of the DQN for a given transition
 84 | 
 85 |     Args:
 86 |         current_state_vector (torch.FloatTensor): vector representation of current state
 87 |         action_index (int): index of the current action
 88 |         object_index (int): index of the current object
 89 |         reward (float): the immediate reward the agent recieves from playing current command
 90 |         next_state_vector (torch.FloatTensor): vector representation of next state
 91 |         terminal (bool): True if this epsiode is over
 92 | 
 93 |     Returns:
 94 |         None
 95 |     """
 96 |     with torch.no_grad():
 97 |         q_values_action_next, q_values_object_next = model(next_state_vector)
 98 |     
 99 |     # recall    
100 |     # structure not nderstand yet 
101 |     q_value_cur_state = model(current_state_vector)
102 |   
103 |     # TODO Your code here
104 |     # under Tensor class
105 |     maxq_next = 0.5 * (q_values_action_next.max() + q_values_object_next.max())      
106 |     
107 |     # extract current value 
108 |     current = 0.5 * (q_value_cur_state[0][action_index] + q_value_cur_state[1][object_index]) 
109 |   
110 |     if not terminal:
111 |         y = reward + (GAMMA * maxq_next)
112 |         loss = 0.5 * (y - current)**2
113 |     else:   
114 |         y = reward
115 |         loss = 0.5 * (y - current)**2
116 | 
117 |     # torch objects
118 |     optimizer.zero_grad()
119 |     loss.backward()
120 |     optimizer.step()
121 | 
122 | # pragma: coderesponse end
123 | 
124 | 
125 | def run_episode(for_training):
126 |     """
127 |         Runs one episode
128 |         If for training, update Q function
129 |         If for testing, computes and return cumulative discounted reward
130 |     """
131 |     epsilon = TRAINING_EP if for_training else TESTING_EP
132 | 
133 |     # initialize for each episode
134 |     # TODO Your code here
135 | 
136 |     (current_room_desc, current_quest_desc, terminal) = framework.newGame()
137 | 
138 |     count = 0
139 |     epi_reward = 0
140 | 
141 |     while not terminal:
142 |         # Choose next action and execute
143 |         current_state = current_room_desc + current_quest_desc
144 |         current_state_vector = torch.FloatTensor(utils.extract_bow_feature_vector(current_state, dictionary))
145 | 
146 |         (action_index, object_index) = epsilon_greedy(current_state_vector, epsilon)
147 | 
148 |         (next_room_desc, next_quest_desc, reward, terminal) \
149 |             = framework.step_game(current_room_desc, current_quest_desc, action_index, object_index) 
150 | 
151 |         next_state = next_room_desc + next_quest_desc
152 |         next_state_vector = torch.FloatTensor(utils.extract_bow_feature_vector(next_state, dictionary))
153 | 
154 |         # TODO Your code here
155 | 
156 |         if for_training:
157 |             # update Q-function.
158 |             # TODO Your code here
159 |             deep_q_learning(current_state_vector, action_index, object_index, reward, next_state_vector, terminal)
160 | 
161 |         if not for_training:
162 |             # update reward
163 |             # TODO Your code here
164 |             epi_reward += np.power(GAMMA, count)*reward
165 | 
166 |         # prepare next step
167 |         # TODO Your code here
168 |         count += 1
169 |         current_room_desc = next_room_desc 
170 |         current_quest_desc = next_quest_desc
171 | 
172 |     if not for_training:
173 |         return epi_reward
174 | 
175 | 
176 | def run_epoch():
177 |     """Runs one epoch and returns reward averaged over test episodes"""
178 |     rewards = []
179 | 
180 |     for _ in range(NUM_EPIS_TRAIN):
181 |         run_episode(for_training=True)
182 | 
183 |     for _ in range(NUM_EPIS_TEST):
184 |         rewards.append(run_episode(for_training=False))
185 | 
186 |     return np.mean(np.array(rewards))
187 | 
188 | 
189 | def run():
190 |     """Returns array of test reward per epoch for one run"""
191 |     global model
192 |     global optimizer
193 |     model = DQN(state_dim, NUM_ACTIONS, NUM_OBJECTS)
194 |     optimizer = optim.SGD(model.parameters(), lr=ALPHA)
195 | 
196 |     single_run_epoch_rewards_test = []
197 |     pbar = tqdm(range(NUM_EPOCHS), ncols=80)
198 |     for _ in pbar:
199 |         single_run_epoch_rewards_test.append(run_epoch())
200 |         pbar.set_description(
201 |             "Avg reward: {:0.6f} | Ewma reward: {:0.6f}".format(
202 |                 np.mean(single_run_epoch_rewards_test),
203 |                 utils.ewma(single_run_epoch_rewards_test)))
204 |     return single_run_epoch_rewards_test
205 | 
206 | 
207 | if __name__ == '__main__':
208 |     state_texts = utils.load_data('game.tsv')
209 |     dictionary = utils.bag_of_words(state_texts)
210 |     state_dim = len(dictionary)
211 | 
212 |     # set up the game
213 |     framework.load_game_data()
214 | 
215 |     epoch_rewards_test = []  # shape NUM_RUNS * NUM_EPOCHS
216 | 
217 |     for _ in range(NUM_RUNS):
218 |         epoch_rewards_test.append(run())
219 | 
220 |     epoch_rewards_test = np.array(epoch_rewards_test)
221 | 
222 |     x = np.arange(NUM_EPOCHS)
223 |     fig, axis = plt.subplots()
224 |     axis.plot(x, np.mean(epoch_rewards_test,
225 |                          axis=0))  # plot reward per epoch averaged per run
226 |     axis.set_xlabel('Epochs')
227 |     axis.set_ylabel('reward')
228 |     axis.set_title(('Linear: nRuns=%d, Epilon=%.2f, Epi=%d, alpha=%.4f' %
229 |                     (NUM_RUNS, TRAINING_EP, NUM_EPIS_TRAIN, ALPHA)))
230 |     plt.show()
231 | 


--------------------------------------------------------------------------------
/P5/resources_rl/rl/agent_linear.py:
--------------------------------------------------------------------------------
  1 | """Linear QL agent"""
  2 | import numpy as np
  3 | import matplotlib.pyplot as plt
  4 | from tqdm import tqdm
  5 | import framework
  6 | import utils
  7 | 
  8 | DEBUG = False  # not read anywhere in this file
  9 | 
 10 | 
 11 | GAMMA = 0.5  # discount factor
 12 | TRAINING_EP = 0.5  # epsilon-greedy parameter for training
 13 | TESTING_EP = 0.05  # epsilon-greedy parameter for testing
 14 | NUM_RUNS = 10  # number of independent run() calls averaged in the final plot
 15 | NUM_EPOCHS = 600  # number of epochs in each run
 16 | NUM_EPIS_TRAIN = 25  # number of episodes for training at each epoch
 17 | NUM_EPIS_TEST = 50  # number of episodes for testing
 18 | ALPHA = 0.001  # learning rate for training
 19 | 
 20 | ACTIONS = framework.get_actions()
 21 | OBJECTS = framework.get_objects()
 22 | NUM_ACTIONS = len(ACTIONS)
 23 | NUM_OBJECTS = len(OBJECTS)  # also the stride used by tuple2index/index2tuple
 24 | 
 25 | 
 26 | def tuple2index(action_index, object_index):
 27 |     """Converts a tuple (a,b) to an index c"""
 28 |     return action_index * NUM_OBJECTS + object_index
 29 | 
 30 | 
 31 | def index2tuple(index):
 32 |     """Converts an index c to a tuple (a,b)"""
 33 |     return index // NUM_OBJECTS, index % NUM_OBJECTS
 34 | 
 35 | 
 36 | # pragma: coderesponse template name="linear_epsilon_greedy"
 37 | def epsilon_greedy(state_vector, theta, epsilon):
 38 |     """Returns an action selected by an epsilon-greedy exploration policy
 39 | 
 40 |     Args:
 41 |         state_vector (np.ndarray): extracted vector representation
 42 |         theta (np.ndarray): current weight matrix
 43 |         epsilon (float): the probability of choosing a random command
 44 | 
 45 |     Returns:
 46 |         (int, int): the indices describing the action/object to take
 47 |     """
 48 |     # TODO Your code here
 49 | 
 50 |     res = np.random.choice(a = [0, 1], p = [epsilon, 1-epsilon])
 51 |     
 52 |     # optimal policy
 53 |     if res == 1:
 54 |         Q = np.inner(theta, state_vector) # for each value of Q is superiposed by a state vector and theta (weighting) 
 55 |         # np class is more complicated to fetch index for max value with its arguement 
 56 |         ind = np.unravel_index(np.argmax(Q, axis=None), Q.shape)[0] # (n,) -> into numpy.int64
 57 |         action_index, object_index = index2tuple(ind)
 58 |     # random select action-object 
 59 |     if res == 0:
 60 |         action_index = np.random.randint(0, NUM_ACTIONS)
 61 |         object_index = np.random.randint(0, NUM_OBJECTS)
 62 | 
 63 |     return (action_index, object_index)
 64 | 
 65 | # pragma: coderesponse end
 66 | 
 67 | 
 68 | # pragma: coderesponse template
 69 | def linear_q_learning(theta, current_state_vector, action_index, object_index,
 70 |                       reward, next_state_vector, terminal):
 71 |     """Update theta for a given transition
 72 | 
 73 |     Args:
 74 |         theta (np.ndarray): current weight matrix
 75 |         current_state_vector (np.ndarray): vector representation of current state
 76 |         action_index (int): index of the current action
 77 |         object_index (int): index of the current object
 78 |         reward (float): the immediate reward the agent recieves from playing current command
 79 |         next_state_vector (np.ndarray): vector representation of next state
 80 |         terminal (bool): True if this epsiode is over
 81 | 
 82 |     Returns:
 83 |         None
 84 |     """
 85 |     # TODO Your code here
 86 |     # Q is local variable here
 87 |     # data structure is not clear yet 
 88 | 
 89 |     # preparation
 90 |     q_value = (theta @ current_state_vector)[tuple2index(action_index, object_index)]
 91 |     max_q_value_next  = 0 if terminal else np.max(theta @ next_state_vector)
 92 |     y = reward + (GAMMA * max_q_value_next)
 93 |     delta_theta = (y - q_value) * current_state_vector
 94 |     
 95 |     # update 
 96 |     theta[tuple2index(action_index, object_index)] \
 97 |         = theta[tuple2index(action_index, object_index)] + (ALPHA * delta_theta)     
 98 | 
 99 | # pragma: coderesponse end
100 | 
101 | 
102 | def run_episode(for_training):
103 |     """ Runs one episode
104 |     If for training, update Q function
105 |     If for testing, computes and return cumulative discounted reward
106 | 
107 |     Args:
108 |         for_training (bool): True if for training
109 | 
110 |     Returns:
111 |         None
112 |     """
113 |     epsilon = TRAINING_EP if for_training else TESTING_EP
114 | 
115 |     # initialize for each episode
116 |     # TODO Your code here
117 | 
118 |     (current_room_desc, current_quest_desc, terminal) = framework.newGame()
119 | 
120 |     count = 0
121 |     epi_reward = 0
122 | 
123 |     while not terminal:
124 | 
125 |         # Choose next action and execute
126 |         # recall 
127 |         current_state = current_room_desc + current_quest_desc
128 |         current_state_vector = utils.extract_bow_feature_vector(current_state, dictionary)
129 | 
130 |         (action_index, object_index) = epsilon_greedy(current_state_vector, theta, epsilon)
131 | 
132 |         (next_room_desc, next_quest_desc, reward, terminal) \
133 |             = framework.step_game(current_room_desc, current_quest_desc, action_index, object_index) 
134 | 
135 |         next_state = next_room_desc + next_quest_desc
136 |         next_state_vector = utils.extract_bow_feature_vector(next_state, dictionary)
137 | 
138 |         if for_training:
139 |             # update Q-function.
140 |             # TODO Your code here
141 |             linear_q_learning(theta, current_state_vector, action_index, object_index, reward, 
142 |             next_state_vector, terminal)     
143 | 
144 | 
145 |         if not for_training:
146 |             # update reward
147 |             # TODO Your code here
148 |             epi_reward += np.power(GAMMA, count)*reward
149 | 
150 |         # prepare next step
151 |         # TODO Your code here
152 |         count += 1
153 |         current_room_desc = next_room_desc
154 |         current_quest_desc = next_quest_desc
155 | 
156 |     if not for_training:
157 |         return epi_reward
158 | 
159 | 
160 | def run_epoch():
161 |     """Runs one epoch and returns reward averaged over test episodes"""
162 |     rewards = []
163 | 
164 |     for _ in range(NUM_EPIS_TRAIN):
165 |         run_episode(for_training=True)
166 | 
167 |     for _ in range(NUM_EPIS_TEST):
168 |         rewards.append(run_episode(for_training=False))
169 | 
170 |     return np.mean(np.array(rewards))
171 | 
172 | 
173 | def run():
174 |     """Returns array of test reward per epoch for one run"""
175 |     global theta
176 |     theta = np.zeros([action_dim, state_dim])
177 | 
178 |     single_run_epoch_rewards_test = []
179 |     pbar = tqdm(range(NUM_EPOCHS), ncols=80)
180 |     for _ in pbar:
181 |         single_run_epoch_rewards_test.append(run_epoch())
182 |         pbar.set_description(
183 |             "Avg reward: {:0.6f} | Ewma reward: {:0.6f}".format(
184 |                 np.mean(single_run_epoch_rewards_test),
185 |                 utils.ewma(single_run_epoch_rewards_test)))
186 |     return single_run_epoch_rewards_test
187 | 
188 | 
189 | if __name__ == '__main__':
190 |     state_texts = utils.load_data('game.tsv')
191 |     dictionary = utils.bag_of_words(state_texts)  # read as a global by run_episode()
192 |     state_dim = len(dictionary)  # number of columns of theta, see run()
193 |     action_dim = NUM_ACTIONS * NUM_OBJECTS  # number of rows of theta, see run()
194 | 
195 |     # set up the game
196 |     framework.load_game_data()
197 | 
198 |     epoch_rewards_test = []  # one list of per-epoch test rewards per run
199 | 
200 |     for _ in range(NUM_RUNS):
201 |         epoch_rewards_test.append(run())
202 | 
203 |     epoch_rewards_test = np.array(epoch_rewards_test)
204 | 
205 |     x = np.arange(NUM_EPOCHS)
206 |     fig, axis = plt.subplots()
207 |     axis.plot(x, np.mean(epoch_rewards_test,
208 |                          axis=0))  # plot reward per epoch, averaged over the runs
209 |     axis.set_xlabel('Epochs')
210 |     axis.set_ylabel('reward')
211 |     axis.set_title(('Linear: nRuns=%d, Epilon=%.2f, Epi=%d, alpha=%.4f' %
212 |                     (NUM_RUNS, TRAINING_EP, NUM_EPIS_TRAIN, ALPHA)))
213 | 
214 |     plt.show()
215 | 
216 | 
217 | 


--------------------------------------------------------------------------------
/P5/resources_rl/rl/agent_tabular_ql.py:
--------------------------------------------------------------------------------
  1 | """Tabular QL agent"""
  2 | import numpy as np
  3 | import matplotlib.pyplot as plt
  4 | from tqdm import tqdm
  5 | import framework
  6 | import utils
  7 | 
  8 | DEBUG = False
  9 | 
 10 | GAMMA = 0.5  # discount factor
 11 | TRAINING_EP = 0.5  # epsilon-greedy parameter for training
 12 | TESTING_EP = 0.05  # epsilon-greedy parameter for testing
 13 | NUM_RUNS = 10
 14 | NUM_EPOCHS = 200
 15 | NUM_EPIS_TRAIN = 25  # number of episodes for training at each epoch
 16 | NUM_EPIS_TEST = 50  # number of episodes for testing
 17 | ALPHA = 1e-6  # learning rate: weight of the new target in tabular_q_learning
 18 | 
 19 | ACTIONS = framework.get_actions()
 20 | OBJECTS = framework.get_objects()
 21 | NUM_ACTIONS = len(ACTIONS)  # upper bound of the random action draw in epsilon_greedy
 22 | NUM_OBJECTS = len(OBJECTS)  # upper bound of the random object draw in epsilon_greedy
 23 | 
 24 | 
 25 | 
 26 | # pragma: coderesponse template
 27 | def epsilon_greedy(state_1, state_2, q_func, epsilon):
 28 |     """Returns an action selected by an epsilon-Greedy exploration policy
 29 | 
 30 |     Args:
 31 |         state_1, state_2 (int, int): two indices describing the current state
 32 |         q_func (np.ndarray): current Q-function
 33 |         epsilon (float): the probability of choosing a random command
 34 | 
 35 |     Returns:
 36 |         (int, int): the indices describing the action/object to take
 37 |     """
 38 |     # TODO Your code here
 39 |     
 40 |     # initial
 41 |     action_index, object_index = None, None
 42 | 
 43 |     res = np.random.choice(a = [0, 1], p = [epsilon, 1-epsilon])
 44 | 
 45 |     # optimal policy
 46 |     if res == 1:
 47 |         # fetch max argument index by np.unravel_index 
 48 |         ind = np.unravel_index(np.argmax(q_func[state_1, state_2], axis=None), q_func[state_1, state_2].shape)
 49 |         action_index = ind[0]
 50 |         object_index = ind[1]
 51 |     # random select action-object 
 52 |     if res == 0:
 53 |         action_index = np.random.randint(NUM_ACTIONS)
 54 |         object_index = np.random.randint(NUM_OBJECTS)
 55 | 
 56 |     return (action_index, object_index)
 57 | 
 58 | # pragma: coderesponse end
 59 | 
 60 | 
 61 | # pragma: coderesponse template
 62 | def tabular_q_learning(q_func, current_state_1, current_state_2, action_index,
 63 |                        object_index, reward, next_state_1, next_state_2,
 64 |                        terminal):
 65 |     """Update q_func for a given transition
 66 | 
 67 |     Args:
 68 |         q_func (np.ndarray): current Q-function
 69 |         current_state_1, current_state_2 (int, int): two indices describing the current state
 70 |         action_index (int): index of the current action
 71 |         object_index (int): index of the current object
 72 |         reward (float): the immediate reward the agent recieves from playing current command
 73 |         next_state_1, next_state_2 (int, int): two indices describing the next state
 74 |         terminal (bool): True if this epsiode is over
 75 | 
 76 |     Returns:
 77 |         None
 78 |     """
 79 |     # TODO Your code here
  80 |     # in-place update:
  81 |     # q_func is modified through indexing, so the caller's array changes too
 82 |     
 83 |     # q_func = np.zeros((NUM_ROOM_DESC, NUM_QUESTS, NUM_ACTIONS, NUM_OBJECTS))
 84 |     # should give some insight in what each dimension represents.  
 85 | 
 86 |     # TODO Your update here
 87 | 
 88 |     if terminal == False:   
 89 | 
 90 |         q_func[current_state_1, current_state_2, action_index, object_index] \
 91 |             =  (1-ALPHA)*q_func[current_state_1, current_state_2, action_index, object_index] \
 92 |                 + ALPHA*(reward + GAMMA*np.max(q_func[next_state_1, next_state_2]))
 93 | 
  94 |     # in a terminal state there is no next-state value to bootstrap from,
  95 |     # so the update target is just the immediate reward
 96 |     else:
 97 |         q_func[current_state_1, current_state_2, action_index, object_index] \
 98 |             =  (1-ALPHA)*q_func[current_state_1, current_state_2, action_index, object_index] \
 99 |                 + ALPHA*reward
100 | 
101 |     return None  # This function shouldn't return anything
102 | 
103 | 
104 | # pragma: coderesponse end
105 | 
106 | 
107 | # pragma: coderesponse template
108 | def run_episode(for_training):
109 |     """ Runs one episode
110 |     If for training, update Q function
111 |     If for testing, computes and return cumulative discounted reward
112 | 
113 |     Args:
114 |         for_training (bool): True if for training
115 | 
116 |     Returns:
 117 |         None if for training; otherwise the cumulative discounted reward of the episode
118 |     """
119 |     epsilon = TRAINING_EP if for_training else TESTING_EP
120 | 
121 |     # initialize for each episode
122 |     # TODO Your code here
123 | 
124 |     (current_room_desc, current_quest_desc, terminal) = framework.newGame()
125 |     
126 |     # initial value 
127 |     count = 0
128 |     epi_reward = 0
129 | 
130 |     while not terminal:
131 |         # Choose next action and execute
132 |         # TODO Your code here
133 | 
134 |         # recall index from dictionary by "description" key
135 |         current_state_1 = dict_room_desc[current_room_desc]
136 |         current_state_2 = dict_quest_desc[current_quest_desc]
137 | 
138 |         (action_index, object_index) = epsilon_greedy(current_state_1, current_state_2, q_func, epsilon)
139 | 
140 |         (next_room_desc, next_quest_desc, reward, terminal) \
141 |             = framework.step_game(current_room_desc, current_quest_desc, action_index, object_index) 
142 | 
143 |         next_state_1 = dict_room_desc[next_room_desc]
144 |         next_state_2 = dict_quest_desc[next_quest_desc]
145 | 
146 |         if for_training:
147 |             # update Q-function.
148 |             # TODO Your code here
149 |             tabular_q_learning(q_func, current_state_1, current_state_2, action_index, object_index, 
150 |             reward, next_state_1, next_state_2, terminal)
151 | 
152 | 
153 |         if not for_training:
154 |             # update reward
155 |             # TODO Your code here
156 |             epi_reward += np.power(GAMMA, count)*reward
157 | 
158 |         # prepare next step
159 |         # TODO Your code here
160 |         count += 1
161 |         current_room_desc = next_room_desc
162 |         current_quest_desc = next_quest_desc
163 | 
164 |     if not for_training:
165 |         return epi_reward
166 | 
167 | 
168 | # pragma: coderesponse end
169 | 
170 | 
171 | def run_epoch():
172 |     """Runs one epoch and returns reward averaged over test episodes"""
173 |     rewards = []
174 | 
175 |     for _ in range(NUM_EPIS_TRAIN):
176 |         run_episode(for_training=True)
177 | 
178 |     for _ in range(NUM_EPIS_TEST):
179 |         rewards.append(run_episode(for_training=False))
180 | 
181 |     return np.mean(np.array(rewards))
182 | 
183 | 
184 | def run():
 185 |     """Returns list of test rewards per epoch for one run"""
186 |     global q_func
187 |     q_func = np.zeros((NUM_ROOM_DESC, NUM_QUESTS, NUM_ACTIONS, NUM_OBJECTS))
188 | 
189 |     single_run_epoch_rewards_test = []
190 |     pbar = tqdm(range(NUM_EPOCHS), ncols=80)
191 |     for _ in pbar:
192 |         single_run_epoch_rewards_test.append(run_epoch())
193 |         pbar.set_description(
194 |             "Avg reward: {:0.6f} | Ewma reward: {:0.6f}".format(
195 |                 np.mean(single_run_epoch_rewards_test),
196 |                 utils.ewma(single_run_epoch_rewards_test)))
197 |     return single_run_epoch_rewards_test
198 | 
199 | 
200 | if __name__ == '__main__':
201 |     # Data loading and build the dictionaries that use unique index for each state
202 |     (dict_room_desc, dict_quest_desc) = framework.make_all_states_index()
203 |     NUM_ROOM_DESC = len(dict_room_desc)
204 |     NUM_QUESTS = len(dict_quest_desc)
205 | 
206 |     # set up the game
207 |     framework.load_game_data()
208 | 
209 |     epoch_rewards_test = []  # shape NUM_RUNS * NUM_EPOCHS
210 | 
211 |     for _ in range(NUM_RUNS):
212 |         epoch_rewards_test.append(run())
213 | 
214 |     epoch_rewards_test = np.array(epoch_rewards_test)
215 | 
216 |     x = np.arange(NUM_EPOCHS)
217 |     fig, axis = plt.subplots()
218 |     axis.plot(x, np.mean(epoch_rewards_test,
219 |                          axis=0))  # plot reward per epoch averaged per run
220 |     axis.set_xlabel('Epochs')
221 |     axis.set_ylabel('reward')
222 |     axis.set_title(('Tablular: nRuns=%d, Epilon=%.2f, Epi=%d, alpha=%.4f' %
223 |                     (NUM_RUNS, TRAINING_EP, NUM_EPIS_TRAIN, ALPHA)))
224 |     plt.show()
225 | 
226 | 
227 | 


--------------------------------------------------------------------------------
/P5/resources_rl/rl/framework.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import matplotlib.pyplot as plt
  3 | import random
  4 | 
  5 | DEBUG = False
  6 | DEFAULT_REWARD = -0.01 # Negative reward for each non-terminal step
  7 | JUNK_CMD_REWARD = -0.1 # Negative reward for invalid commands
  8 | QUEST_REWARD = 1 # positive reward for finishing quest
  9 | STEP_COUNT = 0  #count the number of steps in current episode
 10 | MAX_STEPS = 20
 11 | 
 12 | # --Simple quests
 13 | quests = ['You are bored.', 'You are getting fat.', 'You are hungry.','You are sleepy.']
 14 | quests_map = {}
 15 | # --(somewhat) complex quests
 16 | # -- quests = {'You are not sleepy but hungry.',
 17 | # --                     'You are not hungry but sleepy.',
 18 | # --                     'You are not getting fat but bored.',
 19 | # --                     'You are not bored but getting fat.'}
 20 | 
 21 | quest_actions = ['watch', 'exercise', 'eat', 'sleep'] #aligned to quests above
 22 | quest_objects = ['tv', 'bike', 'apple', 'bed'] #aligned to quest actions above
 23 | 
 24 | rooms = ['Living', 'Garden', 'Kitchen','Bedroom']
 25 | living_desc = ['This room has a couch, chairs and TV.',
 26 |           'You have entered the living room. You can watch TV here.',
 27 |           'This room has two sofas, chairs and a chandelier.',
 28 |           'A huge television that is great for watching games.']
 29 | garden_desc = ['This space has a swing, flowers and trees.',
 30 |           'You have arrived at the garden. You can exercise here',
 31 |           'This area has plants, grass and rabbits.',
 32 |           'A nice shiny bike that is fun to ride.',]
 33 | kitchen_desc = ['This room has a fridge, oven, and a sink.',
 34 |            'You have arrived in the kitchen. You can find food and drinks here.',
 35 |            'This living area has pizza, coke, and icecream.',
 36 |            'A red juicy fruit.']
 37 | bedroom_desc = ['This area has a bed, desk and a dresser.',
 38 |            'You have arrived in the bedroom. You can rest here.',
 39 |            'You see a wooden cot and a mattress on top of it.',
 40 |            'A nice, comfortable bed with pillows and sheets.']
 41 | rooms_desc = {'Living': living_desc, 'Garden': garden_desc, 'Kitchen': kitchen_desc, 'Bedroom': bedroom_desc}
 42 | rooms_desc_map = {}
 43 | 
 44 | 
 45 | actions = ['eat', 'sleep', 'watch', 'exercise', 'go']
 46 | objects = ['apple', 'bed', 'tv', 'bike', 'north','south','east','west']
 47 | 
 48 | living_valid_act = ['go', 'go', 'watch']
 49 | living_valid_obj = ['south', 'west', 'tv']
 50 | living_transit = ['Bedroom', 'Garden', 'Living']
 51 | garden_valid_act = ['go', 'go', 'exercise']
 52 | garden_valid_obj = ['south', 'east', 'bike']
 53 | garden_transit = ['Kitchen', 'Living', 'Garden']
 54 | kitchen_valid_act = ['go', 'go', 'eat']
 55 | kitchen_valid_obj = ['north', 'east', 'apple']
 56 | kitchen_transit = ['Garden', 'Bedroom', 'Kitchen']
 57 | bedroom_valid_act =['go', 'go', 'sleep']
 58 | bedroom_valid_obj =['north', 'west', 'bed']
 59 | bedroom_transit = ['Living', 'Kitchen', 'Bedroom']
 60 | 
 61 | rooms_valid_acts = {'Living': living_valid_act, 'Garden': garden_valid_act, 'Kitchen': kitchen_valid_act, 'Bedroom': bedroom_valid_act}
 62 | rooms_valid_objs = {'Living': living_valid_obj, 'Garden': garden_valid_obj, 'Kitchen': kitchen_valid_obj, 'Bedroom': bedroom_valid_obj}
 63 | rooms_transit = {'Living': living_transit, 'Garden': garden_transit, 'Kitchen': kitchen_transit, 'Bedroom': bedroom_transit}
 64 | 
 65 | NUM_ROOMS = len(rooms)
 66 | NUM_QUESTS = len(quests)
 67 | NUM_ACTIONS = len(actions)
 68 | NUM_OBJECTS = len(objects)
 69 | 
 70 | 
 71 | command_is_valid = np.zeros((NUM_ROOMS,NUM_ACTIONS,NUM_OBJECTS))
 72 | transit_matrix = np.zeros((NUM_ROOMS,NUM_ACTIONS,NUM_OBJECTS,NUM_ROOMS))
 73 | 
  74 | # Build rooms_desc_map, which maps each room description to its room index,
  75 | # and quests_map, which maps each quest text to its quest index.
 76 | def text_to_hidden_state_mapping():
 77 |     for i in range(NUM_ROOMS):
 78 |         room_name = rooms[i]
 79 |         for room_desc in rooms_desc[room_name]:
 80 |             rooms_desc_map[room_desc] = i
 81 | 
 82 |     for i in range(NUM_QUESTS):
 83 |         quest_text = quests[i]
 84 |         quests_map[quest_text] = i
 85 | 
 86 | 
 87 | def load_game_data():
 88 |     # each state:(room, quest), where "room" is a hidden state
 89 |     # observable state: (room description, quest)
 90 | 
 91 |     for room_name in rooms_valid_acts:
 92 | 
 93 |         room_index = rooms.index(room_name)
 94 |         valid_acts = rooms_valid_acts[room_name]
 95 |         valid_objs = rooms_valid_objs[room_name]
 96 |         transit = rooms_transit[room_name]
 97 | 
 98 |         for valid_index, act in enumerate(valid_acts):
 99 |             obj = valid_objs[valid_index]
100 |             act_index = actions.index(act)
101 |             obj_index = objects.index(obj)
102 |             # valid commands: A(h,(a,o))=1 if (a,o) is valid for hidden state h.
103 |             command_is_valid[room_index, act_index, obj_index] = 1;
104 | 
105 |             next_room_name = transit[valid_index]
106 |             next_room_index = rooms.index(next_room_name)
107 |             #deterministic transition
108 |             transit_matrix[room_index, act_index, obj_index, next_room_index] = 1;
109 | 
110 |     text_to_hidden_state_mapping()
111 | 
112 | 
113 | # take a step in the game
114 | def step_game(current_room_desc, current_quest_desc, action_index, object_index):
115 |     global STEP_COUNT
116 |     STEP_COUNT = STEP_COUNT+1
117 |     terminal = (STEP_COUNT >= MAX_STEPS)
118 |     #print('Step=%d' %(STEP_COUNT))
119 |     #print(terminal)
120 | 
121 |     # room_index: the hidden state.
122 |     current_room_index = rooms_desc_map[current_room_desc]
123 |     quest_index = quests_map[current_quest_desc]
124 | 
125 |     if (command_is_valid[current_room_index, action_index, object_index]==1):
126 |         # quest has been finished
127 |         if ((actions[action_index]==quest_actions[quest_index]) and (objects[object_index]==quest_objects[quest_index])):
128 |             terminal = True
129 |             reward = QUEST_REWARD
130 | 
131 |             if DEBUG:
132 |                 print('Finish quest: %s at Room %s with command %s %s' %(current_quest_desc, current_room_desc, actions[action_index],objects[object_index]))
133 | 
134 |         else:
135 |             reward = DEFAULT_REWARD
136 | 
137 |         # probability distribution of next room.
138 |         next_room_dist = transit_matrix[current_room_index, action_index, object_index, :]
139 |         next_room_index = np.random.choice(NUM_ROOMS, p=next_room_dist)
140 |         next_room_name = rooms[next_room_index]
141 |         next_room_desc_index = np.random.randint(len(rooms_desc[next_room_name]))
142 |         next_room_desc = rooms_desc[next_room_name][next_room_desc_index]
143 |         #if DEBUG:
144 |             #print('Reward: %1.3f' % (reward,))
145 |             #print('Transit to Room %d:%s. %s' %(next_room_index, rooms[next_room_index],rooms_desc[next_room_name][next_room_desc_index]))
146 | 
147 |     else:
148 |         # penalty for invalid command
149 |         reward = DEFAULT_REWARD + JUNK_CMD_REWARD
150 |         # state remains the same when invalid command executed
151 |         next_room_desc = current_room_desc
152 | 
153 |         # if DEBUG:
154 |         #     print('Invalid command!')
155 |         #     print('Reward: %1.3f' % (reward,))
156 |         #     print('Remain in Room %d:%s' %(next_room_index, rooms[next_room_index],))
157 | 
158 |     # quest remains the same during each episode
159 |     next_quest_desc = current_quest_desc
160 |     return (next_room_desc, next_quest_desc, reward, terminal)
161 | 
162 | # start a new game
163 | def newGame():
164 |     global STEP_COUNT
165 |     STEP_COUNT = 0
166 |     # random initial state: room_index + quest_index
167 |     room_index = np.random.randint(NUM_ROOMS)
168 |     room_name = rooms[room_index]
169 |     room_desc_index = np.random.randint(len(rooms_desc[room_name]))
170 |     room_desc = rooms_desc[room_name][room_desc_index]
171 | 
172 |     quest_index = np.random.randint(len(quests))
173 |     quest_desc = quests[quest_index]
174 | 
175 |     terminal = False
176 |     if DEBUG:
177 |         print('Start a new game')
178 |         print('Start Room %d: %s. %s' % (room_index, room_name, room_desc,))
179 |         print('Start quest: %s' % (quest_desc,))
180 | 
181 |     return (room_desc, quest_desc, terminal)
182 | 
183 | def get_actions():
184 |     return (actions)
185 | 
186 | def get_objects():
187 |     return (objects)
188 | 
189 | def make_all_states_index():
190 |     """
 191 |     Returns two dictionaries:
 192 |     1: one for all unique room descriptions that occur in the game
193 |     2: one for all unique quests in the game
194 |     """
195 |     dictionary_room_desc = {}
196 |     dictionary_quest_desc = {}
197 |     for room in rooms_desc:
198 |         for desc in rooms_desc[room]:
199 |             if desc not in dictionary_room_desc:
200 |                 dictionary_room_desc[desc] = len(dictionary_room_desc)
201 | 
202 |     for quest in quests:
203 |         if quest not in dictionary_quest_desc:
204 |             dictionary_quest_desc[quest] = len(dictionary_quest_desc)
205 | 
206 |     return (dictionary_room_desc, dictionary_quest_desc)
207 | 
208 | # def gameOver(room_index, quest_index, action_index, object_index):
209 | #     if (command_is_valid[room_index, action_index, object_index]==1):
210 | #         # quest has been finished
211 | #         if ((actions[action_index]==quest_actions[quest_index]) and (objects[object_index]==quest_objects[quest_index])):
212 | #             return (True)
213 | 
214 | #     return (False)
215 | 
216 | # def output_state(room_index, room_desc_index, quest_index):
217 | #     room_name = rooms[room_index]
218 | #     #print('Room: %s. %s.' %(room_name, rooms_desc[room_name][room_desc_index]))
219 | #     #print('Quest: %s' %(quests[quest_index]))
220 | #     room_desc = rooms_desc[room_name][room_desc_index]
221 | #     quest_desc = quests[quest_index]
222 | #     return (room_desc, quest_desc)
223 | 
224 | # def output_command(action_index, object_index):
225 | #     print('Command: %s %s' %(actions[action_index], objects[object_index]))
226 | 
227 | 
228 | # load_game_data()
229 | # reward_cnt = 0
230 | # step = 0
231 | # max_steps = 300
232 | # game_count = 0
233 | 
234 | # (current_room_desc, current_quest_desc, terminal) = newGame()
235 | 
236 | 
237 | # while step<max_steps:
238 | #     step = step +1
239 | 
240 | #     # pure random policy
241 | #     action_index = np.random.randint(NUM_ACTIONS)
242 | #     object_index = np.random.randint(NUM_OBJECTS)
243 | 
244 | #     if DEBUG:
245 | #         print('Step %d: %s %s with Command: %s %s' % (step, current_room_desc, current_quest_desc, actions[action_index], objects[object_index],))
246 | 
247 | #     (next_room_desc, next_quest_desc, reward, terminal) = step_game(current_room_desc, current_quest_desc, action_index, object_index)
248 | #     reward_cnt = reward_cnt + reward
249 | 
250 | #     if terminal:
251 | #         (current_room_desc, current_quest_desc, terminal) = newGame()
252 | #         game_count = game_count + 1
253 | #     else:
254 | #         current_room_desc = next_room_desc
255 | #         current_quest_desc = next_quest_desc
256 | 
257 | 
258 | # print('Finish %d games. Total reward %6.3f.' % (game_count, reward_cnt,))
259 | 


--------------------------------------------------------------------------------
/P5/resources_rl/rl/game.tsv:
--------------------------------------------------------------------------------
 1 | This room has a couch, chairs and TV.
 2 | You have entered the living room. You can watch TV here
 3 | This room has two sofas, chairs and a chandelier.
 4 | A huge television that is great for watching games.
 5 | This space has a swing, flowers and trees.
 6 | You have arrived at the garden. You can exercise here 
 7 | This area has plants, grass and rabbits.
 8 | A nice shiny bike that is fun to ride.
 9 | This room has a fridge, oven, and a sink.
10 | You have arrived in the kitchen. You can find food and drinks here.
11 | This living area has pizza, coke, and icecream. 
12 | A red juicy fruit.
13 | This area has a bed, desk and a dresser.
14 | You have arrived in the bedroom. You can rest here.
15 | You see a wooden cot and a mattress on top of it.
16 | A nice, comfortable bed with pillows and sheets.
17 | You are bored.
18 | You are getting fat.
19 | You are hungry.
20 | You are sleepy.
21 | 


--------------------------------------------------------------------------------
/P5/resources_rl/rl/utils.py:
--------------------------------------------------------------------------------
 1 | from string import punctuation, digits
 2 | import csv
 3 | import numpy as np
 4 | import matplotlib.pyplot as plt
 5 | 
 6 | import sys
 7 | 
 8 | if sys.version_info[0] < 3:
 9 |     PYTHON3 = False
10 | else:
11 |     PYTHON3 = True
12 | 
13 | def load_data(path_data):
14 |     """Return the rows of the tab-separated file at path_data as a list of lists"""
15 |     global PYTHON3
16 | 
17 |     data = []
18 |     if PYTHON3:
19 |         f_data = open(path_data, encoding="latin1")
20 |     else:
21 |         f_data = open(path_data)
22 | 
23 |     reader = csv.reader(f_data, delimiter='\t')
24 | 
25 |     for row in reader:
26 |     	data.append(row)
27 | 
28 |     f_data.close()
29 | 
30 |     return data
31 | 
32 | 
33 | def ewma(a, alpha=0.9):
34 |     """Computes the exponentially weighted moving average of a"""
35 |     b = np.array(a)
36 |     n = b.size
37 |     w0 = np.ones(n) * alpha
38 |     p = np.arange(n - 1, -1, -1)
39 |     return np.average(b, weights=w0 ** p)
40 | 
41 | 
42 | def extract_words(input_string):
43 |     """
44 |     Helper function for bag_of_words()
45 |     Inputs a text string
46 |     Returns a list of lowercase words in the string.
47 |     Punctuation and digits are separated out into their own words.
48 |     """
49 |     for c in punctuation + digits:
50 |         input_string = input_string.replace(c, ' ' + c + ' ')
51 |     return input_string.lower().split()
52 | 
53 | 
54 | def bag_of_words(texts):
55 |     """
56 |     Inputs a list of rows whose first element is a string description
57 |     Returns a dictionary of unique unigrams occurring over the input
58 |     """
59 |     dictionary = {}  # maps word to unique index
60 |     for text in texts:
61 |         word_list = extract_words(text[0])
62 |         for word in word_list:
63 |             if word not in dictionary:
64 |                 dictionary[word] = len(dictionary)
65 |     return dictionary
66 | 
67 | 
68 | def extract_bow_feature_vector(state_desc, dictionary):
69 |     """
70 |     Inputs a string state description
71 |     Inputs the dictionary of words as given by bag_of_words
72 |     Returns the bag-of-words vector representation of the state
73 |     The returned vector is of dimension m, where m the total number of entries in the dictionary.
74 |     """
75 |     state_vector = np.zeros([len(dictionary)])
76 |     word_list = extract_words(state_desc)
77 |     for word in word_list:
78 |         if word in dictionary:
79 |             state_vector[dictionary[word]] += 1
80 | 
81 |     return state_vector
82 | 


--------------------------------------------------------------------------------
/P5/resources_rl_Abel.rar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P5/resources_rl_Abel.rar


--------------------------------------------------------------------------------
/P5/resources_rl_solution.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/figureedge/MITx-6.86x-Machine-Learning-with-Python/3791b31a1546025bdbd9692a33b45fe93ad8813f/P5/resources_rl_solution.tar.gz


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # MITx-6.86x-Machine-Learning-with-Python
2 | 
3 | 


--------------------------------------------------------------------------------