├── .gitignore
├── Deep Learning.pdf
├── Machine Learning.pdf
├── README.md
├── doc
│   ├── image
│   │   ├── ex1_1.png
│   │   ├── ex1_2.png
│   │   ├── ex1_3.png
│   │   ├── ex1_multi_1.png
│   │   ├── ex2_1.png
│   │   ├── ex2_2.png
│   │   ├── ex2_LR_1.png
│   │   ├── ex2_LR_2.png
│   │   ├── ex3_1.png
│   │   ├── ex5_1.png
│   │   ├── ex5_2.png
│   │   ├── ex5_3.png
│   │   ├── ex6_1.png
│   │   ├── ex6_2.png
│   │   ├── ex6_3.png
│   │   ├── ex7_1.png
│   │   ├── ex7_2.png
│   │   ├── ex7_3.png
│   │   ├── ex7_4.png
│   │   ├── ex7_5.png
│   │   ├── ex7_6.png
│   │   ├── ex7_7.png
│   │   └── ex8_1.png
│   └── pdf
│       ├── ex1.pdf
│       ├── ex2.pdf
│       ├── ex3.pdf
│       ├── ex4.pdf
│       ├── ex5.pdf
│       ├── ex6.pdf
│       ├── ex7.pdf
│       └── ex8.pdf
├── ex1 Linear Regression
│   ├── Optional Exercises
│   │   ├── computeCostMulti.py
│   │   ├── ex1_multi.py
│   │   ├── ex1data2.txt
│   │   ├── featureNormalize.py
│   │   ├── gradientDescentMulti.py
│   │   └── normalEqn.py
│   ├── computeCost.py
│   ├── ex1.py
│   ├── ex1data1.txt
│   └── gradientDescent.py
├── ex2 Logistic Regression
│   ├── Regularized logistic regression
│   │   ├── costFunctionReg.py
│   │   ├── ex2_reg.py
│   │   ├── ex2data2.txt
│   │   ├── fminunc_reg.py
│   │   └── mapFeature.py
│   ├── costFunction.py
│   ├── ex2.py
│   ├── ex2data1.txt
│   ├── fminunc.py
│   ├── plotData.py
│   ├── plotDecisionBoundary.py
│   ├── predict.py
│   └── sigmoid.py
├── ex3 Multi-class Classification and Neural Networks
│   ├── Neural Networks FP
│   │   ├── ex3_nn.py
│   │   ├── ex3weights.mat
│   │   └── predict_nn_fp.py
│   ├── displayData.py
│   ├── ex3.py
│   ├── ex3data1.mat
│   ├── fminunc_lr.py
│   ├── lrCostFunction.py
│   ├── oneVsAll.py
│   ├── predictOneVsAll.py
│   └── sigmoid.py
├── ex4 Neural Networks Learning
│   ├── Training_NN.py
│   ├── checkNNGradients.py
│   ├── computeNumericalGradient.py
│   ├── debugInitializeWeights.py
│   ├── displayData.py
│   ├── ex4.py
│   ├── ex4data1.mat
│   ├── ex4weights.mat
│   ├── nnCostFunction.py
│   ├── predict_NN.py
│   ├── randInitializeWeights.py
│   ├── sigmoid.py
│   └── sigmoidGradient.py
├── ex5 Regularized Linear Regression and Bias v.s Variance
│   ├── ex5.py
│   ├── ex5data1.mat
│   ├── featureNormalize.py
│   ├── learningCurve.py
│   ├── linearRegCostFunction.py
│   ├── plotFit.py
│   ├── polyFeatures.py
│   ├── trainLinearReg.py
│   └── validationCurve.py
├── ex6 Support Vector Machines
│   ├── Spam Classification
│   │   ├── emailFeatures.py
│   │   ├── ex6_spam.py
│   │   ├── getVocabList.py
│   │   └── processEmail.py
│   ├── data
│   │   ├── emailSample1.txt
│   │   ├── emailSample2.txt
│   │   ├── ex6data1.mat
│   │   ├── ex6data2.mat
│   │   ├── ex6data3.mat
│   │   ├── spamSample1.txt
│   │   ├── spamSample2.txt
│   │   ├── spamTest.mat
│   │   ├── spamTrain.mat
│   │   └── vocab.txt
│   ├── ex6.py
│   ├── gaussianKernel.py
│   └── plotData.py
├── ex7 K-means Clustering and Principal Component Analysis
│   ├── computeCentroids.py
│   ├── data
│   │   ├── bird_small.mat
│   │   ├── bird_small.png
│   │   ├── ex7data1.mat
│   │   ├── ex7data2.mat
│   │   └── ex7faces.mat
│   ├── displayData.py
│   ├── drawLine.py
│   ├── ex7.py
│   ├── ex7_pca.py
│   ├── featureNormalize.py
│   ├── findClosestCentroids.py
│   ├── kMeansInitCentroids.py
│   ├── pca.py
│   ├── plotDataPoints.py
│   ├── plotProgresskMeans.py
│   ├── projectData.py
│   ├── recoverData.py
│   └── runkMeans.py
└── ex8 Anomaly Detection and Recommender
    ├── checkGradients.py
    ├── cofiCostFunc.py
    ├── computeNumericalGradient.py
    ├── data
    │   ├── ex8_movieParams.mat
    │   ├── ex8_movies.mat
    │   ├── ex8data1.mat
    │   ├── ex8data2.mat
    │   └── movie_ids.txt
    ├── estimateGaussian.py
    ├── ex8.py
    ├── ex8_cofi.py
    ├── fminunc_recommender.py
    ├── loadMovieList.py
    ├── multivariateGaussian.py
    ├── normalizeRatings.py
    ├── selectThreshold.py
    └── visualizeFit.py

/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/*
2 | venv/*
3 | __pycache__*
--------------------------------------------------------------------------------
/Deep Learning.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/Deep Learning.pdf
--------------------------------------------------------------------------------
/Machine Learning.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/Machine Learning.pdf
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ML-EX-Python
2 | These are the programming exercises from Coursera's Machine Learning course (by Andrew Ng), implemented in Python.
3 | 
4 | ## Example images from the exercises
5 | 
6 | 
7 | 
8 | ### ex1
9 | #### ex1_1 Linear regression with one variable
10 | ![image_ex1_1](https://github.com/X-21/ML-EX-Python/blob/master/doc/image/ex1_1.png)
11 | 
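The fit above is produced by batch gradient descent on the squared-error cost. A minimal sketch of the vectorized cost and update, mirroring this repo's `computeCost.py` and `gradientDescent.py` (assuming `x` already includes the intercept column of ones):

```python
import numpy as np

def compute_cost(x, y, theta):
    # J(theta) = 1/(2m) * sum((x @ theta - y) ** 2)
    m = len(y)
    err = np.dot(x, theta) - y
    return np.sum(err ** 2) / (2 * m)

def gradient_descent(x, y, theta, alpha, iterations):
    # Simultaneous update of every theta_j along the negative gradient
    m = len(y)
    for _ in range(iterations):
        theta = theta - (alpha / m) * np.dot(x.T, np.dot(x, theta) - y)
    return theta
```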
12 |
13 |
14 | #### ex1_2 Visualizing J(θ) (Surface) 15 | ![image_ex1_2](https://github.com/X-21/ML-EX-Python/blob/master/doc/image/ex1_2.png) 16 |
17 |
18 |
19 | #### ex1_3 Visualizing J(θ) (Contour)
20 | ![image_ex1_3](https://github.com/X-21/ML-EX-Python/blob/master/doc/image/ex1_3.png)
21 | 
22 |
23 |
24 | #### ex1_multi_1 Linear regression with multiple variables 25 | Convergence of gradient descent with an appropriate learning rate 26 | ![image_ex1_multi_1](https://github.com/X-21/ML-EX-Python/blob/master/doc/image/ex1_multi_1.png) 27 | 28 | --- 29 | ### ex2 30 | #### ex2_1 Logistic Regression 31 | ![image_ex2_1](https://github.com/X-21/ML-EX-Python/blob/master/doc/image/ex2_1.png) 32 |
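The classifier behind this plot models P(y = 1 | x) with the sigmoid of x·θ. A minimal sketch of the hypothesis and cross-entropy cost, mirroring `sigmoid.py` and `costFunction.py` in this repo:

```python
import numpy as np
from scipy.special import expit  # numerically stable sigmoid, as used in sigmoid.py

def cost_function(theta, x, y):
    # J(theta) = 1/m * sum(-y*log(h) - (1 - y)*log(1 - h)), with h = sigmoid(x @ theta)
    m = len(y)
    h = expit(np.dot(x, theta))
    j = (1 / m) * np.sum(-y * np.log(h) - (1 - y) * np.log(1 - h))
    grad = (1 / m) * np.dot(x.T, h - y)
    return j, grad
```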
33 |
34 |
35 | #### ex2_2 Logistic Regression 36 | Training data with decision boundary 37 | ![image_ex2_2](https://github.com/X-21/ML-EX-Python/blob/master/doc/image/ex2_2.png) 38 |
39 |
40 |
41 | #### ex2_LR_1 Regularized Logistic Regression 42 | ![image_ex2_LR_1](https://github.com/X-21/ML-EX-Python/blob/master/doc/image/ex2_LR_1.png) 43 |
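Regularization changes the cost above only by a penalty on θ₁…θₙ; the intercept θ₀ is deliberately left unpenalized. A sketch equivalent to the repo's `costFunctionReg.py`:

```python
import numpy as np
from scipy.special import expit

def cost_function_reg(theta, x, y, reg_lambda):
    m = len(y)
    h = expit(np.dot(x, theta))
    j = (1 / m) * np.sum(-y * np.log(h) - (1 - y) * np.log(1 - h))
    j += (reg_lambda / (2 * m)) * np.sum(theta[1:] ** 2)  # skip the intercept term
    grad = (1 / m) * np.dot(x.T, h - y)
    grad[1:] += (reg_lambda / m) * theta[1:]              # intercept gradient stays unregularized
    return j, grad
```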
44 |
45 |
46 | #### ex2_LR_2 Regularized Logistic Regression
47 | Training data with decision boundary (λ = 1)
48 | ![image_ex2_LR_2](https://github.com/X-21/ML-EX-Python/blob/master/doc/image/ex2_LR_2.png)
49 | 
50 | ---
51 | ### ex3
52 | #### ex3_1 Multi-class Classification (MNIST)
53 | ![image_ex3_1](https://github.com/X-21/ML-EX-Python/blob/master/doc/image/ex3_1.png)
54 | 
55 | 
56 | ---
57 | ### ex5
58 | #### ex5_1 Polynomial Regression Fit
59 | ![image_ex5_1](https://github.com/X-21/ML-EX-Python/blob/master/doc/image/ex5_1.png)
60 | 
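The curve above is still linear regression, just fitted on polynomial features. A minimal sketch of the usual mapping (the repo's `polyFeatures.py` is assumed to have this interface; it is not reproduced in this section), after which the columns are feature-normalized because the powers differ wildly in scale:

```python
import numpy as np

def poly_features(x, p):
    # Map a column vector x to [x, x^2, ..., x^p]
    x = x.reshape(-1, 1)
    return np.hstack([x ** (i + 1) for i in range(p)])
```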
61 |
62 |
63 | #### ex5_2 Polynomial Regression Learning Curve 64 | ![image_ex5_2](https://github.com/X-21/ML-EX-Python/blob/master/doc/image/ex5_2.png) 65 |
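A learning curve is built by retraining on growing prefixes of the training set and recording both errors. A sketch of the loop; the regularized normal-equation solver here is a hypothetical stand-in for the repo's `trainLinearReg.py`, which is not reproduced in this section:

```python
import numpy as np

def train_linear_reg(x, y, lam):
    # Regularized normal equation (stand-in for trainLinearReg.py)
    reg = lam * np.eye(x.shape[1])
    reg[0, 0] = 0  # never regularize the intercept
    return np.linalg.solve(x.T @ x + reg, x.T @ y)

def learning_curve(x, y, xval, yval, lam):
    m = len(y)
    err_train, err_val = np.zeros(m), np.zeros(m)
    for i in range(1, m + 1):
        theta = train_linear_reg(x[:i], y[:i], lam)
        # Errors are measured without the regularization term
        err_train[i - 1] = np.sum((x[:i] @ theta - y[:i]) ** 2) / (2 * i)
        err_val[i - 1] = np.sum((xval @ theta - yval) ** 2) / (2 * len(yval))
    return err_train, err_val
```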
66 |
67 |
68 | #### ex5_3 Regularization and Bias/Variance 69 | ![image_ex5_3](https://github.com/X-21/ML-EX-Python/blob/master/doc/image/ex5_3.png) 70 | 71 | 72 | --- 73 | ### ex6 74 | #### ex6_1 SVM Decision Boundary with C = 1 75 | ![image_ex6_1](https://github.com/X-21/ML-EX-Python/blob/master/doc/image/ex6_1.png) 76 |
77 |
78 |
79 | #### ex6_2 SVM (Gaussian Kernel) Decision Boundary (Example Dataset 2) 80 | ![image_ex6_2](https://github.com/X-21/ML-EX-Python/blob/master/doc/image/ex6_2.png) 81 |
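The non-linear boundary above comes from the Gaussian (RBF) kernel, a similarity measure that decays with squared distance. A minimal sketch of the standard definition (the repo's `gaussianKernel.py` is not reproduced in this section):

```python
import numpy as np

def gaussian_kernel(x1, x2, sigma):
    # K(x1, x2) = exp(-||x1 - x2||^2 / (2 * sigma^2))
    return np.exp(-np.sum((x1 - x2) ** 2) / (2 * sigma ** 2))
```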
82 |
83 |
84 | #### ex6_3 SVM (Gaussian Kernel) Decision Boundary (Example Dataset 3) 85 | ![image_ex6_3](https://github.com/X-21/ML-EX-Python/blob/master/doc/image/ex6_3.png) 86 | 87 | 88 | --- 89 | ### ex7 90 | #### ex7_1 K-means on example dataset 91 | ![image_ex7_1](https://github.com/X-21/ML-EX-Python/blob/master/doc/image/ex7_1.png) 92 |
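K-means alternates two steps until the assignments stop changing: assign every point to its nearest centroid, then move each centroid to the mean of its assigned points. A sketch of both steps (the repo's `findClosestCentroids.py` and `computeCentroids.py` are assumed to behave like this):

```python
import numpy as np

def find_closest_centroids(x, centroids):
    # Squared Euclidean distance from every point to every centroid
    dists = ((x[:, None, :] - centroids[None, :, :]) ** 2).sum(axis=2)
    return dists.argmin(axis=1)

def compute_centroids(x, idx, k):
    # Mean of the points assigned to each centroid (assumes no cluster goes empty)
    return np.array([x[idx == c].mean(axis=0) for c in range(k)])
```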
93 |
94 |
95 | #### ex7_2 Original and reconstructed image (when using K-means to compress the image) 96 | ![image_ex7_2](https://github.com/X-21/ML-EX-Python/blob/master/doc/image/ex7_2.png) 97 |
98 |
99 |
100 | #### ex7_3 PCA - Computed eigenvectors of the dataset 101 | ![image_ex7_3](https://github.com/X-21/ML-EX-Python/blob/master/doc/image/ex7_3.png) 102 |
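The eigenvectors drawn above are obtained from the SVD of the covariance matrix of the feature-normalized data. A minimal sketch in the shape of the repo's `pca.py` (not reproduced in this section):

```python
import numpy as np

def pca(x):
    # x must already be feature-normalized; Sigma = (1/m) * X^T X
    m = x.shape[0]
    sigma = (x.T @ x) / m
    u, s, _ = np.linalg.svd(sigma)
    return u, s  # columns of u are the principal directions
```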
103 |
104 |
105 | #### ex7_4 The normalized and projected data after PCA 106 | ![image_ex7_4](https://github.com/X-21/ML-EX-Python/blob/master/doc/image/ex7_4.png) 107 |
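Projection keeps only the coordinates along the top K principal directions, and reconstruction maps them back to the original space. A sketch of the two mappings, assumed to match `projectData.py` and `recoverData.py`:

```python
def project_data(x, u, k):
    # z = X * U[:, :k] -- coordinates in the reduced k-dimensional space
    return x @ u[:, :k]

def recover_data(z, u, k):
    # Approximate reconstruction in the original space
    return z @ u[:, :k].T
```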
108 |
109 |
110 | #### ex7_5 Original images of faces and ones reconstructed from only the top 100 principal components 111 | ![image_ex7_5](https://github.com/X-21/ML-EX-Python/blob/master/doc/image/ex7_5.png) 112 |
113 |
114 |
115 | #### ex7_6 PCA for visualization - 3D 116 | ![image_ex7_6](https://github.com/X-21/ML-EX-Python/blob/master/doc/image/ex7_6.png) 117 |
118 |
119 |
120 | #### ex7_7 2D visualization produced using PCA 121 | ![image_ex7_7](https://github.com/X-21/ML-EX-Python/blob/master/doc/image/ex7_7.png) 122 | 123 | 124 | --- 125 | ### ex8 126 | #### ex8_1 The classified anomalies 127 | ![image_ex8_1](https://github.com/X-21/ML-EX-Python/blob/master/doc/image/ex8_1.png) 128 |
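The flagged points are those whose probability under a per-feature Gaussian model falls below a threshold ε (chosen in the exercise by maximizing F1 on a labeled validation set, as in `selectThreshold.py`). A minimal sketch of the model, assumed to match `estimateGaussian.py`:

```python
import numpy as np

def estimate_gaussian(x):
    # Per-feature mean and variance (features treated as independent)
    return x.mean(axis=0), x.var(axis=0)

def is_anomaly(x, mu, var, epsilon):
    # p(x) as a product of per-feature Gaussian densities; flag p(x) < epsilon
    p = np.prod(np.exp(-(x - mu) ** 2 / (2 * var)) / np.sqrt(2 * np.pi * var), axis=1)
    return p < epsilon
```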
-------------------------------------------------------------------------------- /doc/image/ex1_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/doc/image/ex1_1.png -------------------------------------------------------------------------------- /doc/image/ex1_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/doc/image/ex1_2.png -------------------------------------------------------------------------------- /doc/image/ex1_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/doc/image/ex1_3.png -------------------------------------------------------------------------------- /doc/image/ex1_multi_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/doc/image/ex1_multi_1.png -------------------------------------------------------------------------------- /doc/image/ex2_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/doc/image/ex2_1.png -------------------------------------------------------------------------------- /doc/image/ex2_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/doc/image/ex2_2.png -------------------------------------------------------------------------------- /doc/image/ex2_LR_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/doc/image/ex2_LR_1.png -------------------------------------------------------------------------------- /doc/image/ex2_LR_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/doc/image/ex2_LR_2.png -------------------------------------------------------------------------------- /doc/image/ex3_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/doc/image/ex3_1.png -------------------------------------------------------------------------------- /doc/image/ex5_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/doc/image/ex5_1.png -------------------------------------------------------------------------------- /doc/image/ex5_2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/doc/image/ex5_2.png -------------------------------------------------------------------------------- /doc/image/ex5_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/doc/image/ex5_3.png -------------------------------------------------------------------------------- /doc/image/ex6_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/doc/image/ex6_1.png -------------------------------------------------------------------------------- /doc/image/ex6_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/doc/image/ex6_2.png -------------------------------------------------------------------------------- /doc/image/ex6_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/doc/image/ex6_3.png -------------------------------------------------------------------------------- /doc/image/ex7_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/doc/image/ex7_1.png -------------------------------------------------------------------------------- /doc/image/ex7_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/doc/image/ex7_2.png -------------------------------------------------------------------------------- /doc/image/ex7_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/doc/image/ex7_3.png -------------------------------------------------------------------------------- /doc/image/ex7_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/doc/image/ex7_4.png -------------------------------------------------------------------------------- /doc/image/ex7_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/doc/image/ex7_5.png -------------------------------------------------------------------------------- /doc/image/ex7_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/doc/image/ex7_6.png -------------------------------------------------------------------------------- /doc/image/ex7_7.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/doc/image/ex7_7.png -------------------------------------------------------------------------------- /doc/image/ex8_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/doc/image/ex8_1.png -------------------------------------------------------------------------------- /doc/pdf/ex1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/doc/pdf/ex1.pdf -------------------------------------------------------------------------------- /doc/pdf/ex2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/doc/pdf/ex2.pdf -------------------------------------------------------------------------------- /doc/pdf/ex3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/doc/pdf/ex3.pdf -------------------------------------------------------------------------------- /doc/pdf/ex4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/doc/pdf/ex4.pdf -------------------------------------------------------------------------------- /doc/pdf/ex5.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/doc/pdf/ex5.pdf -------------------------------------------------------------------------------- /doc/pdf/ex6.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/doc/pdf/ex6.pdf -------------------------------------------------------------------------------- /doc/pdf/ex7.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/doc/pdf/ex7.pdf -------------------------------------------------------------------------------- /doc/pdf/ex8.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/doc/pdf/ex8.pdf -------------------------------------------------------------------------------- /ex1 Linear Regression/Optional Exercises/computeCostMulti.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | 6 | 7 | def compute_cost_multi(x, y, theta): 8 | m = len(y) 9 | hypothesis = np.dot(x, theta) 10 | err = (hypothesis - y) ** 2 11 | return (1 / (2 * m)) * (np.sum(err)) 12 | -------------------------------------------------------------------------------- /ex1 Linear 
Regression/Optional Exercises/ex1_multi.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | from featureNormalize import feature_normalize 7 | from gradientDescentMulti import gradient_descent_multi 8 | from normalEqn import normal_eqn 9 | 10 | 11 | def pause_func(): 12 | print('Program paused. Press enter to continue.\n') 13 | while input() != '': 14 | pass 15 | 16 | 17 | def load_data(filename): 18 | data_load = np.loadtxt(filename, delimiter=",") 19 | return data_load 20 | 21 | 22 | if __name__ == '__main__': 23 | # ================ Part 1: Feature Normalization ================ 24 | print('Loading data ...\n') 25 | # Load Data 26 | data = load_data('ex1data2.txt') 27 | data = np.split(data, [2], axis=1) 28 | X = data[0] 29 | y = data[1] 30 | m = len(y) 31 | # Print out some data points 32 | print('First 10 examples from the dataset: \n') 33 | for i in range(10): 34 | print(' x = [%.0f %.0f], y = %.0f \n' % (X[i][0], X[i][1], y[i])) 35 | # pause_func() 36 | 37 | # Scale features and set them to zero mean 38 | print('Normalizing Features ...\n') 39 | X, mu, sigma = feature_normalize(X) 40 | # Add intercept term to X 41 | X = np.append(np.ones((m, 1)), X, axis=1) 42 | 43 | # ================ Part 2: Gradient Descent ================ 44 | print('Running gradient descent ...\n') 45 | # Number of iterations (loops) 46 | num_iters = 400 47 | # Try some other values of alpha 48 | alpha = 1 49 | theta = np.zeros((3, 1)) 50 | theta, J_history_0 = gradient_descent_multi(X, y, theta, alpha, num_iters) 51 | print('theta is \n', theta, '\n') 52 | 53 | alpha = 0.3 54 | theta = np.zeros((3, 1)) 55 | theta, J_history_1 = gradient_descent_multi(X, y, theta, alpha, num_iters) 56 | print('theta is \n', theta, '\n') 57 | 58 | alpha = 0.01 59 | theta = np.zeros((3, 1)) 60 | theta, J_history = gradient_descent_multi(X, y, theta, alpha, num_iters) 61 | print('theta is \n', theta, '\n') 62 | 63 | plt.ion() 64 | plt.figure() 65 | j_history_plt_x = np.linspace(1, num_iters, num_iters).reshape(400, 1) 66 | plt.plot(j_history_plt_x, J_history_0, "-r") 67 | plt.plot(j_history_plt_x, J_history_1, "-g") 68 | plt.plot(j_history_plt_x, J_history, "-b") 69 | plt.xlabel('Number of iterations') 70 | plt.ylabel('Cost J') 71 | plt.pause(0.5) 72 | plt.close() 73 | # Display gradient descent's result 74 | print('Theta computed from gradient descent: \n', theta, '\n') 75 | 76 | # Estimate the price of a 1650 sq-ft, 3 br house 77 | price = np.dot( 78 | np.array(([1, (1650 - mu[0]) / sigma[0], (3 - mu[1]) / sigma[1]])), 79 | theta) 80 | print('Predicted price of a 1650 sq-ft, 3 br house (using gradient descent):\n $%f\n' % price) 81 | print('Program paused. 
Press enter to continue.\n') 82 | # pause_func() 83 | 84 | # ================ Part 3: Normal Equations ================ 85 | print('Solving with normal equations...\n') 86 | data = load_data('ex1data2.txt') 87 | data = np.split(data, [2], axis=1) 88 | X = data[0] 89 | y = data[1] 90 | X = np.append(np.ones((m, 1)), X, axis=1) 91 | # Calculate the parameters from the normal equation 92 | theta = normal_eqn(X, y) 93 | # Display normal equation's result 94 | print('Theta computed from the normal equations: \n ', theta, '\n\n') 95 | # Estimate the price of a 1650 sq-ft, 3 br house 96 | price = np.dot( 97 | np.array(([1, 1650, 3])), 98 | theta) 99 | print('Predicted price of a 1650 sq-ft, 3 br house (using normal equations):\n $%f\n' % price) 100 | -------------------------------------------------------------------------------- /ex1 Linear Regression/Optional Exercises/ex1data2.txt: -------------------------------------------------------------------------------- 1 | 2104,3,399900 2 | 1600,3,329900 3 | 2400,3,369000 4 | 1416,2,232000 5 | 3000,4,539900 6 | 1985,4,299900 7 | 1534,3,314900 8 | 1427,3,198999 9 | 1380,3,212000 10 | 1494,3,242500 11 | 1940,4,239999 12 | 2000,3,347000 13 | 1890,3,329999 14 | 4478,5,699900 15 | 1268,3,259900 16 | 2300,4,449900 17 | 1320,2,299900 18 | 1236,3,199900 19 | 2609,4,499998 20 | 3031,4,599000 21 | 1767,3,252900 22 | 1888,2,255000 23 | 1604,3,242900 24 | 1962,4,259900 25 | 3890,3,573900 26 | 1100,3,249900 27 | 1458,3,464500 28 | 2526,3,469000 29 | 2200,3,475000 30 | 2637,3,299900 31 | 1839,2,349900 32 | 1000,1,169900 33 | 2040,4,314900 34 | 3137,3,579900 35 | 1811,4,285900 36 | 1437,3,249900 37 | 1239,3,229900 38 | 2132,4,345000 39 | 4215,4,549000 40 | 2162,4,287000 41 | 1664,2,368500 42 | 2238,3,329900 43 | 2567,4,314000 44 | 1200,3,299000 45 | 852,2,179900 46 | 1852,4,299900 47 | 1203,3,239500 48 | -------------------------------------------------------------------------------- /ex1 Linear Regression/Optional Exercises/featureNormalize.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | 6 | 7 | def feature_normalize(x): 8 | x_norm = x.copy() 9 | # np.shape(x)[1] is the column of x 10 | mu = np.mean(x, axis=0) 11 | sigma = np.std(x, axis=0, ddof=1) 12 | for i in range(np.shape(x)[0]): 13 | x_norm[i] = (x[i] - mu) / sigma 14 | return x_norm, mu, sigma 15 | -------------------------------------------------------------------------------- /ex1 Linear Regression/Optional Exercises/gradientDescentMulti.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | from computeCostMulti import compute_cost_multi 6 | 7 | 8 | def gradient_descent_multi(x, y, theta, alpha, num_iters): 9 | m = len(y) 10 | j_history = np.zeros((num_iters, 1)) 11 | for i in range(num_iters): 12 | hypothesis = np.dot(x, theta) 13 | sub = hypothesis - y 14 | theta = theta - (alpha / m) * (np.dot(x.T, sub)) 15 | j_history[i] = compute_cost_multi(x, y, theta) 16 | return theta, j_history 17 | -------------------------------------------------------------------------------- /ex1 Linear Regression/Optional Exercises/normalEqn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | 6 | 7 | def normal_eqn(x, y): 8 | temp_x = np.dot(x.T, x) 9 | matrix_x_inverse = 
np.mat(temp_x).I.getA() 10 | temp_x = np.dot(matrix_x_inverse, x.T) 11 | return np.dot(temp_x, y) 12 | -------------------------------------------------------------------------------- /ex1 Linear Regression/computeCost.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | 6 | 7 | def compute_cost(x, y, theta): 8 | m = len(y) 9 | h_theta = np.dot(x, theta) 10 | err = h_theta - y 11 | err_sum = sum(err ** 2) 12 | j = err_sum / (2 * m) 13 | return j 14 | -------------------------------------------------------------------------------- /ex1 Linear Regression/ex1.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | from mpl_toolkits.mplot3d.axes3d import Axes3D as Axes3D 7 | 8 | from computeCost import compute_cost 9 | from gradientDescent import gradient_descent 10 | 11 | 12 | def pause_func(): 13 | print('Program paused. Press enter to continue.\n') 14 | while input() != '': 15 | pass 16 | 17 | 18 | def load_data(filename): 19 | data_load = np.loadtxt(filename, delimiter=",") 20 | return data_load 21 | 22 | 23 | def warm_up_exercise(): 24 | print(np.eye(5)) 25 | 26 | 27 | if __name__ == '__main__': 28 | # ==================== Part 1: Basic Function ==================== 29 | print('Running warmUpExercise ... \n') 30 | print('5x5 Identity Matrix: \n') 31 | warm_up_exercise() 32 | # pause_func() 33 | 34 | # ======================= Part 2: Plotting ======================= 35 | data = load_data("ex1data1.txt") 36 | X = data[:, 0] 37 | y = data[:, 1] 38 | m = len(y) 39 | # reshape that will convert vector into matrix 40 | X = X.reshape(m, 1) 41 | y = y.reshape(m, 1) 42 | 43 | plt.ion() 44 | plt.figure() 45 | plt.plot(X, y, 'rx', label="Training data") 46 | plt.xlabel("Population of City in 10,000s") 47 | plt.ylabel("Profit in $10,000s") 48 | # pause_func() 49 | 50 | # =================== Part 3: Cost and Gradient descent =================== 51 | X = np.append(np.ones((m, 1)), X, axis=1) # Add a column of ones to x 52 | theta = np.zeros((2, 1)) # initialize fitting parameters 53 | # Some gradient descent settings 54 | iterations = 1500 55 | alpha = 0.01 56 | print('\nTesting the cost function ...\n') 57 | # compute and display initial cost 58 | J = compute_cost(X, y, theta) 59 | print('With theta = [0 ; 0]\nCost computed = %f\n' % J[0]) 60 | print('Expected cost value (approx) 32.07\n') 61 | J = compute_cost(X, y, np.array(([-1], [2]))) 62 | print('\nWith theta = [-1 ; 2]\nCost computed = %f\n' % J[0]) 63 | print('Expected cost value (approx) 54.24\n') 64 | print('Program paused. 
Press enter to continue.\n') 65 | # pause_func() 66 | 67 | print('\nRunning Gradient Descent ...\n') 68 | # run gradient descent 69 | theta = gradient_descent(X, y, theta, alpha, iterations) 70 | # print theta to screen 71 | print('Theta found by gradient descent:\n') 72 | print(theta) 73 | print('Expected theta values (approx)\n') 74 | print(' -3.6303\n 1.1664\n\n') 75 | 76 | plt.plot(X[:, 1], np.dot(X, theta), '-', label="Linear regression") 77 | plt.legend() 78 | plt.pause(3) 79 | plt.close() 80 | 81 | predict1 = np.dot(np.array(([1, 3.5])), theta) 82 | print('For population = 35,000, we predict a profit of ', predict1 * 10000, '\n') 83 | predict2 = np.dot(np.array(([1, 7])), theta) 84 | print('For population = 70,000, we predict a profit of ', predict2 * 10000, '\n') 85 | print('Program paused. Press enter to continue.\n') 86 | # pause_func() 87 | 88 | # ============= Part 4: Visualizing J(theta_0, theta_1) ============= 89 | print('Visualizing J(theta_0, theta_1) ...\n') 90 | # Grid over which we will calculate J 91 | theta0_vals = np.linspace(-10, 10, 100) 92 | theta1_vals = np.linspace(-1, 4, 100) 93 | # initialize J_vals to a matrix of 0's 94 | J_vals = np.zeros((len(theta0_vals), len(theta1_vals))) 95 | 96 | # Fill out J_vals 97 | for i in range(len(theta0_vals)): 98 | for j in range(len(theta1_vals)): 99 | t = np.array(([theta0_vals[i]], [theta1_vals[j]])) 100 | J_vals[i, j] = compute_cost(X, y, t) 101 | 102 | # Because of the way meshgrids work in the surf command, we need to 103 | # transpose J_vals before calling surf, or else the axes will be flipped 104 | J_vals = J_vals.T 105 | 106 | # Surface plot 107 | surface_figure = plt.figure() 108 | ax = Axes3D(surface_figure) 109 | theta0_vals, theta1_vals = np.meshgrid(theta0_vals, theta1_vals) 110 | ax.plot_surface(theta0_vals, theta1_vals, J_vals, cmap="jet") 111 | plt.xlabel(r'$\theta_0$') 112 | plt.ylabel(r'$\theta_1$') 113 | plt.pause(3) 114 | plt.close() 115 | 116 | plt.figure() 117 | plt.contour(theta0_vals, theta1_vals, J_vals, np.logspace(-2, 3, 20)) 118 | plt.plot(theta[0], theta[1], "rx", markersize=20) 119 | plt.xlabel(r'$\theta_0$') 120 | plt.ylabel(r'$\theta_1$', rotation=0) 121 | plt.pause(3) 122 | plt.close() 123 | -------------------------------------------------------------------------------- /ex1 Linear Regression/ex1data1.txt: -------------------------------------------------------------------------------- 1 | 6.1101,17.592 2 | 5.5277,9.1302 3 | 8.5186,13.662 4 | 7.0032,11.854 5 | 5.8598,6.8233 6 | 8.3829,11.886 7 | 7.4764,4.3483 8 | 8.5781,12 9 | 6.4862,6.5987 10 | 5.0546,3.8166 11 | 5.7107,3.2522 12 | 14.164,15.505 13 | 5.734,3.1551 14 | 8.4084,7.2258 15 | 5.6407,0.71618 16 | 5.3794,3.5129 17 | 6.3654,5.3048 18 | 5.1301,0.56077 19 | 6.4296,3.6518 20 | 7.0708,5.3893 21 | 6.1891,3.1386 22 | 20.27,21.767 23 | 5.4901,4.263 24 | 6.3261,5.1875 25 | 5.5649,3.0825 26 | 18.945,22.638 27 | 12.828,13.501 28 | 10.957,7.0467 29 | 13.176,14.692 30 | 22.203,24.147 31 | 5.2524,-1.22 32 | 6.5894,5.9966 33 | 9.2482,12.134 34 | 5.8918,1.8495 35 | 8.2111,6.5426 36 | 7.9334,4.5623 37 | 8.0959,4.1164 38 | 5.6063,3.3928 39 | 12.836,10.117 40 | 6.3534,5.4974 41 | 5.4069,0.55657 42 | 6.8825,3.9115 43 | 11.708,5.3854 44 | 5.7737,2.4406 45 | 7.8247,6.7318 46 | 7.0931,1.0463 47 | 5.0702,5.1337 48 | 5.8014,1.844 49 | 11.7,8.0043 50 | 5.5416,1.0179 51 | 7.5402,6.7504 52 | 5.3077,1.8396 53 | 7.4239,4.2885 54 | 7.6031,4.9981 55 | 6.3328,1.4233 56 | 6.3589,-1.4211 57 | 6.2742,2.4756 58 | 5.6397,4.6042 59 | 9.3102,3.9624 60 | 9.4536,5.4141 61 
| 8.8254,5.1694 62 | 5.1793,-0.74279 63 | 21.279,17.929 64 | 14.908,12.054 65 | 18.959,17.054 66 | 7.2182,4.8852 67 | 8.2951,5.7442 68 | 10.236,7.7754 69 | 5.4994,1.0173 70 | 20.341,20.992 71 | 10.136,6.6799 72 | 7.3345,4.0259 73 | 6.0062,1.2784 74 | 7.2259,3.3411 75 | 5.0269,-2.6807 76 | 6.5479,0.29678 77 | 7.5386,3.8845 78 | 5.0365,5.7014 79 | 10.274,6.7526 80 | 5.1077,2.0576 81 | 5.7292,0.47953 82 | 5.1884,0.20421 83 | 6.3557,0.67861 84 | 9.7687,7.5435 85 | 6.5159,5.3436 86 | 8.5172,4.2415 87 | 9.1802,6.7981 88 | 6.002,0.92695 89 | 5.5204,0.152 90 | 5.0594,2.8214 91 | 5.7077,1.8451 92 | 7.6366,4.2959 93 | 5.8707,7.2029 94 | 5.3054,1.9869 95 | 8.2934,0.14454 96 | 13.394,9.0551 97 | 5.4369,0.61705 98 | -------------------------------------------------------------------------------- /ex1 Linear Regression/gradientDescent.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | from computeCost import compute_cost 6 | 7 | 8 | def gradient_descent(x, y, theta, alpha, iterations): 9 | m = len(y) 10 | j_history = [] 11 | while iterations: 12 | temp_a = np.dot(x, theta) - y 13 | theta = theta - (alpha / m) * np.dot(x.T, temp_a) 14 | j_history.append(compute_cost(x, y, theta)) 15 | iterations -= 1 16 | return theta 17 | -------------------------------------------------------------------------------- /ex2 Logistic Regression/Regularized logistic regression/costFunctionReg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | import sys 6 | 7 | sys.path.append("../") 8 | from sigmoid import sigmoid 9 | 10 | 11 | def cost_function_reg(theta, x, y, reg_lambda): 12 | m = len(y) 13 | sub1 = np.dot((-1 * y).T, np.log(sigmoid(np.dot(x, theta)))) 14 | sub2 = np.dot((1 - y.T), np.log(1 - sigmoid(np.dot(x, theta)))) 15 | j = (1 / m) * np.sum(sub1 - sub2) 16 | j = j + (reg_lambda / (2 * m)) * np.sum((theta[1:]) ** 2) 17 | grad = (1 / m) * np.dot(x.T, (sigmoid(np.dot(x, theta)) - y)) 18 | grad = grad + (reg_lambda / m) * theta 19 | grad[0] = grad[0] - (reg_lambda / m) * theta[0] 20 | return j, grad 21 | -------------------------------------------------------------------------------- /ex2 Logistic Regression/Regularized logistic regression/ex2_reg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | from mapFeature import map_feature 7 | from costFunctionReg import cost_function_reg 8 | from fminunc_reg import my_fminunc_reg 9 | import sys 10 | 11 | sys.path.append("../") 12 | from plotData import plot_data 13 | from plotDecisionBoundary import plot_decision_boundary 14 | from predict import predict 15 | 16 | 17 | def pause_func(): 18 | print('Program paused. 
Press enter to continue.\n') 19 | while input() != '': 20 | pass 21 | 22 | 23 | def load_data(filename): 24 | data_load = np.loadtxt(filename, delimiter=",") 25 | return data_load 26 | 27 | 28 | if __name__ == '__main__': 29 | data = load_data('ex2data2.txt') 30 | data = np.split(data, [2], axis=1) 31 | X = data[0] 32 | y = data[1] 33 | plot_data(X, y) 34 | plt.xlabel('Microchip Test 1') 35 | plt.ylabel('Microchip Test 2') 36 | plt.legend(["y = 1", "y = 0"]) 37 | plt.pause(1.5) 38 | plt.close() 39 | 40 | # =========== Part 1: Regularized Logistic Regression ============ 41 | X = map_feature(X[:, 0], X[:, 1]) 42 | # Initialize fitting parameters 43 | initial_theta = np.zeros((X.shape[1], 1)) 44 | # Set regularization parameter lambda to 1 45 | reg_lambda = 1 46 | # Compute and display initial cost and gradient for regularized logistic regression 47 | cost, grad = cost_function_reg(initial_theta, X, y, reg_lambda) 48 | print('Cost at initial theta (zeros): ', cost, '\nExpected cost (approx): 0.693\n') 49 | np.set_printoptions(suppress=True) 50 | print('Gradient at initial theta (zeros) - first five values only:\n', grad[0: 5]) 51 | print('\nExpected gradients (approx) - first five values only:\n 0.0085\n 0.0188\n 0.0001\n 0.0503\n 0.0115\n') 52 | print('\nProgram paused. Press enter to continue.\n') 53 | # pause_func() 54 | 55 | # Compute and display cost and gradient with all-ones theta and lambda = 10 56 | test_theta = np.ones((X.shape[1], 1)) 57 | cost, grad = cost_function_reg(test_theta, X, y, 10) 58 | print('Cost at test theta (with lambda = 10): ', cost, '\nExpected cost (approx): 3.16\n') 59 | np.set_printoptions(suppress=True) 60 | print('Gradient at test theta - first five values only:\n', grad[0: 5]) 61 | print('\nExpected gradients (approx) - first five values only:\n 0.3460\n 0.1614\n 0.1948\n 0.2269\n 0.0922\n') 62 | print('\nProgram paused. 
Press enter to continue.\n') 63 | # pause_func() 64 | 65 | # ============= Part 2: Regularization and Accuracies ============= 66 | reg_lambda = 1 67 | result = my_fminunc_reg(X, y, initial_theta, reg_lambda) 68 | theta = result["x"] 69 | # Plot Boundary 70 | plot_decision_boundary(theta, X, y) 71 | plt.xlabel('Microchip Test 1') 72 | plt.ylabel('Microchip Test 2') 73 | plt.legend() 74 | plt.title('lambda = %g' % reg_lambda) 75 | plt.pause(2) 76 | plt.close() 77 | # Compute accuracy on our training set 78 | p = predict(theta, X).reshape(118, 1) 79 | print('Train Accuracy: ', np.mean((p == y)) * 100) 80 | print('\nExpected accuracy (approx): 83.1\n') 81 | -------------------------------------------------------------------------------- /ex2 Logistic Regression/Regularized logistic regression/ex2data2.txt: -------------------------------------------------------------------------------- 1 | 0.051267,0.69956,1 2 | -0.092742,0.68494,1 3 | -0.21371,0.69225,1 4 | -0.375,0.50219,1 5 | -0.51325,0.46564,1 6 | -0.52477,0.2098,1 7 | -0.39804,0.034357,1 8 | -0.30588,-0.19225,1 9 | 0.016705,-0.40424,1 10 | 0.13191,-0.51389,1 11 | 0.38537,-0.56506,1 12 | 0.52938,-0.5212,1 13 | 0.63882,-0.24342,1 14 | 0.73675,-0.18494,1 15 | 0.54666,0.48757,1 16 | 0.322,0.5826,1 17 | 0.16647,0.53874,1 18 | -0.046659,0.81652,1 19 | -0.17339,0.69956,1 20 | -0.47869,0.63377,1 21 | -0.60541,0.59722,1 22 | -0.62846,0.33406,1 23 | -0.59389,0.005117,1 24 | -0.42108,-0.27266,1 25 | -0.11578,-0.39693,1 26 | 0.20104,-0.60161,1 27 | 0.46601,-0.53582,1 28 | 0.67339,-0.53582,1 29 | -0.13882,0.54605,1 30 | -0.29435,0.77997,1 31 | -0.26555,0.96272,1 32 | -0.16187,0.8019,1 33 | -0.17339,0.64839,1 34 | -0.28283,0.47295,1 35 | -0.36348,0.31213,1 36 | -0.30012,0.027047,1 37 | -0.23675,-0.21418,1 38 | -0.06394,-0.18494,1 39 | 0.062788,-0.16301,1 40 | 0.22984,-0.41155,1 41 | 0.2932,-0.2288,1 42 | 0.48329,-0.18494,1 43 | 0.64459,-0.14108,1 44 | 0.46025,0.012427,1 45 | 0.6273,0.15863,1 46 | 0.57546,0.26827,1 47 | 0.72523,0.44371,1 48 | 0.22408,0.52412,1 49 | 0.44297,0.67032,1 50 | 0.322,0.69225,1 51 | 0.13767,0.57529,1 52 | -0.0063364,0.39985,1 53 | -0.092742,0.55336,1 54 | -0.20795,0.35599,1 55 | -0.20795,0.17325,1 56 | -0.43836,0.21711,1 57 | -0.21947,-0.016813,1 58 | -0.13882,-0.27266,1 59 | 0.18376,0.93348,0 60 | 0.22408,0.77997,0 61 | 0.29896,0.61915,0 62 | 0.50634,0.75804,0 63 | 0.61578,0.7288,0 64 | 0.60426,0.59722,0 65 | 0.76555,0.50219,0 66 | 0.92684,0.3633,0 67 | 0.82316,0.27558,0 68 | 0.96141,0.085526,0 69 | 0.93836,0.012427,0 70 | 0.86348,-0.082602,0 71 | 0.89804,-0.20687,0 72 | 0.85196,-0.36769,0 73 | 0.82892,-0.5212,0 74 | 0.79435,-0.55775,0 75 | 0.59274,-0.7405,0 76 | 0.51786,-0.5943,0 77 | 0.46601,-0.41886,0 78 | 0.35081,-0.57968,0 79 | 0.28744,-0.76974,0 80 | 0.085829,-0.75512,0 81 | 0.14919,-0.57968,0 82 | -0.13306,-0.4481,0 83 | -0.40956,-0.41155,0 84 | -0.39228,-0.25804,0 85 | -0.74366,-0.25804,0 86 | -0.69758,0.041667,0 87 | -0.75518,0.2902,0 88 | -0.69758,0.68494,0 89 | -0.4038,0.70687,0 90 | -0.38076,0.91886,0 91 | -0.50749,0.90424,0 92 | -0.54781,0.70687,0 93 | 0.10311,0.77997,0 94 | 0.057028,0.91886,0 95 | -0.10426,0.99196,0 96 | -0.081221,1.1089,0 97 | 0.28744,1.087,0 98 | 0.39689,0.82383,0 99 | 0.63882,0.88962,0 100 | 0.82316,0.66301,0 101 | 0.67339,0.64108,0 102 | 1.0709,0.10015,0 103 | -0.046659,-0.57968,0 104 | -0.23675,-0.63816,0 105 | -0.15035,-0.36769,0 106 | -0.49021,-0.3019,0 107 | -0.46717,-0.13377,0 108 | -0.28859,-0.060673,0 109 | -0.61118,-0.067982,0 110 | -0.66302,-0.21418,0 111 | -0.59965,-0.41886,0 112 | 
-0.72638,-0.082602,0 113 | -0.83007,0.31213,0 114 | -0.72062,0.53874,0 115 | -0.59389,0.49488,0 116 | -0.48445,0.99927,0 117 | -0.0063364,0.99927,0 118 | 0.63265,-0.030612,0 119 | -------------------------------------------------------------------------------- /ex2 Logistic Regression/Regularized logistic regression/fminunc_reg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | import scipy.optimize as sciopt 6 | 7 | 8 | def my_fminunc_reg(x, y, theta, reg_lambda): 9 | return sciopt.minimize(fun=cost_function, x0=theta, args=(x, y, reg_lambda), method="TNC", jac=gradient) 10 | 11 | 12 | def gradient(theta, x, y, reg_lambda): 13 | m, n = x.shape 14 | theta = theta.reshape((n, 1)) 15 | y = y.reshape((m, 1)) 16 | 17 | grad = (x.T.dot(sigmoid(np.dot(x, theta)) - y)) / m 18 | grad = grad + (reg_lambda / m) * theta 19 | grad[0] = grad[0] - (reg_lambda / m) * theta[0] 20 | 21 | return grad.flatten() 22 | 23 | 24 | def cost_function(theta, x, y, reg_lambda): 25 | m, n = x.shape 26 | theta = theta.reshape((n, 1)) 27 | y = y.reshape((m, 1)) 28 | 29 | s1 = np.log(sigmoid(np.dot(x, theta))) 30 | s2 = np.log(1 - sigmoid(np.dot(x, theta))) 31 | 32 | s1 = s1.reshape((m, 1)) 33 | s2 = s2.reshape((m, 1)) 34 | 35 | s = y * s1 + (1 - y) * s2 36 | j = -(np.sum(s)) / m 37 | j = j + (reg_lambda / (2 * m)) * np.sum((theta[1:]) ** 2) 38 | 39 | return j 40 | 41 | 42 | def sigmoid(x): 43 | return 1 / (1 + np.exp(-1 * x)) 44 | -------------------------------------------------------------------------------- /ex2 Logistic Regression/Regularized logistic regression/mapFeature.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | 6 | 7 | def map_feature(x1, x2): 8 | # MAPFEATURE Feature mapping function to polynomial features 9 | # MAPFEATURE(X1, X2) maps the two input features 10 | # to quadratic features used in the regularization exercise. 11 | # Returns a new feature array with more features, comprising of 12 | # X1, X2, X1.^2, X2.^2, X1*X2, X1*X2.^2, etc.. 
13 | # Inputs X1, X2 must be the same size 14 | 15 | degree = 6 16 | x1 = x1.reshape(x1.shape[0], 1) 17 | x2 = x2.reshape(x2.shape[0], 1) 18 | out = np.ones(x1.shape) 19 | for i in range(1, degree + 1): 20 | for j in range(i + 1): 21 | out = np.append(out, ((x1 ** (i - j)) * (x2 ** j)), axis=1) 22 | return out 23 | -------------------------------------------------------------------------------- /ex2 Logistic Regression/costFunction.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | from sigmoid import sigmoid 6 | 7 | 8 | def cost_function(theta, x, y): 9 | m = len(y) 10 | sub1 = np.dot((-1 * y).T, np.log(sigmoid(np.dot(x, theta)))) 11 | sub2 = np.dot((1 - y.T), np.log(1 - sigmoid(np.dot(x, theta)))) 12 | j = (1 / m) * np.sum(sub1 - sub2) 13 | grad = (1 / m) * np.dot(x.T, (sigmoid(np.dot(x, theta)) - y)) 14 | return j, grad 15 | -------------------------------------------------------------------------------- /ex2 Logistic Regression/ex2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | from plotData import plot_data 7 | from costFunction import cost_function 8 | from fminunc import my_fminunc 9 | from plotDecisionBoundary import plot_decision_boundary 10 | from sigmoid import sigmoid 11 | from predict import predict 12 | 13 | 14 | def pause_func(): 15 | print('Program paused. Press enter to continue.\n') 16 | while input() != '': 17 | pass 18 | 19 | 20 | def load_data(filename): 21 | data_load = np.loadtxt(filename, delimiter=",") 22 | return data_load 23 | 24 | 25 | if __name__ == '__main__': 26 | data = load_data('ex2data1.txt') 27 | data = np.split(data, [2], axis=1) 28 | X = data[0] 29 | y = data[1] 30 | 31 | # ==================== Part 1: Plotting ==================== 32 | print('Plotting data with + indicating (y = 1) examples and o indicating (y = 0) examples.\n') 33 | plot_data(X, y) 34 | plt.xlabel('Exam 1 score') 35 | plt.ylabel('Exam 2 score') 36 | plt.legend(["Admitted", "Not admitted"]) 37 | plt.pause(3) 38 | plt.close() 39 | print('Program paused. Press enter to continue.\n') 40 | # pause_func() 41 | 42 | # ============ Part 2: Compute Cost and Gradient ============ 43 | # Setup the data matrix appropriately, and add ones for the intercept term 44 | m, n = X.shape 45 | # Add intercept term to x and X_test 46 | X = np.append(np.ones((m, 1)), X, axis=1) 47 | # Initialize fitting parameters 48 | initial_theta = np.zeros((n + 1, 1)) 49 | # Compute and display initial cost and gradient 50 | cost, grad = cost_function(initial_theta, X, y) 51 | print('Cost at initial theta (zeros): \n', cost, '\nExpected cost (approx): 0.693\n') 52 | print('Gradient at initial theta (zeros): \n', grad, 53 | '\nExpected gradients (approx):\n -0.1000\n -12.0092\n -11.2628\n') 54 | # Compute and display cost and gradient with non-zero theta 55 | test_theta = np.array(([-24], [0.2], [0.2])) 56 | cost, grad = cost_function(test_theta, X, y) 57 | print('\nCost at test theta: \n', cost, '\nExpected cost (approx): 0.218\n') 58 | print('Gradient at test theta: \n', grad, '\nExpected gradients (approx):\n 0.043\n 2.566\n 2.647\n') 59 | print('\nProgram paused. 
Press enter to continue.\n') 60 | # pause_func() 61 | 62 | # ============= Part 3: Optimizing using fminunc ============= 63 | result = my_fminunc(X, y, initial_theta) 64 | theta = result["x"] 65 | # Print theta to screen 66 | print('Cost at theta found by fminunc: \n', result["fun"], '\nExpected cost (approx): 0.203\n') 67 | print('theta: \n', theta, '\nExpected theta (approx):\n -25.161\n 0.206\n 0.201\n') 68 | # Plot Boundary 69 | plot_decision_boundary(theta, X, y) 70 | plt.xlabel('Exam 1 score') 71 | plt.ylabel('Exam 2 score') 72 | # Legend, specific for the exercise 73 | plt.legend(loc='upper right') 74 | plt.pause(2) 75 | plt.close() 76 | print('\nProgram paused. Press enter to continue.\n') 77 | # pause_func() 78 | 79 | # ============== Part 4: Predict and Accuracies ============== 80 | prob = sigmoid(np.dot(np.array([1, 45, 85]), theta)) 81 | print('For a student with scores 45 and 85, we predict an admission probability of \n', prob) 82 | print('\nExpected value: 0.775 +/- 0.002\n\n') 83 | # Compute accuracy on our training set 84 | p = predict(theta, X).reshape(100, 1) 85 | print('Train Accuracy: ', np.mean((p == y)) * 100) 86 | print('\nExpected accuracy (approx): 89.0\n') 87 | -------------------------------------------------------------------------------- /ex2 Logistic Regression/ex2data1.txt: -------------------------------------------------------------------------------- 1 | 34.62365962451697,78.0246928153624,0 2 | 30.28671076822607,43.89499752400101,0 3 | 35.84740876993872,72.90219802708364,0 4 | 60.18259938620976,86.30855209546826,1 5 | 79.0327360507101,75.3443764369103,1 6 | 45.08327747668339,56.3163717815305,0 7 | 61.10666453684766,96.51142588489624,1 8 | 75.02474556738889,46.55401354116538,1 9 | 76.09878670226257,87.42056971926803,1 10 | 84.43281996120035,43.53339331072109,1 11 | 95.86155507093572,38.22527805795094,0 12 | 75.01365838958247,30.60326323428011,0 13 | 82.30705337399482,76.48196330235604,1 14 | 69.36458875970939,97.71869196188608,1 15 | 39.53833914367223,76.03681085115882,0 16 | 53.9710521485623,89.20735013750205,1 17 | 69.07014406283025,52.74046973016765,1 18 | 67.94685547711617,46.67857410673128,0 19 | 70.66150955499435,92.92713789364831,1 20 | 76.97878372747498,47.57596364975532,1 21 | 67.37202754570876,42.83843832029179,0 22 | 89.67677575072079,65.79936592745237,1 23 | 50.534788289883,48.85581152764205,0 24 | 34.21206097786789,44.20952859866288,0 25 | 77.9240914545704,68.9723599933059,1 26 | 62.27101367004632,69.95445795447587,1 27 | 80.1901807509566,44.82162893218353,1 28 | 93.114388797442,38.80067033713209,0 29 | 61.83020602312595,50.25610789244621,0 30 | 38.78580379679423,64.99568095539578,0 31 | 61.379289447425,72.80788731317097,1 32 | 85.40451939411645,57.05198397627122,1 33 | 52.10797973193984,63.12762376881715,0 34 | 52.04540476831827,69.43286012045222,1 35 | 40.23689373545111,71.16774802184875,0 36 | 54.63510555424817,52.21388588061123,0 37 | 33.91550010906887,98.86943574220611,0 38 | 64.17698887494485,80.90806058670817,1 39 | 74.78925295941542,41.57341522824434,0 40 | 34.1836400264419,75.2377203360134,0 41 | 83.90239366249155,56.30804621605327,1 42 | 51.54772026906181,46.85629026349976,0 43 | 94.44336776917852,65.56892160559052,1 44 | 82.36875375713919,40.61825515970618,0 45 | 51.04775177128865,45.82270145776001,0 46 | 62.22267576120188,52.06099194836679,0 47 | 77.19303492601364,70.45820000180959,1 48 | 97.77159928000232,86.7278223300282,1 49 | 62.07306379667647,96.76882412413983,1 50 | 91.56497449807442,88.69629254546599,1 51 | 
79.94481794066932,74.16311935043758,1 52 | 99.2725269292572,60.99903099844988,1 53 | 90.54671411399852,43.39060180650027,1 54 | 34.52451385320009,60.39634245837173,0 55 | 50.2864961189907,49.80453881323059,0 56 | 49.58667721632031,59.80895099453265,0 57 | 97.64563396007767,68.86157272420604,1 58 | 32.57720016809309,95.59854761387875,0 59 | 74.24869136721598,69.82457122657193,1 60 | 71.79646205863379,78.45356224515052,1 61 | 75.3956114656803,85.75993667331619,1 62 | 35.28611281526193,47.02051394723416,0 63 | 56.25381749711624,39.26147251058019,0 64 | 30.05882244669796,49.59297386723685,0 65 | 44.66826172480893,66.45008614558913,0 66 | 66.56089447242954,41.09209807936973,0 67 | 40.45755098375164,97.53518548909936,1 68 | 49.07256321908844,51.88321182073966,0 69 | 80.27957401466998,92.11606081344084,1 70 | 66.74671856944039,60.99139402740988,1 71 | 32.72283304060323,43.30717306430063,0 72 | 64.0393204150601,78.03168802018232,1 73 | 72.34649422579923,96.22759296761404,1 74 | 60.45788573918959,73.09499809758037,1 75 | 58.84095621726802,75.85844831279042,1 76 | 99.82785779692128,72.36925193383885,1 77 | 47.26426910848174,88.47586499559782,1 78 | 50.45815980285988,75.80985952982456,1 79 | 60.45555629271532,42.50840943572217,0 80 | 82.22666157785568,42.71987853716458,0 81 | 88.9138964166533,69.80378889835472,1 82 | 94.83450672430196,45.69430680250754,1 83 | 67.31925746917527,66.58935317747915,1 84 | 57.23870631569862,59.51428198012956,1 85 | 80.36675600171273,90.96014789746954,1 86 | 68.46852178591112,85.59430710452014,1 87 | 42.0754545384731,78.84478600148043,0 88 | 75.47770200533905,90.42453899753964,1 89 | 78.63542434898018,96.64742716885644,1 90 | 52.34800398794107,60.76950525602592,0 91 | 94.09433112516793,77.15910509073893,1 92 | 90.44855097096364,87.50879176484702,1 93 | 55.48216114069585,35.57070347228866,0 94 | 74.49269241843041,84.84513684930135,1 95 | 89.84580670720979,45.35828361091658,1 96 | 83.48916274498238,48.38028579728175,1 97 | 42.2617008099817,87.10385094025457,1 98 | 99.31500880510394,68.77540947206617,1 99 | 55.34001756003703,64.9319380069486,1 100 | 74.77589300092767,89.52981289513276,1 101 | -------------------------------------------------------------------------------- /ex2 Logistic Regression/fminunc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | import scipy.optimize as sciopt 6 | from sigmoid import sigmoid 7 | 8 | 9 | def my_fminunc(x, y, theta): 10 | return sciopt.minimize(fun=cost_function, x0=theta, args=(x, y), method="TNC", jac=gradient) 11 | 12 | 13 | def gradient(theta, x, y): 14 | m, n = x.shape 15 | theta = theta.reshape((n, 1)) 16 | y = y.reshape((m, 1)) 17 | 18 | grad = (x.T.dot(sigmoid(np.dot(x, theta)) - y)) / m 19 | 20 | return grad.flatten() 21 | 22 | 23 | def cost_function(theta, x, y): 24 | m, n = x.shape 25 | theta = theta.reshape((n, 1)) 26 | y = y.reshape((m, 1)) 27 | 28 | s1 = np.log(sigmoid(np.dot(x, theta))) 29 | s2 = np.log(1 - sigmoid(np.dot(x, theta))) 30 | 31 | s1 = s1.reshape((m, 1)) 32 | s2 = s2.reshape((m, 1)) 33 | 34 | s = y * s1 + (1 - y) * s2 35 | j = -(np.sum(s)) / m 36 | 37 | return j 38 | -------------------------------------------------------------------------------- /ex2 Logistic Regression/plotData.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | 7 | 8 | # 
import pandas as pd
9 | # from pandas import DataFrame
10 | 
11 | 
12 | def plot_data(x, y):
13 |     # pd_data = DataFrame(data, columns=['Exam 1 score', 'Exam 2 score', 'y'])
14 |     # pd_data_y_plus_1 = pd_data.loc[pd_data['y'] == 1.0][['Exam 1 score', 'Exam 2 score']]
15 |     # pd_data_y_plus_0 = pd_data.loc[pd_data['y'] == 0.0][['Exam 1 score', 'Exam 2 score']]
16 |     # np_data_y_plus_1 = np.array(pd_data_y_plus_1)
17 |     # np_data_y_plus_0 = np.array(pd_data_y_plus_0)
18 | 
19 |     pos = np.where(y[:, 0] == 1.0)
20 |     neg = np.where(y[:, 0] == 0.0)
21 | 
22 |     plt.ion()
23 |     plt.figure()
24 |     plt.scatter(x[pos, 0], x[pos, 1], marker="+")
25 |     plt.scatter(x[neg, 0], x[neg, 1], marker="o")
26 |     # plt.xlabel('Exam 1 score')
27 |     # plt.ylabel('Exam 2 score')
28 |     # plt.legend()
29 |     # plt.pause(0.5)
30 |     # plt.close()
31 | 
--------------------------------------------------------------------------------
/ex2 Logistic Regression/plotDecisionBoundary.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding=utf-8 -*-
3 | 
4 | import numpy as np
5 | import matplotlib.pyplot as plt
6 | import sys
7 | 
8 | sys.path.append("./Regularized logistic regression")
9 | from mapFeature import map_feature
10 | 
11 | 
12 | def plot_decision_boundary(theta, x, y):
13 |     # Plot Data
14 |     pos = np.where(y[:, 0] == 1.0)
15 |     neg = np.where(y[:, 0] == 0.0)
16 |     temp_x = x[:, [1, 2]]
17 | 
18 |     plt.ion()
19 |     plt.figure()
20 |     plt.scatter(temp_x[pos, 0], temp_x[pos, 1], marker="+", label="Admitted")
21 |     plt.scatter(temp_x[neg, 0], temp_x[neg, 1], marker="o", label="Not admitted")
22 | 
23 |     m, n = x.shape
24 |     theta = theta.reshape(theta.shape[0], 1)
25 |     if n <= 3:
26 |         # Only need 2 points to define a line, so choose two endpoints
27 |         plot_x = np.array(([x[:, 1].min(), x[:, 1].max()])).reshape(2, 1)
28 |         plot_y = ((-1 / theta[2]) * (theta[1] * plot_x + theta[0])).reshape(2, 1)
29 |         # Plot, and adjust axes for better viewing
30 |         plt.plot(plot_x[0], plot_y[0], 'rx', markersize=10)
31 |         plt.plot(plot_x[1], plot_y[1], 'rx', markersize=10)
32 |         plt.plot(plot_x, plot_y, '-', label="Decision boundary")
33 |         plt.axis([15, 120, 15, 120])
34 | 
35 |     else:
36 |         u = np.linspace(-1, 1.5, 50)
37 |         v = np.linspace(-1, 1.5, 50)
38 |         z = np.zeros((len(u), len(v)))
39 | 
40 |         for i in range(len(u)):
41 |             for j in range(len(v)):
42 |                 z[i, j] = np.dot(map_feature(np.array([u[i]]), np.array([v[j]])), theta)
43 |         plt.contour(u, v, z.T, [0]).collections[0].set_label("Decision boundary")
--------------------------------------------------------------------------------
/ex2 Logistic Regression/predict.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding=utf-8 -*-
3 | 
4 | import numpy as np
5 | from sigmoid import sigmoid
6 | 
7 | 
8 | def predict(theta, x):
9 |     return np.floor(sigmoid(np.dot(x, theta)) + 0.5)
10 | 
--------------------------------------------------------------------------------
/ex2 Logistic Regression/sigmoid.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding=utf-8 -*-
3 | 
4 | # import numpy as np
5 | from scipy.special import expit
6 | 
7 | 
8 | def sigmoid(x):
9 |     # return 1 / (1 + np.exp(-1 * x))
10 |     return expit(x)
11 | 
--------------------------------------------------------------------------------
/ex3 Multi-class Classification and Neural Networks/Neural Networks FP/ex3_nn.py:
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | from scipy.io import loadmat 6 | from predict_nn_fp import predict 7 | 8 | import sys 9 | 10 | sys.path.append("../") 11 | from displayData import display_data 12 | 13 | 14 | def pause_func(): 15 | print('Program paused. Press enter to continue.\n') 16 | while input() != '': 17 | pass 18 | 19 | 20 | def load_mat_data(filename): 21 | return loadmat(filename) 22 | 23 | 24 | if __name__ == '__main__': 25 | # Setup the parameters you will use for this part of the exercise 26 | # 20x20 Input Imc c.reshape(2,8,order='F')= a.ravel(order='F')ages of Digits 27 | input_layer_size = 400 28 | # 25 hidden units 29 | hidden_layer_size = 25 30 | # 10 labels, from 1 to 10 31 | num_labels = 10 32 | 33 | # =========== Part 1: Loading and Visualizing Data ============= 34 | data = load_mat_data("../ex3data1.mat") 35 | X = data['X'] 36 | y = data['y'] 37 | m = len(y) 38 | # Load Training Data 39 | print('Loading and Visualizing Data ...\n') 40 | # Randomly select 100 data points to display 41 | shuffle_100_X = np.arange(0, m, 1, dtype=int) 42 | np.random.shuffle(shuffle_100_X) 43 | sel = X[shuffle_100_X[0:100], :] 44 | display_data(sel) 45 | print('Program paused. Press enter to continue.\n') 46 | # pause_func() 47 | 48 | # ================ Part 2: Loading Pameters ================ 49 | # In this part of the exercise, we load some pre-initialized neural network parameters. 50 | print('\nLoading Saved Neural Network Parameters ...\n') 51 | # Load the weights into variables Theta1 and Theta2 52 | theta1_2 = load_mat_data('ex3weights.mat') 53 | Theta1 = theta1_2['Theta1'] 54 | Theta2 = theta1_2['Theta2'] 55 | 56 | # ================= Part 3: Implement Predict ================= 57 | # After training the neural network, we would like to use it to predict 58 | # the labels. You will now implement the "predict" function to use the 59 | # neural network to predict the labels of the training set. This lets 60 | # you compute the training set accuracy. 61 | pred = predict(Theta1, Theta2, X) 62 | print('\nTraining Set Accuracy: \n', np.mean((pred == y) * 100)) 63 | print('Program paused. 
64 | # pause_func() 65 | 66 | rp = np.arange(0, m, 1, dtype=int) 67 | np.random.shuffle(rp) 68 | for i in range(m): 69 | print('\nDisplaying Example Image\n') 70 | display_data(X[rp[i], :].reshape(1, 400)) 71 | pred = predict(Theta1, Theta2, X[rp[i], :].reshape(1, 400)) 72 | print('\nNeural Network Prediction: %d (digit %d)\n' % (pred, np.mod(pred, 10))) 73 | print('Paused - press enter to continue, q to exit:\n') 74 | if input() == 'q': 75 | break 76 | -------------------------------------------------------------------------------- /ex3 Multi-class Classification and Neural Networks/Neural Networks FP/ex3weights.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/ex3 Multi-class Classification and Neural Networks/Neural Networks FP/ex3weights.mat -------------------------------------------------------------------------------- /ex3 Multi-class Classification and Neural Networks/Neural Networks FP/predict_nn_fp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | import sys 6 | 7 | sys.path.append('../') 8 | from sigmoid import sigmoid 9 | 10 | 11 | def predict(theta1, theta2, x): 12 | m = x.shape[0] 13 | p = np.zeros((m, 1)) 14 | print('\nSize of x : ', x.shape) 15 | x = np.append(np.ones((m, 1)), x, axis=1) 16 | a2 = sigmoid(np.dot(theta1, x.T)) 17 | print('\nSize of a2 : ', a2.shape) 18 | a2 = np.append(np.ones((1, a2.shape[1])), a2, axis=0) 19 | a3 = sigmoid(np.dot(theta2, a2)) 20 | print('\nSize of a3 : ', a3.shape) 21 | for i in range(m): 22 | p[i] = np.argmax(a3[:, i]) 23 | return p + 1 24 | -------------------------------------------------------------------------------- /ex3 Multi-class Classification and Neural Networks/displayData.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | 7 | 8 | def display_data(x, example_width=None): 9 | m, n = x.shape 10 | # Set example_width automatically if not passed in 11 | if not example_width: 12 | example_width = int(np.round(np.sqrt(n))) 13 | example_height = int((n / example_width)) 14 | 15 | # Compute number of items to display 16 | display_rows = int(np.floor(np.sqrt(m))) 17 | display_cols = int(np.ceil(m / display_rows)) 18 | 19 | # Between images padding 20 | pad = 1 21 | # Setup blank display 22 | display_array = - np.ones((pad + display_rows * (example_height + pad), 23 | pad + display_cols * (example_width + pad))) 24 | 25 | # Copy each example into a patch on the display array 26 | curr_ex = 0 27 | for j in range(display_rows): 28 | for i in range(display_cols): 29 | if curr_ex >= m: 30 | break 31 | # Get the max value of the patch 32 | max_val = np.max(np.abs(x[curr_ex, :])) 33 | 34 | wait_set_temp = np.reshape(x[curr_ex, :], 35 | (example_height, example_width), order='F') / max_val 36 | height_min_temp = pad + (j - 0) * (example_height + pad) 37 | height_max_temp = height_min_temp + example_height 38 | width_min_temp = pad + (i - 0) * (example_width + pad) 39 | width_max_temp = width_min_temp + example_width 40 | display_array[height_min_temp:height_max_temp, width_min_temp:width_max_temp] = wait_set_temp 41 | curr_ex = curr_ex + 1 42 | if curr_ex >= m: 43 | break 44 | plt.ion()
45 | plt.imshow(display_array, cmap="gray") # pick a nice colormap 46 | plt.pause(1) 47 | -------------------------------------------------------------------------------- /ex3 Multi-class Classification and Neural Networks/ex3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | from scipy.io import loadmat 6 | from displayData import display_data 7 | from lrCostFunction import lr_cost_function 8 | from oneVsAll import one_vs_all 9 | from predictOneVsAll import predict_one_vs_all 10 | 11 | 12 | def pause_func(): 13 | print('Program paused. Press enter to continue.\n') 14 | while input() != '': 15 | pass 16 | 17 | 18 | def load_mat_data(filename): 19 | return loadmat(filename) 20 | 21 | 22 | if __name__ == '__main__': 23 | # Setup the parameters you will use for this part of the exercise 24 | # 20x20 Input Images of Digits 25 | input_layer_size = 400 26 | # 10 labels, from 1 to 10 27 | num_labels = 10 28 | # =========== Part 1: Loading and Visualizing Data ============= 29 | data = load_mat_data("ex3data1.mat") 30 | X = data['X'] 31 | y = data['y'] 32 | m = len(y) 33 | # Load Training Data 34 | print('Loading and Visualizing Data ...\n') 35 | # Randomly select 100 data points to display 36 | shuffle_100_X = np.arange(0, m, 1, dtype=int) 37 | np.random.shuffle(shuffle_100_X) 38 | sel = X[shuffle_100_X[0:100], :] 39 | display_data(sel) 40 | print('Program paused. Press enter to continue.\n') 41 | # pause_func() 42 | 43 | # ============ Part 2a: Vectorize Logistic Regression ============ 44 | # Test case for lrCostFunction 45 | print('\nTesting lrCostFunction() with regularization') 46 | theta_t = np.array([[-2], [-1], [1], [2]]) 47 | X_t = np.append(np.ones((5, 1)), np.arange(1, 16).reshape(5, 3, order='F') / 10, axis=1) 48 | y_t = np.array([[1], [0], [1], [0], [1]]) 49 | lambda_t = 3 50 | J, grad = lr_cost_function(theta_t, X_t, y_t, lambda_t) 51 | print('\nCost: \n', J, '\nExpected cost: 2.534819\n') 52 | print('Gradients:\n', grad, '\nExpected gradients:\n', ' 0.146561\n -0.548558\n 0.724722\n 1.398003\n') 53 | print('Program paused. Press enter to continue.\n') 54 | # pause_func() 55 | # ============ Part 2b: One-vs-All Training ============ 56 | print('\nTraining One-vs-All Logistic Regression...\n') 57 | ova_lambda = 0.1 58 | all_theta = one_vs_all(X, y, num_labels, ova_lambda) 59 | print('Program paused.
Press enter to continue.\n') 60 | # pause_func() 61 | 62 | # ================ Part 3: Predict for One-Vs-All ================ 63 | pred = predict_one_vs_all(all_theta, X) + 1 64 | print('\nTraining Set Accuracy: \n', np.mean((pred == y) * 100)) 65 | -------------------------------------------------------------------------------- /ex3 Multi-class Classification and Neural Networks/ex3data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/ex3 Multi-class Classification and Neural Networks/ex3data1.mat -------------------------------------------------------------------------------- /ex3 Multi-class Classification and Neural Networks/fminunc_lr.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | import scipy.optimize as sciopt 6 | from sigmoid import sigmoid 7 | 8 | 9 | def my_fminunc_lr(x, y, theta, lr_lambda): 10 | return sciopt.minimize(fun=cost_function, x0=theta, args=(x, y, lr_lambda), method="L-BFGS-B", jac=gradient) 11 | 12 | 13 | def gradient(theta, x, y, lr_lambda): 14 | m, n = x.shape 15 | theta = theta.reshape((n, 1)) 16 | y = y.reshape((m, 1)) 17 | 18 | grad = (x.T.dot(sigmoid(np.dot(x, theta)) - y)) / m 19 | grad = grad + (lr_lambda / m) * theta 20 | grad[0] = grad[0] - (lr_lambda / m) * theta[0] 21 | 22 | return grad.flatten() 23 | 24 | 25 | def cost_function(theta, x, y, lr_lambda): 26 | m, n = x.shape 27 | theta = theta.reshape((n, 1)) 28 | y = y.reshape((m, 1)) 29 | 30 | s1 = np.log(sigmoid(np.dot(x, theta))) 31 | s2 = np.log(1 - sigmoid(np.dot(x, theta))) 32 | 33 | s1 = s1.reshape((m, 1)) 34 | s2 = s2.reshape((m, 1)) 35 | 36 | s = y * s1 + (1 - y) * s2 37 | j = -(np.sum(s)) / m 38 | j = j + (lr_lambda / (2 * m)) * np.sum((theta[1:]) ** 2) 39 | 40 | return j 41 | -------------------------------------------------------------------------------- /ex3 Multi-class Classification and Neural Networks/lrCostFunction.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | from sigmoid import sigmoid 6 | 7 | 8 | def lr_cost_function(theta, x, y, lr_lambda): 9 | m = len(y) 10 | sub1 = np.dot((-1 * y).T, np.log(sigmoid(np.dot(x, theta)))) 11 | sub2 = np.dot((1 - y.T), np.log(1 - sigmoid(np.dot(x, theta)))) 12 | j = (1 / m) * np.sum(sub1 - sub2) 13 | j = j + (lr_lambda / (2 * m)) * np.sum((theta[1:]) ** 2) 14 | grad = (1 / m) * np.dot(x.T, (sigmoid(np.dot(x, theta)) - y)) 15 | grad = grad + (lr_lambda / m) * theta 16 | grad[0] = grad[0] - (lr_lambda / m) * theta[0] 17 | return j, grad 18 | -------------------------------------------------------------------------------- /ex3 Multi-class Classification and Neural Networks/oneVsAll.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | from fminunc_lr import my_fminunc_lr 6 | 7 | 8 | def one_vs_all(x, y, num_labels, ova_lambda): 9 | m, n = x.shape 10 | all_theta = np.zeros((num_labels, n + 1)) 11 | x = np.append(np.ones((m, 1)), x, axis=1) 12 | for i in range(1, num_labels + 1): 13 | initial_theta = np.zeros((n + 1, 1)) 14 | y_temp = (y == i).astype(np.int32) 15 | result = my_fminunc_lr(x, y_temp, initial_theta, ova_lambda) 16 | print('\nIteration: 
%4d' % result['nit'], ' | Cost: ', result['fun']) 17 | all_theta[i - 1, :] = result['x'].T 18 | return all_theta 19 | -------------------------------------------------------------------------------- /ex3 Multi-class Classification and Neural Networks/predictOneVsAll.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | 6 | 7 | def predict_one_vs_all(all_theta, x): 8 | m = x.shape[0] 9 | p = np.zeros((m, 1)) 10 | x = np.append(np.ones((m, 1)), x, axis=1) 11 | prob = np.dot(x, all_theta.T) 12 | for i in range(m): 13 | p[i] = np.argmax(prob[i, :]) 14 | return p 15 | -------------------------------------------------------------------------------- /ex3 Multi-class Classification and Neural Networks/sigmoid.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | # import numpy as np 5 | from scipy.special import expit 6 | 7 | 8 | def sigmoid(x): 9 | # return 1 / (1 + np.exp(-1 * x)) 10 | return expit(x) 11 | -------------------------------------------------------------------------------- /ex4 Neural Networks Learning/Training_NN.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | import scipy.optimize as sciopt 6 | from sigmoid import sigmoid 7 | from sigmoidGradient import sigmoid_gradient 8 | 9 | static_grad = np.arange(0) 10 | 11 | 12 | def training_nn_fmincg(nn_params, 13 | input_layer_size, 14 | hidden_layer_size, 15 | num_labels, 16 | x, y, nn_cost_lambda, 17 | maxiter=50): 18 | global static_grad 19 | static_grad = nn_params 20 | print('\n') 21 | return sciopt.minimize(fun=cost_function, x0=nn_params, 22 | args=(input_layer_size, hidden_layer_size, x, y, num_labels, nn_cost_lambda), 23 | method="CG", jac=gradient, options={"maxiter": maxiter, "disp": False}) 24 | 25 | 26 | def gradient(*args): 27 | return static_grad.flatten() 28 | 29 | 30 | def cost_function(nn_params, 31 | input_layer_size, 32 | hidden_layer_size, 33 | x, 34 | y, 35 | num_labels, 36 | nn_cost_lambda): 37 | # Reshape nn_params back into the parameters Theta1 and Theta2, the weight matrices for our 2 layer neural network 38 | theta1 = np.reshape(nn_params[0:hidden_layer_size * (input_layer_size + 1)], 39 | (hidden_layer_size, input_layer_size + 1), order='F') 40 | theta2 = np.reshape(nn_params[hidden_layer_size * (input_layer_size + 1):], 41 | (num_labels, hidden_layer_size + 1), order='F') 42 | 43 | # Setup some useful variables 44 | m = x.shape[0] 45 | j = 0 46 | # Part 1: Feedforward the neural network and return the cost in the 47 | # variable J. 
After implementing Part 1, you can verify that your 48 | # cost function computation is correct by verifying the cost 49 | # computed in ex4.m 50 | 51 | # Part 2: Implement the backpropagation algorithm to compute the gradients 52 | Delta1 = np.zeros(theta1.shape) 53 | Delta2 = np.zeros(theta2.shape) 54 | x = np.append(np.ones((m, 1)), x, axis=1) 55 | for i in range(m): 56 | # i-th training example info 57 | cur_x = x[i, :].T 58 | cur_y = ((np.arange(num_labels) + 1) == y[i]).T * 1 59 | # calculate hypothesis function (feedforward) 60 | a1 = cur_x 61 | z2 = np.dot(theta1, a1) 62 | a2 = np.append(np.array([1]), sigmoid(z2), axis=0) 63 | z3 = np.dot(theta2, a2) 64 | a3 = sigmoid(z3) # = hypothesis function 65 | 66 | # backpropagation 67 | delta3 = a3 - cur_y 68 | delta2 = np.dot(theta2.T, delta3)[1:] * sigmoid_gradient(z2) 69 | 70 | Delta1 = Delta1 + np.dot(delta2.reshape((delta2.shape[0], 1)), a1.reshape((1, a1.shape[0]))) 71 | Delta2 = Delta2 + np.dot(delta3.reshape((delta3.shape[0], 1)), a2.reshape((1, a2.shape[0]))) 72 | 73 | # calculate the cost of this training example 74 | j = j + np.sum(((-cur_y) * np.log(a3)) - ((1 - cur_y) * np.log(1 - a3))) 75 | j = j / m 76 | theta1_grad = Delta1 / m 77 | theta2_grad = Delta2 / m 78 | # Regularization 79 | j = j + (nn_cost_lambda / (2 * m)) * \ 80 | (np.sum(theta1[:, 1:input_layer_size + 1] ** 2) + np.sum(theta2[:, 1:hidden_layer_size + 1] ** 2)) 81 | 82 | theta1_grad += nn_cost_lambda / m * np.append(np.zeros((theta1.shape[0], 1)), theta1[:, 1:], axis=1) 83 | theta2_grad += nn_cost_lambda / m * np.append(np.zeros((theta2.shape[0], 1)), theta2[:, 1:], axis=1) 84 | # Unroll gradients 85 | global static_grad 86 | static_grad = np.append(np.ravel(theta1_grad, order='F'), np.ravel(theta2_grad, order='F')) 87 | print('\rCost:%f ' % j, end='') 88 | return j 89 | -------------------------------------------------------------------------------- /ex4 Neural Networks Learning/checkNNGradients.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | 5 | import numpy as np 6 | from debugInitializeWeights import debug_initialize_weights 7 | from nnCostFunction import nn_cost_function 8 | from computeNumericalGradient import compute_numerical_gradient 9 | 10 | 11 | def check_nn_gradients(check_nn_lambda=0): 12 | input_layer_size = 3 13 | hidden_layer_size = 5 14 | num_labels = 3 15 | m = 5 16 | # We generate some 'random' test data 17 | theta1 = debug_initialize_weights(hidden_layer_size, input_layer_size) 18 | theta2 = debug_initialize_weights(num_labels, hidden_layer_size) 19 | # Reusing debugInitializeWeights to generate X 20 | x = debug_initialize_weights(m, input_layer_size - 1) 21 | y = 1 + np.mod(np.arange(m) + 1, num_labels) 22 | # Unroll parameters 23 | nn_params = np.append(np.ravel(theta1, order='F'), np.ravel(theta2, order='F')) 24 | cost, grad = nn_cost_function(nn_params, input_layer_size, hidden_layer_size, num_labels, x, y, check_nn_lambda) 25 | numgrad = compute_numerical_gradient(nn_cost_function, nn_params, 26 | input_layer_size, hidden_layer_size, num_labels, x, y, check_nn_lambda) 27 | # Visually examine the two gradient computations. The two columns 28 | # you get should be very similar. 
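# A sketch of why this check works: compute_numerical_gradient approximates
# each partial derivative by a central difference with e = 1e-4,
#   numgrad[p] = (J(theta + e * u_p) - J(theta - e * u_p)) / (2 * e)
# where u_p is the p-th unit vector, so backprop's analytical gradient should
# agree with it to roughly 1e-9 when the implementation is correct.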
29 | print(np.append(numgrad.reshape(numgrad.size, 1), grad.reshape(grad.size, 1), axis=1)) 30 | print('The above two columns you get should be very similar.\n' + 31 | '(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n') 32 | diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad) 33 | print('If your backpropagation implementation is correct, then \n' + 34 | 'the relative difference will be small (less than 1e-9). \n' + 35 | '\nRelative Difference: %e\n' % diff) 36 | -------------------------------------------------------------------------------- /ex4 Neural Networks Learning/computeNumericalGradient.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | 5 | import numpy as np 6 | 7 | 8 | def compute_numerical_gradient(j, theta, *args): 9 | numgrad = np.zeros(theta.shape) 10 | perturb = np.zeros(theta.shape) 11 | e = 1e-4 12 | for p in range(theta.size): 13 | # Set perturbation vector 14 | perturb[p] = e 15 | loss1 = j(theta - perturb, args[0], args[1], args[2], args[3], args[4], args[5])[0] 16 | loss2 = j(theta + perturb, args[0], args[1], args[2], args[3], args[4], args[5])[0] 17 | # Compute Numerical Gradient 18 | numgrad[p] = (loss2 - loss1) / (2 * e) 19 | perturb[p] = 0 20 | return numgrad 21 | -------------------------------------------------------------------------------- /ex4 Neural Networks Learning/debugInitializeWeights.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | 5 | import numpy as np 6 | 7 | 8 | def debug_initialize_weights(fan_out, fan_in): 9 | # Set W to zeros 10 | w = np.zeros((fan_out, 1 + fan_in)) 11 | # Initialize W using "sin", this ensures that W is always of the same 12 | # values and will be useful for debugging 13 | return np.reshape(np.sin(np.arange(w.size) + 1), w.shape, order='F') / 10 14 | -------------------------------------------------------------------------------- /ex4 Neural Networks Learning/displayData.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | 7 | 8 | def display_data(x, example_width=None): 9 | m, n = x.shape 10 | # Set example_width automatically if not passed in 11 | if not example_width: 12 | example_width = int(np.round(np.sqrt(n))) 13 | example_height = int((n / example_width)) 14 | 15 | # Compute number of items to display 16 | display_rows = int(np.floor(np.sqrt(m))) 17 | display_cols = int(np.ceil(m / display_rows)) 18 | 19 | # Between images padding 20 | pad = 1 21 | # Setup blank display 22 | display_array = - np.ones((pad + display_rows * (example_height + pad), 23 | pad + display_cols * (example_width + pad))) 24 | 25 | # Copy each example into a patch on the display array 26 | curr_ex = 0 27 | for j in range(display_rows): 28 | for i in range(display_cols): 29 | if curr_ex >= m: 30 | break 31 | # Get the max value of the patch 32 | max_val = np.max(np.abs(x[curr_ex, :])) 33 | 34 | wait_set_temp = np.reshape(x[curr_ex, :], 35 | (example_height, example_width), order='F') / max_val 36 | height_min_temp = pad + (j - 0) * (example_height + pad) 37 | height_max_temp = height_min_temp + example_height 38 | width_min_temp = pad + (i - 0) * (example_width + pad) 39 | width_max_temp = width_min_temp + example_width 40 | display_array[height_min_temp:height_max_temp, width_min_temp:width_max_temp] = wait_set_temp
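# (Each row of x becomes one example_height x example_width patch; the offsets
# above place patch (j, i) on the grid with a one-pixel pad, so e.g. 100
# examples of 400 pixels render as a 10x10 grid of 20x20 images.)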
41 | curr_ex = curr_ex + 1 42 | if curr_ex >= m: 43 | break 44 | plt.ion() 45 | plt.imshow(display_array, cmap="gray") # pick a nice colormap 46 | plt.pause(1) 47 | -------------------------------------------------------------------------------- /ex4 Neural Networks Learning/ex4.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | from scipy.io import loadmat 6 | from nnCostFunction import nn_cost_function 7 | from displayData import display_data 8 | from sigmoidGradient import sigmoid_gradient 9 | from randInitializeWeights import rand_initialize_weights 10 | from contextlib import contextmanager 11 | from checkNNGradients import check_nn_gradients 12 | from Training_NN import training_nn_fmincg 13 | from predict_NN import predict 14 | 15 | 16 | def pause_func(): 17 | print('Program paused. Press enter to continue.\n') 18 | while input() != '': 19 | pass 20 | 21 | 22 | def load_mat_data(filename): 23 | return loadmat(filename) 24 | 25 | 26 | @contextmanager 27 | def precision_print(precision=3): 28 | original_options = np.get_printoptions() 29 | np.set_printoptions(precision=precision, suppress=True) 30 | try: 31 | yield 32 | finally: 33 | np.set_printoptions(**original_options) 34 | 35 | 36 | if __name__ == '__main__': 37 | # Setup the parameters you will use for this part of the exercise 38 | # 20x20 Input Images of Digits 39 | input_layer_size = 400 40 | # 25 hidden units 41 | hidden_layer_size = 25 42 | # 10 labels, from 1 to 10 43 | num_labels = 10 44 | # =========== Part 1: Loading and Visualizing Data ============= 45 | data = load_mat_data("ex4data1.mat") 46 | X = data['X'] 47 | y = data['y'] 48 | m = len(y) 49 | # Load Training Data 50 | print('Loading and Visualizing Data ...\n') 51 | # Randomly select 100 data points to display 52 | shuffle_100_X = np.arange(0, m, 1, dtype=int) 53 | np.random.shuffle(shuffle_100_X) 54 | sel = X[shuffle_100_X[0:100], :] 55 | display_data(sel) 56 | print('Program paused. Press enter to continue.\n') 57 | # pause_func() 58 | 59 | # ================ Part 2: Loading Parameters ================ 60 | # In this part of the exercise, we load some pre-initialized neural network parameters. 61 | print('\nLoading Saved Neural Network Parameters ...\n') 62 | # Load the weights into variables Theta1 and Theta2 63 | theta1_2 = load_mat_data('ex4weights.mat') 64 | Theta1 = theta1_2['Theta1'] 65 | Theta2 = theta1_2['Theta2'] 66 | nn_params = np.append(np.ravel(Theta1, order='F'), np.ravel(Theta2, order='F'), axis=0) 67 | 68 | # ================ Part 3: Compute Cost (Feedforward) ================ 69 | print('\nFeedforward Using Neural Network ...\n') 70 | # Weight regularization parameter (we set this to 0 here). 71 | nn_cost_lambda = 0 72 | J = nn_cost_function(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, nn_cost_lambda)[0] 73 | print('Cost at parameters (loaded from ex4weights): ', J, ' \n(this value should be about 0.287629)\n') 74 | print('Program paused. Press enter to continue.\n') 75 | # pause_func() 76 | 77 | # =============== Part 4: Implement Regularization =============== 78 | # Once your cost function implementation is correct, you should now 79 | # continue to implement the regularization with the cost. 80 | print('\nChecking Cost Function (w/ Regularization) ... \n') 81 | # Weight regularization parameter (we set this to 1 here). 82 | nn_cost_lambda = 1 83 | J = nn_cost_function(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, nn_cost_lambda)[0] 84 | print('Cost at parameters (loaded from ex4weights): ', J, '\n(this value should be about 0.383770)\n')
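# For reference, a sketch of the regularized cost nn_cost_function evaluates
# (matching the course formula; bias columns are excluded from the penalty):
#   J = (1/m) * sum_{i,k} [-y_k(i) * log(h_k(i)) - (1 - y_k(i)) * log(1 - h_k(i))]
#       + (lambda / (2m)) * (sum(Theta1[:, 1:] ** 2) + sum(Theta2[:, 1:] ** 2))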
85 | print('Program paused. Press enter to continue.\n') 86 | # pause_func() 87 | 88 | # ================ Part 5: Sigmoid Gradient ================ 89 | print('\nEvaluating sigmoid gradient...\n') 90 | g = sigmoid_gradient(np.array(([-1, -0.5, 0, 0.5, 1]))) 91 | with precision_print(precision=6): 92 | print('Sigmoid gradient evaluated at [-1 -0.5 0 0.5 1]:\n %s \n\n' % g) 93 | print('Program paused. Press enter to continue.\n') 94 | # pause_func() 95 | 96 | # ================ Part 6: Initializing Parameters ================ 97 | print('\nInitializing Neural Network Parameters ...\n') 98 | initial_Theta1 = rand_initialize_weights(input_layer_size, hidden_layer_size) 99 | initial_Theta2 = rand_initialize_weights(hidden_layer_size, num_labels) 100 | # Unroll parameters 101 | initial_nn_params = np.append(np.ravel(initial_Theta1, order='F'), np.ravel(initial_Theta2, order='F')) 102 | 103 | # =============== Part 7: Implement Backpropagation =============== 104 | print('\nChecking Backpropagation... \n') 105 | # Check gradients by running checkNNGradients 106 | check_nn_gradients() 107 | print('Program paused. Press enter to continue.\n') 108 | # pause_func() 109 | 110 | # =============== Part 8: Implement Regularization =============== 111 | print('\nChecking Backpropagation (w/ Regularization) ... \n') 112 | # Check gradients by running checkNNGradients 113 | check_nn_lambda = 3 114 | check_nn_gradients(check_nn_lambda) 115 | # Also output the costFunction debugging values 116 | debug_J = nn_cost_function(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, check_nn_lambda)[0] 117 | print('\n\nCost at (fixed) debugging parameters (w/ lambda = %d): %f ' % (check_nn_lambda, debug_J)) 118 | print('\n(for lambda = 3, this value should be about 0.576051)\n\n') 119 | print('Program paused. Press enter to continue.\n') 120 | # pause_func() 121 | 122 | # =================== Part 9: Training NN =================== 123 | print('\nTraining Neural Network... \n') 124 | training_nn_fmincg_lambda = 1 125 | result = training_nn_fmincg(initial_nn_params, input_layer_size, hidden_layer_size, num_labels, 126 | X, y, training_nn_fmincg_lambda, maxiter=50) 127 | theta1 = np.reshape(result['x'][0:hidden_layer_size * (input_layer_size + 1)], 128 | (hidden_layer_size, input_layer_size + 1), order='F') 129 | theta2 = np.reshape(result['x'][hidden_layer_size * (input_layer_size + 1):], 130 | (num_labels, hidden_layer_size + 1), order='F') 131 | print('Program paused. Press enter to continue.\n') 132 | # pause_func() 133 | 134 | # ================= Part 10: Visualize Weights ================= 135 | print('\nVisualizing Neural Network... \n') 136 | display_data(theta1[:, 1:]) 137 | print('Program paused. Press enter to continue.\n')
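# Note on the unroll/reshape convention used above: parameters are flattened
# column-major (order='F') to mirror Octave, and np.reshape(..., order='F')
# inverts np.ravel(..., order='F'), e.g.:
#   A = np.arange(6).reshape(2, 3)
#   v = np.ravel(A, order='F')                           # [0 3 1 4 2 5]
#   np.array_equal(np.reshape(v, (2, 3), order='F'), A)  # True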
138 | # pause_func() 139 | 140 | # ================= Part 11: Implement Predict ================= 141 | pred = predict(theta1, theta2, X) # use the weights trained in Part 9, not the pre-loaded ones 142 | print('\nTraining Set Accuracy: \n', np.mean((pred == y) * 100)) 143 | -------------------------------------------------------------------------------- /ex4 Neural Networks Learning/ex4data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/ex4 Neural Networks Learning/ex4data1.mat -------------------------------------------------------------------------------- /ex4 Neural Networks Learning/ex4weights.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/ex4 Neural Networks Learning/ex4weights.mat -------------------------------------------------------------------------------- /ex4 Neural Networks Learning/nnCostFunction.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | from sigmoid import sigmoid 6 | from sigmoidGradient import sigmoid_gradient 7 | 8 | 9 | def nn_cost_function(nn_params, 10 | input_layer_size, 11 | hidden_layer_size, 12 | num_labels, 13 | x, y, nn_cost_lambda): 14 | # Reshape nn_params back into the parameters Theta1 and Theta2, the weight matrices for our 2 layer neural network 15 | theta1 = np.reshape(nn_params[0:hidden_layer_size * (input_layer_size + 1)], 16 | (hidden_layer_size, input_layer_size + 1), order='F') 17 | theta2 = np.reshape(nn_params[hidden_layer_size * (input_layer_size + 1):], 18 | (num_labels, hidden_layer_size + 1), order='F') 19 | 20 | # Setup some useful variables 21 | m = x.shape[0] 22 | j = 0 23 | # Part 1: Feedforward the neural network and return the cost in the 24 | # variable J.
After implementing Part 1, you can verify that your 25 | # cost function computation is correct by verifying the cost 26 | # computed in ex4.m 27 | 28 | # Part 2: Implement the backpropagation algorithm to compute the gradients 29 | Delta1 = np.zeros(theta1.shape) 30 | Delta2 = np.zeros(theta2.shape) 31 | x = np.append(np.ones((m, 1)), x, axis=1) 32 | for i in range(m): 33 | # i-th training example info 34 | cur_x = x[i, :].T 35 | cur_y = ((np.arange(num_labels) + 1) == y[i]).T * 1 36 | # calculate hypothesis function (feedforward) 37 | a1 = cur_x 38 | z2 = np.dot(theta1, a1) 39 | a2 = np.append(np.array([1]), sigmoid(z2), axis=0) 40 | z3 = np.dot(theta2, a2) 41 | a3 = sigmoid(z3) # = hypothesis function 42 | 43 | # backpropagation 44 | delta3 = a3 - cur_y 45 | delta2 = np.dot(theta2.T, delta3)[1:] * sigmoid_gradient(z2) 46 | 47 | Delta1 = Delta1 + np.dot(delta2.reshape((delta2.shape[0], 1)), a1.reshape((1, a1.shape[0]))) 48 | Delta2 = Delta2 + np.dot(delta3.reshape((delta3.shape[0], 1)), a2.reshape((1, a2.shape[0]))) 49 | 50 | # calculate the cost of this training example 51 | j = j + np.sum(((-cur_y) * np.log(a3)) - ((1 - cur_y) * np.log(1 - a3))) 52 | j = j / m 53 | theta1_grad = Delta1 / m 54 | theta2_grad = Delta2 / m 55 | # Regularization 56 | j = j + (nn_cost_lambda / (2 * m)) * \ 57 | (np.sum(theta1[:, 1:input_layer_size + 1] ** 2) + np.sum(theta2[:, 1:hidden_layer_size + 1] ** 2)) 58 | 59 | theta1_grad += nn_cost_lambda / m * np.append(np.zeros((theta1.shape[0], 1)), theta1[:, 1:], axis=1) 60 | theta2_grad += nn_cost_lambda / m * np.append(np.zeros((theta2.shape[0], 1)), theta2[:, 1:], axis=1) 61 | # Unroll gradients 62 | grad = np.append(np.ravel(theta1_grad, order='F'), np.ravel(theta2_grad, order='F')) 63 | return j, grad 64 | -------------------------------------------------------------------------------- /ex4 Neural Networks Learning/predict_NN.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | from sigmoid import sigmoid 6 | 7 | 8 | def predict(theta1, theta2, x): 9 | m = x.shape[0] 10 | p = np.zeros((m, 1)) 11 | x = np.append(np.ones((m, 1)), x, axis=1) 12 | h1 = sigmoid(np.dot(x, theta1.T)) 13 | h1 = np.append(np.ones((m, 1)), h1, axis=1) 14 | h2 = sigmoid(np.dot(h1, theta2.T)) 15 | for i in range(m): 16 | p[i] = np.argmax(h2[i, :]) 17 | return p + 1 18 | -------------------------------------------------------------------------------- /ex4 Neural Networks Learning/randInitializeWeights.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | 5 | import numpy as np 6 | 7 | 8 | def rand_initialize_weights(layer_in, layer_out): 9 | epsilon_init = np.sqrt(6 / (layer_in + layer_out)) 10 | return np.random.rand(layer_out, layer_in + 1) * 2 * epsilon_init - epsilon_init 11 | -------------------------------------------------------------------------------- /ex4 Neural Networks Learning/sigmoid.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | # import numpy as np 5 | from scipy.special import expit 6 | 7 | 8 | def sigmoid(x): 9 | # return 1 / (1 + np.exp(-1 * x)) 10 | return expit(x) 11 | -------------------------------------------------------------------------------- /ex4 Neural Networks Learning/sigmoidGradient.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | from sigmoid import sigmoid 5 | 6 | 7 | def sigmoid_gradient(z): 8 | return sigmoid(z) * (1 - sigmoid(z)) 9 | -------------------------------------------------------------------------------- /ex5 Regularized Linear Regression and Bias v.s Variance/ex5.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | from scipy.io import loadmat 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from linearRegCostFunction import linear_reg_cost_function 8 | from trainLinearReg import train_linear_reg 9 | from learningCurve import learning_curve 10 | from polyFeatures import poly_features 11 | from featureNormalize import feature_normalize 12 | from plotFit import plot_fit 13 | from validationCurve import validation_curve 14 | 15 | 16 | def pause_func(): 17 | print('Program paused. Press enter to continue.\n') 18 | while input() != '': 19 | pass 20 | 21 | 22 | def load_data(filename): 23 | return loadmat(filename) 24 | 25 | 26 | # Exercise 5 | Regularized Linear Regression and Bias-Variance 27 | if __name__ == '__main__': 28 | 29 | # =========== Part 1: Loading and Visualizing Data ============= 30 | print('Loading and Visualizing Data ...\n') 31 | # Load from ex5data1: 32 | # You will have X, y, Xval, yval, Xtest, ytest in your environment 33 | data = load_data('ex5data1.mat') 34 | X = data['X'] 35 | y = data['y'] 36 | Xval = data['Xval'] 37 | yval = data['yval'] 38 | Xtest = data['Xtest'] 39 | ytest = data['ytest'] 40 | # m = Number of examples 41 | m = X.shape[0] 42 | # Plot training data 43 | plt.ion() 44 | plt.figure() 45 | plt.plot(X, y, 'rx', markersize=10) 46 | plt.xlabel('Change in water level (x)') 47 | plt.ylabel('Water flowing out of the dam (y)') 48 | plt.axis([-60, 40, 0, 40]) 49 | plt.pause(2) 50 | plt.close() 51 | print('Program paused. Press enter to continue.\n') 52 | # pause_func() 53 | 54 | # =========== Part 2: Regularized Linear Regression Cost ============= 55 | theta = np.array([[1], [1]]) 56 | J, grad = linear_reg_cost_function(np.append(np.ones((m, 1)), X, axis=1), y, theta, 1) 57 | 58 | print('Cost at theta = [1 ; 1]: %f \n(this value should be about 303.993192)\n' % J) 59 | print('Program paused. Press enter to continue.\n') 60 | # pause_func() 61 | 62 | # =========== Part 3: Regularized Linear Regression Gradient ============= 63 | print('Gradient at theta = [1 ; 1]: [%f; %f] \n(this value should be about [-15.303016; 598.250744])\n' % 64 | (grad[0], grad[1])) 65 | print('Program paused. Press enter to continue.\n') 66 | 67 | # =========== Part 4: Train Linear Regression ============= 68 | # Write Up Note: The data is non-linear, so this will not give a great fit. 69 | train_lambda = 0 70 | result = train_linear_reg(np.append(np.ones((m, 1)), X, axis=1), y, train_lambda) 71 | plt.figure() 72 | plt.plot(X, y, 'rx', markersize=10) 73 | plt.plot(X, np.dot(np.append(np.ones((m, 1)), X, axis=1), result['x']), '--') 74 | plt.xlabel('Change in water level (x)') 75 | plt.ylabel('Water flowing out of the dam (y)') 76 | plt.axis([-60, 40, -10, 40]) 77 | plt.pause(2) 78 | plt.close() 79 | print('Program paused. 
Press enter to continue.\n') 80 | # pause_func() 81 | 82 | # =========== Part 5: Learning Curve for Linear Regression ============= 83 | curve_lambda = 0 84 | error_train, error_val = learning_curve(X, y, Xval, yval, curve_lambda) 85 | plt.figure() 86 | plt.plot(np.arange(m), error_train) 87 | plt.plot(np.arange(m), error_val) 88 | plt.title('Learning curve for linear regression') 89 | plt.xlabel('Number of training examples') 90 | plt.ylabel('Error') 91 | plt.legend(['Train', 'Cross Validation']) 92 | 93 | print('# Training Examples\tTrain Error\tCross Validation Error\n') 94 | for i in range(m): 95 | print('\t%d\t\t%f\t%f\n' % (i, error_train[i], error_val[i])) 96 | 97 | plt.pause(2) 98 | plt.close() 99 | print('Program paused. Press enter to continue.\n') 100 | # pause_func() 101 | 102 | # =========== Part 6: Feature Mapping for Polynomial Regression ============= 103 | p = 8 104 | # Map X onto Polynomial Features and Normalize 105 | X_poly = poly_features(X, p) 106 | # Normalize 107 | X_poly, mu, sigma = feature_normalize(X_poly) 108 | # Add Ones 109 | X_poly = np.append(np.ones((X_poly.shape[0], 1)), X_poly, axis=1) 110 | 111 | # Map X_poly_test and normalize (using mu and sigma) 112 | X_poly_test = poly_features(Xtest, p) 113 | for i in range(np.shape(X_poly_test)[0]): 114 | X_poly_test[i] = (X_poly_test[i] - mu) / sigma 115 | X_poly_test = np.append(np.ones((X_poly_test.shape[0], 1)), X_poly_test, axis=1) 116 | 117 | # Map X_poly_val and normalize (using mu and sigma) 118 | X_poly_val = poly_features(Xval, p) 119 | for i in range(np.shape(X_poly_val)[0]): 120 | X_poly_val[i] = (X_poly_val[i] - mu) / sigma 121 | X_poly_val = np.append(np.ones((X_poly_val.shape[0], 1)), X_poly_val, axis=1) 122 | 123 | print('Normalized Training Example 1:\n') 124 | print(X_poly[0, :], '\n') 125 | print('Program paused. Press enter to continue.\n') 126 | # pause_func() 127 | 128 | # =========== Part 7: Learning Curve for Polynomial Regression ============= 129 | # Now, you will get to experiment with polynomial regression with multiple 130 | # values of lambda. The code below runs polynomial regression with 131 | # lambda = 1. You should try running the code with different values of 132 | # lambda to see how the fit and learning curve change. 133 | 134 | lc_pr_lambda = 1 135 | theta = train_linear_reg(X_poly, y, lc_pr_lambda)['x'] 136 | # Plot training data and fit 137 | plt.figure() 138 | plt.plot(X, y, 'rx', markersize=10) 139 | plot_fit(np.min(X), np.max(X), mu, sigma, theta, p) 140 | plt.xlabel('Change in water level (x)') 141 | plt.ylabel('Water flowing out of the dam (y)') 142 | plt.title('Polynomial Regression Fit (lambda = %f)' % lc_pr_lambda) 143 | plt.pause(2) 144 | plt.close() 145 | 146 | error_train, error_val = learning_curve(X_poly, y, X_poly_val, yval, lc_pr_lambda) 147 | plt.figure() 148 | plt.plot(np.arange(m), error_train) 149 | plt.plot(np.arange(m), error_val) 150 | plt.title('Polynomial Regression Learning Curve (lambda = %f)' % lc_pr_lambda) 151 | plt.xlabel('Number of training examples') 152 | plt.ylabel('Error') 153 | plt.legend(['Train', 'Cross Validation']) 154 | 155 | print('Polynomial Regression (lambda = %f)\n\n' % lc_pr_lambda) 156 | print('# Training Examples\tTrain Error\tCross Validation Error\n') 157 | for i in range(m): 158 | print('\t%d\t\t%f\t%f\n' % (i, error_train[i], error_val[i])) 159 | 160 | plt.pause(2) 161 | plt.close() 162 | print('Program paused.
Press enter to continue.\n') 163 | # pause_func 164 | 165 | # =========== Part 8: Validation for Selecting Lambda ============= 166 | lambda_vec, error_train, error_val = validation_curve(X_poly, y, X_poly_val, yval) 167 | 168 | plt.figure() 169 | plt.plot(lambda_vec, error_train) 170 | plt.plot(lambda_vec, error_val) 171 | plt.legend(['Train', 'Cross Validation']) 172 | plt.xlabel('lambda') 173 | plt.ylabel('Error') 174 | 175 | print('lambda\t\tTrain Error\tValidation Error\n') 176 | for i in range(len(lambda_vec)): 177 | print(' %f\t%f\t%f\n' % (lambda_vec[i], error_train[i], error_val[i])) 178 | 179 | plt.pause(2) 180 | plt.close() 181 | print('Program paused. Press enter to continue.\n') 182 | # pause_func 183 | -------------------------------------------------------------------------------- /ex5 Regularized Linear Regression and Bias v.s Variance/ex5data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/ex5 Regularized Linear Regression and Bias v.s Variance/ex5data1.mat -------------------------------------------------------------------------------- /ex5 Regularized Linear Regression and Bias v.s Variance/featureNormalize.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | 6 | 7 | def feature_normalize(x): 8 | x_norm = np.zeros(x.shape) 9 | mu = np.mean(x, axis=0) 10 | sigma = np.std(x, axis=0, ddof=1) 11 | for i in range(np.shape(x)[0]): 12 | x_norm[i] = (x[i] - mu) / sigma 13 | return x_norm, mu, sigma 14 | -------------------------------------------------------------------------------- /ex5 Regularized Linear Regression and Bias v.s Variance/learningCurve.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | from linearRegCostFunction import linear_reg_cost_function 6 | from trainLinearReg import train_linear_reg 7 | 8 | 9 | def learning_curve(x, y, xval, yval, curve_lambda): 10 | m = x.shape[0] 11 | m_xval = xval.shape[0] 12 | 13 | error_train = np.zeros((m, 1)) 14 | error_val = np.zeros((m, 1)) 15 | 16 | x = np.append(np.ones((m, 1)), x, axis=1) 17 | xval = np.append(np.ones((m_xval, 1)), xval, axis=1) 18 | for i in range(m): 19 | # compute parameter theta 20 | result = train_linear_reg(x[0:i + 1], y[0:i + 1], curve_lambda) 21 | theta = result['x'] 22 | 23 | # compute training error 24 | error_train[i] = linear_reg_cost_function(x[0:i + 1], y[0:i + 1], theta, 0)[0] 25 | 26 | # compute cross validation error 27 | error_val[i] = linear_reg_cost_function(xval, yval, theta, 0)[0] 28 | return error_train, error_val 29 | -------------------------------------------------------------------------------- /ex5 Regularized Linear Regression and Bias v.s Variance/linearRegCostFunction.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | 6 | 7 | def linear_reg_cost_function(x, y, theta, linear_lambda): 8 | theta = np.reshape(theta, (theta.shape[0], 1)) 9 | m = x.shape[0] 10 | h = np.dot(x, theta) 11 | j_without_regularization = (1 / (2 * m)) * np.sum((h - y) ** 2) 12 | j = j_without_regularization + (linear_lambda / (2 * m)) * np.sum(theta[1:] ** 2) 13 | grad_without_regularization = (np.dot(x.T, h 
- y) / m) 14 | grad = grad_without_regularization + (linear_lambda / m) * theta 15 | grad[0] = grad_without_regularization[0] 16 | return j, grad 17 | -------------------------------------------------------------------------------- /ex5 Regularized Linear Regression and Bias v.s Variance/plotFit.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | from polyFeatures import poly_features 7 | 8 | 9 | def plot_fit(min_x, max_x, mu, sigma, theta, p): 10 | x = np.arange(min_x - 15, max_x + 25, 0.05) 11 | x_poly = poly_features(x, p) 12 | for i in range(np.shape(x_poly)[0]): 13 | x_poly[i] = (x_poly[i] - mu) / sigma 14 | x_poly = np.append(np.ones((x_poly.shape[0], 1)), x_poly, axis=1) 15 | plt.plot(x, np.dot(x_poly, theta), '--') 16 | -------------------------------------------------------------------------------- /ex5 Regularized Linear Regression and Bias v.s Variance/polyFeatures.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | 6 | 7 | def poly_features(x, p): 8 | m = x.shape[0] 9 | x_poly = np.zeros((m, p)) 10 | for i in range(p): 11 | x_poly[:, i] = (x ** (i + 1)).reshape(m, ) 12 | return x_poly 13 | -------------------------------------------------------------------------------- /ex5 Regularized Linear Regression and Bias v.s Variance/trainLinearReg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | from linearRegCostFunction import linear_reg_cost_function 6 | import scipy.optimize as sciopt 7 | 8 | grad = np.arange(0) 9 | 10 | 11 | def train_linear_reg(x, y, train_lambda): 12 | initial_theta = np.zeros((x.shape[1], 1)) 13 | global grad 14 | grad = np.zeros((x.shape[1], 1)) 15 | return sciopt.minimize(fun=cost_function, x0=initial_theta, args=(x, y, train_lambda), method="TNC", jac=gradient) 16 | 17 | 18 | def cost_function(theta, x, y, cf_lambad): 19 | j, new_grad = linear_reg_cost_function(x, y, theta, cf_lambad) 20 | global grad 21 | grad = new_grad 22 | return j 23 | 24 | 25 | def gradient(*args): 26 | global grad 27 | return grad.flatten() 28 | -------------------------------------------------------------------------------- /ex5 Regularized Linear Regression and Bias v.s Variance/validationCurve.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | from trainLinearReg import train_linear_reg 6 | from linearRegCostFunction import linear_reg_cost_function 7 | 8 | 9 | def validation_curve(x, y, xval, yval): 10 | # Selected values of lambda (you should not change this) 11 | lambda_vec = np.array(([0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10])) 12 | len_of_vec = len(lambda_vec) 13 | # You need to return these variables correctly. 
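# (For each candidate lambda below, theta is fit on (x, y) with that
# regularization strength, but both errors are then evaluated with lambda = 0
# so the two curves stay comparable; the lambda that minimizes error_val is
# the one you would select.)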
14 | error_train = np.zeros((len_of_vec, 1)) 15 | error_val = np.zeros((len_of_vec, 1)) 16 | 17 | for i in range(len_of_vec): 18 | lambda_temp = lambda_vec[i] 19 | 20 | # compute parameter theta (learning) 21 | result = train_linear_reg(x, y, lambda_temp) 22 | theta = result['x'] 23 | 24 | # compute training error 25 | j, grad = linear_reg_cost_function(x, y, theta, 0) 26 | error_train[i] = j 27 | 28 | # compute cross validation error 29 | j, grad = linear_reg_cost_function(xval, yval, theta, 0) 30 | error_val[i] = j 31 | return lambda_vec, error_train, error_val 32 | -------------------------------------------------------------------------------- /ex6 Support Vector Machines/Spam Classification/emailFeatures.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | 5 | import numpy as np 6 | 7 | 8 | def email_features(word_indices): 9 | # Total number of words in the dictionary 10 | n = 1899 11 | 12 | # You need to return the following variables correctly. 13 | x = np.zeros((n, 1)) 14 | for i in range(word_indices.size): 15 | x[word_indices[i]] = 1 16 | return x 17 | -------------------------------------------------------------------------------- /ex6 Support Vector Machines/Spam Classification/ex6_spam.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | 5 | import numpy as np 6 | from scipy.io import loadmat 7 | from sklearn.svm import SVC 8 | 9 | from processEmail import process_email 10 | from emailFeatures import email_features 11 | from getVocabList import get_vocab_list 12 | 13 | 14 | def pause_func(): 15 | while input() != '': 16 | pass 17 | 18 | 19 | def load_mat_file(_filename): 20 | return loadmat(_filename) 21 | 22 | 23 | def predict_email_spam(_filename): 24 | _file_contents = open('../data/' + _filename).read() 25 | _word_indices = process_email(_file_contents) 26 | _features = email_features(_word_indices) 27 | _p = Classification.predict(_features.T) 28 | print('\nProcessed %s\n\nSpam Classification: %d\n' % (_filename, _p[0])) 29 | print('(1 indicates spam, 0 indicates not spam)\n\n') 30 | 31 | 32 | if __name__ == '__main__': 33 | # ==================== Part 1: Email Preprocessing ==================== 34 | # To use an SVM to classify emails into Spam v.s. Non-Spam, you first need 35 | # to convert each email into a vector of features. In this part, you will 36 | # implement the preprocessing steps for each email. You should 37 | # complete the code in processEmail.m to produce a word indices vector 38 | # for a given email. 39 | 40 | print('\nPreprocessing sample email (emailSample1.txt)\n') 41 | # Extract Features 42 | file_contents = open('../data/emailSample1.txt').read() 43 | word_indices = process_email(file_contents) 44 | # Print Stats 45 | print('Word Indices:\n') 46 | print_index = 0 47 | for print_value in word_indices: 48 | print_index += 1 49 | print("%4d" % print_value, end=' ') 50 | if print_index % 10 == 0: 51 | print('\n') 52 | print('\n') 53 | print('Program paused. Press enter to continue.\n') 54 | # pause_func() 55 | 56 | # ==================== Part 2: Feature Extraction ==================== 57 | # Now, you will convert each email into a vector of features in R^n. 58 | # You should complete the code in emailFeatures.m to produce a feature 59 | # vector for a given email. 
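# A sketch of the feature vector email_features builds: with the n = 1899 word
# vocabulary, x[i] = 1 iff vocabulary word i occurs in the email, so e.g.
# word_indices = [60, 100] yields ones at rows 60 and 100 and zeros elsewhere.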
60 | 61 | print('\nExtracting features from sample email (emailSample1.txt)\n') 62 | # Extract Features 63 | features = email_features(word_indices) 64 | # Print Stats 65 | print('Length of feature vector: %d\n' % features.size) 66 | print('Number of non-zero entries: %d\n' % np.sum((features > 0).astype(np.int32))) 67 | print('Program paused. Press enter to continue.\n') 68 | # pause_func() 69 | 70 | # =========== Part 3: Train Linear SVM for Spam Classification ======== 71 | # In this section, you will train a linear classifier to determine if an 72 | # email is Spam or Not-Spam. 73 | 74 | # Load the Spam Email dataset 75 | data = load_mat_file('../data/spamTrain.mat') 76 | print('\nTraining Linear SVM (Spam Classification)\n') 77 | print('(this may take 1 to 2 minutes) ...\n') 78 | X = data['X'] 79 | y = data['y'].ravel() 80 | C = 0.1 81 | Classification = SVC(C=C, kernel='linear') 82 | Classification.fit(X, y) 83 | p = Classification.predict(X) 84 | print('Training Accuracy: {:.2f}\n'.format((np.mean((p == y)) * 100))) 85 | print('Program paused. Press enter to continue.\n') 86 | # pause_func() 87 | 88 | # =================== Part 4: Test Spam Classification ================ 89 | data = load_mat_file('../data/spamTest.mat') 90 | Xtest = data['Xtest'] 91 | ytest = data['ytest'].ravel() 92 | p = Classification.predict(Xtest) 93 | print('Test Accuracy: {:.2f}\n'.format((np.mean((p == ytest)) * 100))) 94 | print('Program paused. Press enter to continue.\n') 95 | # pause_func() 96 | 97 | # ================= Part 5: Top Predictors of Spam ==================== 98 | index_array = np.argsort(Classification.coef_).ravel()[::-1] 99 | vocab_list = get_vocab_list()[:, 1] 100 | for i in range(15): 101 | print(' %-15s (%f) \n' % (vocab_list[index_array[i]], Classification.coef_[:, index_array[i]])) 102 | 103 | print('Program paused. 
Press enter to continue.\n') 104 | # pause_func() 105 | 106 | # =================== Part 6: Try Your Own Emails ===================== 107 | filename = 'emailSample1.txt' 108 | predict_email_spam(filename) 109 | 110 | filename = 'spamSample1.txt' 111 | predict_email_spam(filename) 112 | 113 | filename = 'emailSample2.txt' 114 | predict_email_spam(filename) 115 | 116 | filename = 'spamSample2.txt' 117 | predict_email_spam(filename) 118 | -------------------------------------------------------------------------------- /ex6 Support Vector Machines/Spam Classification/getVocabList.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | 5 | import numpy as np 6 | 7 | 8 | def get_vocab_list(): 9 | return np.loadtxt('../data/vocab.txt', dtype=str) 10 | -------------------------------------------------------------------------------- /ex6 Support Vector Machines/Spam Classification/processEmail.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | 5 | import numpy as np 6 | import re 7 | from getVocabList import get_vocab_list 8 | from nltk.stem import PorterStemmer 9 | 10 | 11 | def process_email(email_contents): 12 | # Load Vocabulary 13 | vocab_list = get_vocab_list()[:, 1] 14 | word_indices = [] 15 | # ========================== Preprocess Email =========================== 16 | # Lower case 17 | email_contents = str(email_contents) 18 | email_contents = email_contents.lower() 19 | 20 | # Strip all HTML 21 | # Looks for any expression that starts with < and ends with > and replace 22 | # and does not have any < or > in the tag it with a space 23 | email_contents = re.sub(r'<[^<>]+>', ' ', email_contents) 24 | 25 | # Handle Numbers 26 | # Look for one or more characters between 0-9 27 | email_contents = re.sub(r'[0-9]+', 'number', email_contents) 28 | 29 | # Handle URLS 30 | # Look for strings starting with http:// or https:// 31 | email_contents = re.sub(r'(http|https)://[^\s]*', 'httpaddr', email_contents) 32 | 33 | # Handle Email Addresses 34 | # Look for strings with @ in the middle 35 | email_contents = re.sub(r'[^\s]+@[^\s]+', 'emailaddr', email_contents) 36 | 37 | # Handle $ sign 38 | email_contents = re.sub(r'[$]+', 'dollar', email_contents) 39 | 40 | # ========================== Tokenize Email =========================== 41 | # Output the email to screen as well 42 | print('\n==== Processed Email ====\n\n') 43 | 44 | # Process file 45 | l_count = 0 46 | 47 | # Tokenize and also get rid of any punctuation 48 | partition_text = re.split(r'[ @$/#.-:&*+=\[\]?!(){},\'\">_<;%\n\f]', email_contents) 49 | 50 | stemmer = PorterStemmer() 51 | 52 | for one_word in partition_text: 53 | if one_word != '': 54 | # Remove any non alphanumeric characters 55 | one_word = re.sub(r'[^a-zA-Z0-9]', '', one_word) 56 | 57 | # Stem the word 58 | # (the porterStemmer sometimes has issues, so we use a try catch block) 59 | one_word = stemmer.stem(one_word) 60 | 61 | # % Skip the word if it is too short. 
62 | if one_word == '': 63 | continue 64 | 65 | temp = np.argwhere(vocab_list == one_word) 66 | if temp.size == 1: 67 | word_indices.append(temp.min()) 68 | 69 | # Print to screen, ensuring that the output lines are not too long 70 | if (l_count + len(one_word) + 1) > 78: 71 | print('\n') 72 | l_count = 0 73 | print('%s' % one_word, end=' ') 74 | l_count = l_count + len(one_word) + 1 75 | print('\n') 76 | # Print footer 77 | print('\n\n=========================\n') 78 | return np.array(word_indices) 79 | -------------------------------------------------------------------------------- /ex6 Support Vector Machines/data/emailSample1.txt: -------------------------------------------------------------------------------- 1 | > Anyone knows how much it costs to host a web portal ? 2 | > 3 | Well, it depends on how many visitors you're expecting. 4 | This can be anywhere from less than 10 bucks a month to a couple of $100. 5 | You should checkout http://www.rackspace.com/ or perhaps Amazon EC2 6 | if youre running something big.. 7 | 8 | To unsubscribe yourself from this mailing list, send an email to: 9 | groupname-unsubscribe@egroups.com 10 | 11 | -------------------------------------------------------------------------------- /ex6 Support Vector Machines/data/emailSample2.txt: -------------------------------------------------------------------------------- 1 | Folks, 2 | 3 | my first time posting - have a bit of Unix experience, but am new to Linux. 4 | 5 | 6 | Just got a new PC at home - Dell box with Windows XP. Added a second hard disk 7 | for Linux. Partitioned the disk and have installed Suse 7.2 from CD, which went 8 | fine except it didn't pick up my monitor. 9 | 10 | I have a Dell branded E151FPp 15" LCD flat panel monitor and a nVidia GeForce4 11 | Ti4200 video card, both of which are probably too new to feature in Suse's default 12 | set. I downloaded a driver from the nVidia website and installed it using RPM. 13 | Then I ran Sax2 (as was recommended in some postings I found on the net), but 14 | it still doesn't feature my video card in the available list. What next? 15 | 16 | Another problem. I have a Dell branded keyboard and if I hit Caps-Lock twice, 17 | the whole machine crashes (in Linux, not Windows) - even the on/off switch is 18 | inactive, leaving me to reach for the power cable instead. 19 | 20 | If anyone can help me in any way with these probs., I'd be really grateful - 21 | I've searched the 'net but have run out of ideas. 22 | 23 | Or should I be going for a different version of Linux such as RedHat? Opinions 24 | welcome. 25 | 26 | Thanks a lot, 27 | Peter 28 | 29 | -- 30 | Irish Linux Users' Group: ilug@linux.ie 31 | http://www.linux.ie/mailman/listinfo/ilug for (un)subscription information.
32 | List maintainer: listmaster@linux.ie 33 | 34 | 35 | -------------------------------------------------------------------------------- /ex6 Support Vector Machines/data/ex6data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/ex6 Support Vector Machines/data/ex6data1.mat -------------------------------------------------------------------------------- /ex6 Support Vector Machines/data/ex6data2.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/ex6 Support Vector Machines/data/ex6data2.mat -------------------------------------------------------------------------------- /ex6 Support Vector Machines/data/ex6data3.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/ex6 Support Vector Machines/data/ex6data3.mat -------------------------------------------------------------------------------- /ex6 Support Vector Machines/data/spamSample1.txt: -------------------------------------------------------------------------------- 1 | Do You Want To Make $1000 Or More Per Week? 2 | 3 | 4 | 5 | If you are a motivated and qualified individual - I 6 | will personally demonstrate to you a system that will 7 | make you $1,000 per week or more! This is NOT mlm. 8 | 9 | 10 | 11 | Call our 24 hour pre-recorded number to get the 12 | details. 13 | 14 | 15 | 16 | 000-456-789 17 | 18 | 19 | 20 | I need people who want to make serious money. Make 21 | the call and get the facts. 22 | 23 | Invest 2 minutes in yourself now! 24 | 25 | 26 | 27 | 000-456-789 28 | 29 | 30 | 31 | Looking forward to your call and I will introduce you 32 | to people like yourself who 33 | are currently making $10,000 plus per week! 34 | 35 | 36 | 37 | 000-456-789 38 | 39 | 40 | 41 | 3484lJGv6-241lEaN9080lRmS6-271WxHo7524qiyT5-438rjUv5615hQcf0-662eiDB9057dMtVl72 42 | 43 | -------------------------------------------------------------------------------- /ex6 Support Vector Machines/data/spamSample2.txt: -------------------------------------------------------------------------------- 1 | Best Buy Viagra Generic Online 2 | 3 | Viagra 100mg x 60 Pills $125, Free Pills & Reorder Discount, Top Selling 100% Quality & Satisfaction guaranteed! 4 | 5 | We accept VISA, Master & E-Check Payments, 90000+ Satisfied Customers! 
6 | http://medphysitcstech.ru 7 | 8 | 9 | -------------------------------------------------------------------------------- /ex6 Support Vector Machines/data/spamTest.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/ex6 Support Vector Machines/data/spamTest.mat -------------------------------------------------------------------------------- /ex6 Support Vector Machines/data/spamTrain.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/ex6 Support Vector Machines/data/spamTrain.mat -------------------------------------------------------------------------------- /ex6 Support Vector Machines/data/vocab.txt: -------------------------------------------------------------------------------- 1 | 1 aa 2 | 2 ab 3 | 3 abil 4 | 4 abl 5 | 5 about 6 | 6 abov 7 | 7 absolut 8 | 8 abus 9 | 9 ac 10 | 10 accept 11 | 11 access 12 | 12 accord 13 | 13 account 14 | 14 achiev 15 | 15 acquir 16 | 16 across 17 | 17 act 18 | 18 action 19 | 19 activ 20 | 20 actual 21 | 21 ad 22 | 22 adam 23 | 23 add 24 | 24 addit 25 | 25 address 26 | 26 administr 27 | 27 adult 28 | 28 advanc 29 | 29 advantag 30 | 30 advertis 31 | 31 advic 32 | 32 advis 33 | 33 ae 34 | 34 af 35 | 35 affect 36 | 36 affili 37 | 37 afford 38 | 38 africa 39 | 39 after 40 | 40 ag 41 | 41 again 42 | 42 against 43 | 43 agenc 44 | 44 agent 45 | 45 ago 46 | 46 agre 47 | 47 agreement 48 | 48 aid 49 | 49 air 50 | 50 al 51 | 51 alb 52 | 52 align 53 | 53 all 54 | 54 allow 55 | 55 almost 56 | 56 alon 57 | 57 along 58 | 58 alreadi 59 | 59 alsa 60 | 60 also 61 | 61 altern 62 | 62 although 63 | 63 alwai 64 | 64 am 65 | 65 amaz 66 | 66 america 67 | 67 american 68 | 68 among 69 | 69 amount 70 | 70 amp 71 | 71 an 72 | 72 analysi 73 | 73 analyst 74 | 74 and 75 | 75 ani 76 | 76 anim 77 | 77 announc 78 | 78 annual 79 | 79 annuiti 80 | 80 anoth 81 | 81 answer 82 | 82 anti 83 | 83 anumb 84 | 84 anybodi 85 | 85 anymor 86 | 86 anyon 87 | 87 anyth 88 | 88 anywai 89 | 89 anywher 90 | 90 aol 91 | 91 ap 92 | 92 apolog 93 | 93 app 94 | 94 appar 95 | 95 appear 96 | 96 appl 97 | 97 appli 98 | 98 applic 99 | 99 appreci 100 | 100 approach 101 | 101 approv 102 | 102 apt 103 | 103 ar 104 | 104 archiv 105 | 105 area 106 | 106 aren 107 | 107 argument 108 | 108 arial 109 | 109 arm 110 | 110 around 111 | 111 arrai 112 | 112 arriv 113 | 113 art 114 | 114 articl 115 | 115 artist 116 | 116 as 117 | 117 ascii 118 | 118 ask 119 | 119 asset 120 | 120 assist 121 | 121 associ 122 | 122 assum 123 | 123 assur 124 | 124 at 125 | 125 atol 126 | 126 attach 127 | 127 attack 128 | 128 attempt 129 | 129 attent 130 | 130 attornei 131 | 131 attract 132 | 132 audio 133 | 133 aug 134 | 134 august 135 | 135 author 136 | 136 auto 137 | 137 autom 138 | 138 automat 139 | 139 avail 140 | 140 averag 141 | 141 avoid 142 | 142 awai 143 | 143 awar 144 | 144 award 145 | 145 ba 146 | 146 babi 147 | 147 back 148 | 148 background 149 | 149 backup 150 | 150 bad 151 | 151 balanc 152 | 152 ban 153 | 153 bank 154 | 154 bar 155 | 155 base 156 | 156 basenumb 157 | 157 basi 158 | 158 basic 159 | 159 bb 160 | 160 bc 161 | 161 bd 162 | 162 be 163 | 163 beat 164 | 164 beberg 165 | 165 becaus 166 | 166 becom 167 | 167 been 168 | 168 befor 169 | 169 begin 170 | 170 behalf 171 | 171 behavior 172 | 172 behind 173 | 173 believ 174 | 174 
below 175 | 175 benefit 176 | 176 best 177 | 177 beta 178 | 178 better 179 | 179 between 180 | 180 bf 181 | 181 big 182 | 182 bill 183 | 183 billion 184 | 184 bin 185 | 185 binari 186 | 186 bit 187 | 187 black 188 | 188 blank 189 | 189 block 190 | 190 blog 191 | 191 blood 192 | 192 blue 193 | 193 bnumber 194 | 194 board 195 | 195 bodi 196 | 196 boi 197 | 197 bonu 198 | 198 book 199 | 199 boot 200 | 200 border 201 | 201 boss 202 | 202 boston 203 | 203 botan 204 | 204 both 205 | 205 bottl 206 | 206 bottom 207 | 207 boundari 208 | 208 box 209 | 209 brain 210 | 210 brand 211 | 211 break 212 | 212 brian 213 | 213 bring 214 | 214 broadcast 215 | 215 broker 216 | 216 browser 217 | 217 bug 218 | 218 bui 219 | 219 build 220 | 220 built 221 | 221 bulk 222 | 222 burn 223 | 223 bush 224 | 224 busi 225 | 225 but 226 | 226 button 227 | 227 by 228 | 228 byte 229 | 229 ca 230 | 230 cabl 231 | 231 cach 232 | 232 calcul 233 | 233 california 234 | 234 call 235 | 235 came 236 | 236 camera 237 | 237 campaign 238 | 238 can 239 | 239 canada 240 | 240 cannot 241 | 241 canon 242 | 242 capabl 243 | 243 capillari 244 | 244 capit 245 | 245 car 246 | 246 card 247 | 247 care 248 | 248 career 249 | 249 carri 250 | 250 cartridg 251 | 251 case 252 | 252 cash 253 | 253 cat 254 | 254 catch 255 | 255 categori 256 | 256 caus 257 | 257 cb 258 | 258 cc 259 | 259 cd 260 | 260 ce 261 | 261 cell 262 | 262 cent 263 | 263 center 264 | 264 central 265 | 265 centuri 266 | 266 ceo 267 | 267 certain 268 | 268 certainli 269 | 269 cf 270 | 270 challeng 271 | 271 chanc 272 | 272 chang 273 | 273 channel 274 | 274 char 275 | 275 charact 276 | 276 charg 277 | 277 charset 278 | 278 chat 279 | 279 cheap 280 | 280 check 281 | 281 cheer 282 | 282 chief 283 | 283 children 284 | 284 china 285 | 285 chip 286 | 286 choic 287 | 287 choos 288 | 288 chri 289 | 289 citi 290 | 290 citizen 291 | 291 civil 292 | 292 claim 293 | 293 class 294 | 294 classifi 295 | 295 clean 296 | 296 clear 297 | 297 clearli 298 | 298 click 299 | 299 client 300 | 300 close 301 | 301 clue 302 | 302 cnet 303 | 303 cnumber 304 | 304 co 305 | 305 code 306 | 306 collect 307 | 307 colleg 308 | 308 color 309 | 309 com 310 | 310 combin 311 | 311 come 312 | 312 comfort 313 | 313 command 314 | 314 comment 315 | 315 commentari 316 | 316 commerci 317 | 317 commiss 318 | 318 commit 319 | 319 common 320 | 320 commun 321 | 321 compani 322 | 322 compar 323 | 323 comparison 324 | 324 compat 325 | 325 compet 326 | 326 competit 327 | 327 compil 328 | 328 complet 329 | 329 comprehens 330 | 330 comput 331 | 331 concentr 332 | 332 concept 333 | 333 concern 334 | 334 condit 335 | 335 conf 336 | 336 confer 337 | 337 confid 338 | 338 confidenti 339 | 339 config 340 | 340 configur 341 | 341 confirm 342 | 342 conflict 343 | 343 confus 344 | 344 congress 345 | 345 connect 346 | 346 consid 347 | 347 consolid 348 | 348 constitut 349 | 349 construct 350 | 350 consult 351 | 351 consum 352 | 352 contact 353 | 353 contain 354 | 354 content 355 | 355 continu 356 | 356 contract 357 | 357 contribut 358 | 358 control 359 | 359 conveni 360 | 360 convers 361 | 361 convert 362 | 362 cool 363 | 363 cooper 364 | 364 copi 365 | 365 copyright 366 | 366 core 367 | 367 corpor 368 | 368 correct 369 | 369 correspond 370 | 370 cost 371 | 371 could 372 | 372 couldn 373 | 373 count 374 | 374 countri 375 | 375 coupl 376 | 376 cours 377 | 377 court 378 | 378 cover 379 | 379 coverag 380 | 380 crash 381 | 381 creat 382 | 382 creativ 383 | 383 credit 384 | 384 critic 385 | 385 cross 386 | 386 cultur 387 | 387 current 388 | 388 
custom 389 | 389 cut 390 | 390 cv 391 | 391 da 392 | 392 dagga 393 | 393 dai 394 | 394 daili 395 | 395 dan 396 | 396 danger 397 | 397 dark 398 | 398 data 399 | 399 databas 400 | 400 datapow 401 | 401 date 402 | 402 dave 403 | 403 david 404 | 404 dc 405 | 405 de 406 | 406 dead 407 | 407 deal 408 | 408 dear 409 | 409 death 410 | 410 debt 411 | 411 decad 412 | 412 decid 413 | 413 decis 414 | 414 declar 415 | 415 declin 416 | 416 decor 417 | 417 default 418 | 418 defend 419 | 419 defens 420 | 420 defin 421 | 421 definit 422 | 422 degre 423 | 423 delai 424 | 424 delet 425 | 425 deliv 426 | 426 deliveri 427 | 427 dell 428 | 428 demand 429 | 429 democrat 430 | 430 depart 431 | 431 depend 432 | 432 deposit 433 | 433 describ 434 | 434 descript 435 | 435 deserv 436 | 436 design 437 | 437 desir 438 | 438 desktop 439 | 439 despit 440 | 440 detail 441 | 441 detect 442 | 442 determin 443 | 443 dev 444 | 444 devel 445 | 445 develop 446 | 446 devic 447 | 447 di 448 | 448 dial 449 | 449 did 450 | 450 didn 451 | 451 diet 452 | 452 differ 453 | 453 difficult 454 | 454 digit 455 | 455 direct 456 | 456 directli 457 | 457 director 458 | 458 directori 459 | 459 disabl 460 | 460 discount 461 | 461 discov 462 | 462 discoveri 463 | 463 discuss 464 | 464 disk 465 | 465 displai 466 | 466 disposit 467 | 467 distanc 468 | 468 distribut 469 | 469 dn 470 | 470 dnumber 471 | 471 do 472 | 472 doc 473 | 473 document 474 | 474 doe 475 | 475 doer 476 | 476 doesn 477 | 477 dollar 478 | 478 dollarac 479 | 479 dollarnumb 480 | 480 domain 481 | 481 don 482 | 482 done 483 | 483 dont 484 | 484 doubl 485 | 485 doubt 486 | 486 down 487 | 487 download 488 | 488 dr 489 | 489 draw 490 | 490 dream 491 | 491 drive 492 | 492 driver 493 | 493 drop 494 | 494 drug 495 | 495 due 496 | 496 dure 497 | 497 dvd 498 | 498 dw 499 | 499 dynam 500 | 500 ea 501 | 501 each 502 | 502 earli 503 | 503 earlier 504 | 504 earn 505 | 505 earth 506 | 506 easi 507 | 507 easier 508 | 508 easili 509 | 509 eat 510 | 510 eb 511 | 511 ebai 512 | 512 ec 513 | 513 echo 514 | 514 econom 515 | 515 economi 516 | 516 ed 517 | 517 edg 518 | 518 edit 519 | 519 editor 520 | 520 educ 521 | 521 eff 522 | 522 effect 523 | 523 effici 524 | 524 effort 525 | 525 either 526 | 526 el 527 | 527 electron 528 | 528 elimin 529 | 529 els 530 | 530 email 531 | 531 emailaddr 532 | 532 emerg 533 | 533 empir 534 | 534 employ 535 | 535 employe 536 | 536 en 537 | 537 enabl 538 | 538 encod 539 | 539 encourag 540 | 540 end 541 | 541 enemi 542 | 542 enenkio 543 | 543 energi 544 | 544 engin 545 | 545 english 546 | 546 enhanc 547 | 547 enjoi 548 | 548 enough 549 | 549 ensur 550 | 550 enter 551 | 551 enterpris 552 | 552 entertain 553 | 553 entir 554 | 554 entri 555 | 555 enumb 556 | 556 environ 557 | 557 equal 558 | 558 equip 559 | 559 equival 560 | 560 error 561 | 561 especi 562 | 562 essenti 563 | 563 establish 564 | 564 estat 565 | 565 estim 566 | 566 et 567 | 567 etc 568 | 568 euro 569 | 569 europ 570 | 570 european 571 | 571 even 572 | 572 event 573 | 573 eventu 574 | 574 ever 575 | 575 everi 576 | 576 everyon 577 | 577 everyth 578 | 578 evid 579 | 579 evil 580 | 580 exactli 581 | 581 exampl 582 | 582 excel 583 | 583 except 584 | 584 exchang 585 | 585 excit 586 | 586 exclus 587 | 587 execut 588 | 588 exercis 589 | 589 exist 590 | 590 exmh 591 | 591 expand 592 | 592 expect 593 | 593 expens 594 | 594 experi 595 | 595 expert 596 | 596 expir 597 | 597 explain 598 | 598 explor 599 | 599 express 600 | 600 extend 601 | 601 extens 602 | 602 extra 603 | 603 extract 604 | 604 extrem 605 | 605 ey 606 | 606 
fa 607 | 607 face 608 | 608 fact 609 | 609 factor 610 | 610 fail 611 | 611 fair 612 | 612 fall 613 | 613 fals 614 | 614 famili 615 | 615 faq 616 | 616 far 617 | 617 fast 618 | 618 faster 619 | 619 fastest 620 | 620 fat 621 | 621 father 622 | 622 favorit 623 | 623 fax 624 | 624 fb 625 | 625 fd 626 | 626 featur 627 | 627 feder 628 | 628 fee 629 | 629 feed 630 | 630 feedback 631 | 631 feel 632 | 632 femal 633 | 633 few 634 | 634 ffffff 635 | 635 ffnumber 636 | 636 field 637 | 637 fight 638 | 638 figur 639 | 639 file 640 | 640 fill 641 | 641 film 642 | 642 filter 643 | 643 final 644 | 644 financ 645 | 645 financi 646 | 646 find 647 | 647 fine 648 | 648 finish 649 | 649 fire 650 | 650 firewal 651 | 651 firm 652 | 652 first 653 | 653 fit 654 | 654 five 655 | 655 fix 656 | 656 flag 657 | 657 flash 658 | 658 flow 659 | 659 fnumber 660 | 660 focu 661 | 661 folder 662 | 662 folk 663 | 663 follow 664 | 664 font 665 | 665 food 666 | 666 for 667 | 667 forc 668 | 668 foreign 669 | 669 forev 670 | 670 forget 671 | 671 fork 672 | 672 form 673 | 673 format 674 | 674 former 675 | 675 fortun 676 | 676 forward 677 | 677 found 678 | 678 foundat 679 | 679 four 680 | 680 franc 681 | 681 free 682 | 682 freedom 683 | 683 french 684 | 684 freshrpm 685 | 685 fri 686 | 686 fridai 687 | 687 friend 688 | 688 from 689 | 689 front 690 | 690 ftoc 691 | 691 ftp 692 | 692 full 693 | 693 fulli 694 | 694 fun 695 | 695 function 696 | 696 fund 697 | 697 further 698 | 698 futur 699 | 699 ga 700 | 700 gain 701 | 701 game 702 | 702 gari 703 | 703 garrigu 704 | 704 gave 705 | 705 gcc 706 | 706 geek 707 | 707 gener 708 | 708 get 709 | 709 gif 710 | 710 gift 711 | 711 girl 712 | 712 give 713 | 713 given 714 | 714 global 715 | 715 gnome 716 | 716 gnu 717 | 717 gnupg 718 | 718 go 719 | 719 goal 720 | 720 god 721 | 721 goe 722 | 722 gold 723 | 723 gone 724 | 724 good 725 | 725 googl 726 | 726 got 727 | 727 govern 728 | 728 gpl 729 | 729 grand 730 | 730 grant 731 | 731 graphic 732 | 732 great 733 | 733 greater 734 | 734 ground 735 | 735 group 736 | 736 grow 737 | 737 growth 738 | 738 gt 739 | 739 guarante 740 | 740 guess 741 | 741 gui 742 | 742 guid 743 | 743 ha 744 | 744 hack 745 | 745 had 746 | 746 half 747 | 747 ham 748 | 748 hand 749 | 749 handl 750 | 750 happen 751 | 751 happi 752 | 752 hard 753 | 753 hardwar 754 | 754 hat 755 | 755 hate 756 | 756 have 757 | 757 haven 758 | 758 he 759 | 759 head 760 | 760 header 761 | 761 headlin 762 | 762 health 763 | 763 hear 764 | 764 heard 765 | 765 heart 766 | 766 heaven 767 | 767 hei 768 | 768 height 769 | 769 held 770 | 770 hello 771 | 771 help 772 | 772 helvetica 773 | 773 her 774 | 774 herba 775 | 775 here 776 | 776 hermio 777 | 777 hettinga 778 | 778 hi 779 | 779 high 780 | 780 higher 781 | 781 highli 782 | 782 highlight 783 | 783 him 784 | 784 histori 785 | 785 hit 786 | 786 hold 787 | 787 home 788 | 788 honor 789 | 789 hope 790 | 790 host 791 | 791 hot 792 | 792 hour 793 | 793 hous 794 | 794 how 795 | 795 howev 796 | 796 hp 797 | 797 html 798 | 798 http 799 | 799 httpaddr 800 | 800 huge 801 | 801 human 802 | 802 hundr 803 | 803 ibm 804 | 804 id 805 | 805 idea 806 | 806 ident 807 | 807 identifi 808 | 808 idnumb 809 | 809 ie 810 | 810 if 811 | 811 ignor 812 | 812 ii 813 | 813 iii 814 | 814 iiiiiiihnumberjnumberhnumberjnumberhnumb 815 | 815 illeg 816 | 816 im 817 | 817 imag 818 | 818 imagin 819 | 819 immedi 820 | 820 impact 821 | 821 implement 822 | 822 import 823 | 823 impress 824 | 824 improv 825 | 825 in 826 | 826 inc 827 | 827 includ 828 | 828 incom 829 | 829 increas 830 | 830 incred 
831 | 831 inde 832 | 832 independ 833 | 833 index 834 | 834 india 835 | 835 indian 836 | 836 indic 837 | 837 individu 838 | 838 industri 839 | 839 info 840 | 840 inform 841 | 841 initi 842 | 842 inlin 843 | 843 innov 844 | 844 input 845 | 845 insert 846 | 846 insid 847 | 847 instal 848 | 848 instanc 849 | 849 instant 850 | 850 instead 851 | 851 institut 852 | 852 instruct 853 | 853 insur 854 | 854 int 855 | 855 integr 856 | 856 intel 857 | 857 intellig 858 | 858 intend 859 | 859 interact 860 | 860 interest 861 | 861 interfac 862 | 862 intern 863 | 863 internet 864 | 864 interview 865 | 865 into 866 | 866 intro 867 | 867 introduc 868 | 868 inumb 869 | 869 invest 870 | 870 investig 871 | 871 investor 872 | 872 invok 873 | 873 involv 874 | 874 ip 875 | 875 ireland 876 | 876 irish 877 | 877 is 878 | 878 island 879 | 879 isn 880 | 880 iso 881 | 881 isp 882 | 882 issu 883 | 883 it 884 | 884 item 885 | 885 itself 886 | 886 jabber 887 | 887 jame 888 | 888 java 889 | 889 jim 890 | 890 jnumberiiiiiiihepihepihf 891 | 891 job 892 | 892 joe 893 | 893 john 894 | 894 join 895 | 895 journal 896 | 896 judg 897 | 897 judgment 898 | 898 jul 899 | 899 juli 900 | 900 jump 901 | 901 june 902 | 902 just 903 | 903 justin 904 | 904 keep 905 | 905 kei 906 | 906 kept 907 | 907 kernel 908 | 908 kevin 909 | 909 keyboard 910 | 910 kid 911 | 911 kill 912 | 912 kind 913 | 913 king 914 | 914 kingdom 915 | 915 knew 916 | 916 know 917 | 917 knowledg 918 | 918 known 919 | 919 la 920 | 920 lack 921 | 921 land 922 | 922 languag 923 | 923 laptop 924 | 924 larg 925 | 925 larger 926 | 926 largest 927 | 927 laser 928 | 928 last 929 | 929 late 930 | 930 later 931 | 931 latest 932 | 932 launch 933 | 933 law 934 | 934 lawrenc 935 | 935 le 936 | 936 lead 937 | 937 leader 938 | 938 learn 939 | 939 least 940 | 940 leav 941 | 941 left 942 | 942 legal 943 | 943 lender 944 | 944 length 945 | 945 less 946 | 946 lesson 947 | 947 let 948 | 948 letter 949 | 949 level 950 | 950 lib 951 | 951 librari 952 | 952 licens 953 | 953 life 954 | 954 lifetim 955 | 955 light 956 | 956 like 957 | 957 limit 958 | 958 line 959 | 959 link 960 | 960 linux 961 | 961 list 962 | 962 listen 963 | 963 littl 964 | 964 live 965 | 965 ll 966 | 966 lo 967 | 967 load 968 | 968 loan 969 | 969 local 970 | 970 locat 971 | 971 lock 972 | 972 lockergnom 973 | 973 log 974 | 974 long 975 | 975 longer 976 | 976 look 977 | 977 lose 978 | 978 loss 979 | 979 lost 980 | 980 lot 981 | 981 love 982 | 982 low 983 | 983 lower 984 | 984 lowest 985 | 985 lt 986 | 986 ma 987 | 987 mac 988 | 988 machin 989 | 989 made 990 | 990 magazin 991 | 991 mai 992 | 992 mail 993 | 993 mailer 994 | 994 main 995 | 995 maintain 996 | 996 major 997 | 997 make 998 | 998 maker 999 | 999 male 1000 | 1000 man 1001 | 1001 manag 1002 | 1002 mani 1003 | 1003 manual 1004 | 1004 manufactur 1005 | 1005 map 1006 | 1006 march 1007 | 1007 margin 1008 | 1008 mark 1009 | 1009 market 1010 | 1010 marshal 1011 | 1011 mass 1012 | 1012 master 1013 | 1013 match 1014 | 1014 materi 1015 | 1015 matter 1016 | 1016 matthia 1017 | 1017 mayb 1018 | 1018 me 1019 | 1019 mean 1020 | 1020 measur 1021 | 1021 mechan 1022 | 1022 media 1023 | 1023 medic 1024 | 1024 meet 1025 | 1025 member 1026 | 1026 membership 1027 | 1027 memori 1028 | 1028 men 1029 | 1029 mention 1030 | 1030 menu 1031 | 1031 merchant 1032 | 1032 messag 1033 | 1033 method 1034 | 1034 mh 1035 | 1035 michael 1036 | 1036 microsoft 1037 | 1037 middl 1038 | 1038 might 1039 | 1039 mike 1040 | 1040 mile 1041 | 1041 militari 1042 | 1042 million 1043 | 1043 mime 1044 | 1044 mind 
1045 | 1045 mine 1046 | 1046 mini 1047 | 1047 minimum 1048 | 1048 minut 1049 | 1049 miss 1050 | 1050 mistak 1051 | 1051 mobil 1052 | 1052 mode 1053 | 1053 model 1054 | 1054 modem 1055 | 1055 modifi 1056 | 1056 modul 1057 | 1057 moment 1058 | 1058 mon 1059 | 1059 mondai 1060 | 1060 monei 1061 | 1061 monitor 1062 | 1062 month 1063 | 1063 monthli 1064 | 1064 more 1065 | 1065 morn 1066 | 1066 mortgag 1067 | 1067 most 1068 | 1068 mostli 1069 | 1069 mother 1070 | 1070 motiv 1071 | 1071 move 1072 | 1072 movi 1073 | 1073 mpnumber 1074 | 1074 mr 1075 | 1075 ms 1076 | 1076 msg 1077 | 1077 much 1078 | 1078 multi 1079 | 1079 multipart 1080 | 1080 multipl 1081 | 1081 murphi 1082 | 1082 music 1083 | 1083 must 1084 | 1084 my 1085 | 1085 myself 1086 | 1086 name 1087 | 1087 nation 1088 | 1088 natur 1089 | 1089 nbsp 1090 | 1090 near 1091 | 1091 nearli 1092 | 1092 necessari 1093 | 1093 need 1094 | 1094 neg 1095 | 1095 net 1096 | 1096 netscap 1097 | 1097 network 1098 | 1098 never 1099 | 1099 new 1100 | 1100 newslett 1101 | 1101 next 1102 | 1102 nextpart 1103 | 1103 nice 1104 | 1104 nigeria 1105 | 1105 night 1106 | 1106 no 1107 | 1107 nobodi 1108 | 1108 non 1109 | 1109 none 1110 | 1110 nor 1111 | 1111 normal 1112 | 1112 north 1113 | 1113 not 1114 | 1114 note 1115 | 1115 noth 1116 | 1116 notic 1117 | 1117 now 1118 | 1118 nt 1119 | 1119 null 1120 | 1120 number 1121 | 1121 numbera 1122 | 1122 numberam 1123 | 1123 numberanumb 1124 | 1124 numberb 1125 | 1125 numberbit 1126 | 1126 numberc 1127 | 1127 numbercb 1128 | 1128 numbercbr 1129 | 1129 numbercfont 1130 | 1130 numbercli 1131 | 1131 numbercnumb 1132 | 1132 numbercp 1133 | 1133 numberctd 1134 | 1134 numberd 1135 | 1135 numberdari 1136 | 1136 numberdnumb 1137 | 1137 numberenumb 1138 | 1138 numberf 1139 | 1139 numberfb 1140 | 1140 numberff 1141 | 1141 numberffont 1142 | 1142 numberfp 1143 | 1143 numberftd 1144 | 1144 numberk 1145 | 1145 numberm 1146 | 1146 numbermb 1147 | 1147 numberp 1148 | 1148 numberpd 1149 | 1149 numberpm 1150 | 1150 numberpx 1151 | 1151 numberst 1152 | 1152 numberth 1153 | 1153 numbertnumb 1154 | 1154 numberx 1155 | 1155 object 1156 | 1156 oblig 1157 | 1157 obtain 1158 | 1158 obvious 1159 | 1159 occur 1160 | 1160 oct 1161 | 1161 octob 1162 | 1162 of 1163 | 1163 off 1164 | 1164 offer 1165 | 1165 offic 1166 | 1166 offici 1167 | 1167 often 1168 | 1168 oh 1169 | 1169 ok 1170 | 1170 old 1171 | 1171 on 1172 | 1172 onc 1173 | 1173 onli 1174 | 1174 onlin 1175 | 1175 open 1176 | 1176 oper 1177 | 1177 opinion 1178 | 1178 opportun 1179 | 1179 opt 1180 | 1180 optim 1181 | 1181 option 1182 | 1182 or 1183 | 1183 order 1184 | 1184 org 1185 | 1185 organ 1186 | 1186 origin 1187 | 1187 os 1188 | 1188 osdn 1189 | 1189 other 1190 | 1190 otherwis 1191 | 1191 our 1192 | 1192 out 1193 | 1193 outlook 1194 | 1194 output 1195 | 1195 outsid 1196 | 1196 over 1197 | 1197 own 1198 | 1198 owner 1199 | 1199 oz 1200 | 1200 pacif 1201 | 1201 pack 1202 | 1202 packag 1203 | 1203 page 1204 | 1204 pai 1205 | 1205 paid 1206 | 1206 pain 1207 | 1207 palm 1208 | 1208 panel 1209 | 1209 paper 1210 | 1210 paragraph 1211 | 1211 parent 1212 | 1212 part 1213 | 1213 parti 1214 | 1214 particip 1215 | 1215 particular 1216 | 1216 particularli 1217 | 1217 partit 1218 | 1218 partner 1219 | 1219 pass 1220 | 1220 password 1221 | 1221 past 1222 | 1222 patch 1223 | 1223 patent 1224 | 1224 path 1225 | 1225 pattern 1226 | 1226 paul 1227 | 1227 payment 1228 | 1228 pc 1229 | 1229 peac 1230 | 1230 peopl 1231 | 1231 per 1232 | 1232 percent 1233 | 1233 percentag 1234 | 1234 perfect 1235 | 1235 perfectli 
1236 | 1236 perform 1237 | 1237 perhap 1238 | 1238 period 1239 | 1239 perl 1240 | 1240 perman 1241 | 1241 permiss 1242 | 1242 person 1243 | 1243 pgp 1244 | 1244 phone 1245 | 1245 photo 1246 | 1246 php 1247 | 1247 phrase 1248 | 1248 physic 1249 | 1249 pick 1250 | 1250 pictur 1251 | 1251 piec 1252 | 1252 piiiiiiii 1253 | 1253 pipe 1254 | 1254 pjnumber 1255 | 1255 place 1256 | 1256 plai 1257 | 1257 plain 1258 | 1258 plan 1259 | 1259 planet 1260 | 1260 plant 1261 | 1261 planta 1262 | 1262 platform 1263 | 1263 player 1264 | 1264 pleas 1265 | 1265 plu 1266 | 1266 plug 1267 | 1267 pm 1268 | 1268 pocket 1269 | 1269 point 1270 | 1270 polic 1271 | 1271 polici 1272 | 1272 polit 1273 | 1273 poor 1274 | 1274 pop 1275 | 1275 popul 1276 | 1276 popular 1277 | 1277 port 1278 | 1278 posit 1279 | 1279 possibl 1280 | 1280 post 1281 | 1281 potenti 1282 | 1282 pound 1283 | 1283 powel 1284 | 1284 power 1285 | 1285 powershot 1286 | 1286 practic 1287 | 1287 pre 1288 | 1288 predict 1289 | 1289 prefer 1290 | 1290 premium 1291 | 1291 prepar 1292 | 1292 present 1293 | 1293 presid 1294 | 1294 press 1295 | 1295 pretti 1296 | 1296 prevent 1297 | 1297 previou 1298 | 1298 previous 1299 | 1299 price 1300 | 1300 principl 1301 | 1301 print 1302 | 1302 printabl 1303 | 1303 printer 1304 | 1304 privaci 1305 | 1305 privat 1306 | 1306 prize 1307 | 1307 pro 1308 | 1308 probabl 1309 | 1309 problem 1310 | 1310 procedur 1311 | 1311 process 1312 | 1312 processor 1313 | 1313 procmail 1314 | 1314 produc 1315 | 1315 product 1316 | 1316 profession 1317 | 1317 profil 1318 | 1318 profit 1319 | 1319 program 1320 | 1320 programm 1321 | 1321 progress 1322 | 1322 project 1323 | 1323 promis 1324 | 1324 promot 1325 | 1325 prompt 1326 | 1326 properti 1327 | 1327 propos 1328 | 1328 proprietari 1329 | 1329 prospect 1330 | 1330 protect 1331 | 1331 protocol 1332 | 1332 prove 1333 | 1333 proven 1334 | 1334 provid 1335 | 1335 proxi 1336 | 1336 pub 1337 | 1337 public 1338 | 1338 publish 1339 | 1339 pudg 1340 | 1340 pull 1341 | 1341 purchas 1342 | 1342 purpos 1343 | 1343 put 1344 | 1344 python 1345 | 1345 qnumber 1346 | 1346 qualifi 1347 | 1347 qualiti 1348 | 1348 quarter 1349 | 1349 question 1350 | 1350 quick 1351 | 1351 quickli 1352 | 1352 quit 1353 | 1353 quot 1354 | 1354 radio 1355 | 1355 ragga 1356 | 1356 rais 1357 | 1357 random 1358 | 1358 rang 1359 | 1359 rate 1360 | 1360 rather 1361 | 1361 ratio 1362 | 1362 razor 1363 | 1363 razornumb 1364 | 1364 re 1365 | 1365 reach 1366 | 1366 read 1367 | 1367 reader 1368 | 1368 readi 1369 | 1369 real 1370 | 1370 realiz 1371 | 1371 realli 1372 | 1372 reason 1373 | 1373 receiv 1374 | 1374 recent 1375 | 1375 recipi 1376 | 1376 recommend 1377 | 1377 record 1378 | 1378 red 1379 | 1379 redhat 1380 | 1380 reduc 1381 | 1381 refer 1382 | 1382 refin 1383 | 1383 reg 1384 | 1384 regard 1385 | 1385 region 1386 | 1386 regist 1387 | 1387 regul 1388 | 1388 regular 1389 | 1389 rel 1390 | 1390 relat 1391 | 1391 relationship 1392 | 1392 releas 1393 | 1393 relev 1394 | 1394 reliabl 1395 | 1395 remain 1396 | 1396 rememb 1397 | 1397 remot 1398 | 1398 remov 1399 | 1399 replac 1400 | 1400 repli 1401 | 1401 report 1402 | 1402 repositori 1403 | 1403 repres 1404 | 1404 republ 1405 | 1405 request 1406 | 1406 requir 1407 | 1407 research 1408 | 1408 reserv 1409 | 1409 resid 1410 | 1410 resourc 1411 | 1411 respect 1412 | 1412 respond 1413 | 1413 respons 1414 | 1414 rest 1415 | 1415 result 1416 | 1416 retail 1417 | 1417 return 1418 | 1418 reveal 1419 | 1419 revenu 1420 | 1420 revers 1421 | 1421 review 1422 | 1422 revok 1423 | 1423 rh 1424 | 
1424 rich 1425 | 1425 right 1426 | 1426 risk 1427 | 1427 road 1428 | 1428 robert 1429 | 1429 rock 1430 | 1430 role 1431 | 1431 roll 1432 | 1432 rom 1433 | 1433 roman 1434 | 1434 room 1435 | 1435 root 1436 | 1436 round 1437 | 1437 rpm 1438 | 1438 rss 1439 | 1439 rule 1440 | 1440 run 1441 | 1441 sa 1442 | 1442 safe 1443 | 1443 sai 1444 | 1444 said 1445 | 1445 sale 1446 | 1446 same 1447 | 1447 sampl 1448 | 1448 san 1449 | 1449 saou 1450 | 1450 sat 1451 | 1451 satellit 1452 | 1452 save 1453 | 1453 saw 1454 | 1454 scan 1455 | 1455 schedul 1456 | 1456 school 1457 | 1457 scienc 1458 | 1458 score 1459 | 1459 screen 1460 | 1460 script 1461 | 1461 se 1462 | 1462 search 1463 | 1463 season 1464 | 1464 second 1465 | 1465 secret 1466 | 1466 section 1467 | 1467 secur 1468 | 1468 see 1469 | 1469 seed 1470 | 1470 seek 1471 | 1471 seem 1472 | 1472 seen 1473 | 1473 select 1474 | 1474 self 1475 | 1475 sell 1476 | 1476 seminar 1477 | 1477 send 1478 | 1478 sender 1479 | 1479 sendmail 1480 | 1480 senior 1481 | 1481 sens 1482 | 1482 sensit 1483 | 1483 sent 1484 | 1484 sep 1485 | 1485 separ 1486 | 1486 septemb 1487 | 1487 sequenc 1488 | 1488 seri 1489 | 1489 serif 1490 | 1490 seriou 1491 | 1491 serv 1492 | 1492 server 1493 | 1493 servic 1494 | 1494 set 1495 | 1495 setup 1496 | 1496 seven 1497 | 1497 seventh 1498 | 1498 sever 1499 | 1499 sex 1500 | 1500 sexual 1501 | 1501 sf 1502 | 1502 shape 1503 | 1503 share 1504 | 1504 she 1505 | 1505 shell 1506 | 1506 ship 1507 | 1507 shop 1508 | 1508 short 1509 | 1509 shot 1510 | 1510 should 1511 | 1511 show 1512 | 1512 side 1513 | 1513 sign 1514 | 1514 signatur 1515 | 1515 signific 1516 | 1516 similar 1517 | 1517 simpl 1518 | 1518 simpli 1519 | 1519 sinc 1520 | 1520 sincer 1521 | 1521 singl 1522 | 1522 sit 1523 | 1523 site 1524 | 1524 situat 1525 | 1525 six 1526 | 1526 size 1527 | 1527 skeptic 1528 | 1528 skill 1529 | 1529 skin 1530 | 1530 skip 1531 | 1531 sleep 1532 | 1532 slow 1533 | 1533 small 1534 | 1534 smart 1535 | 1535 smoke 1536 | 1536 smtp 1537 | 1537 snumber 1538 | 1538 so 1539 | 1539 social 1540 | 1540 societi 1541 | 1541 softwar 1542 | 1542 sold 1543 | 1543 solut 1544 | 1544 solv 1545 | 1545 some 1546 | 1546 someon 1547 | 1547 someth 1548 | 1548 sometim 1549 | 1549 son 1550 | 1550 song 1551 | 1551 soni 1552 | 1552 soon 1553 | 1553 sorri 1554 | 1554 sort 1555 | 1555 sound 1556 | 1556 sourc 1557 | 1557 south 1558 | 1558 space 1559 | 1559 spain 1560 | 1560 spam 1561 | 1561 spamassassin 1562 | 1562 spamd 1563 | 1563 spammer 1564 | 1564 speak 1565 | 1565 spec 1566 | 1566 special 1567 | 1567 specif 1568 | 1568 specifi 1569 | 1569 speech 1570 | 1570 speed 1571 | 1571 spend 1572 | 1572 sponsor 1573 | 1573 sport 1574 | 1574 spot 1575 | 1575 src 1576 | 1576 ssh 1577 | 1577 st 1578 | 1578 stabl 1579 | 1579 staff 1580 | 1580 stai 1581 | 1581 stand 1582 | 1582 standard 1583 | 1583 star 1584 | 1584 start 1585 | 1585 state 1586 | 1586 statement 1587 | 1587 statu 1588 | 1588 step 1589 | 1589 steve 1590 | 1590 still 1591 | 1591 stock 1592 | 1592 stop 1593 | 1593 storag 1594 | 1594 store 1595 | 1595 stori 1596 | 1596 strategi 1597 | 1597 stream 1598 | 1598 street 1599 | 1599 string 1600 | 1600 strip 1601 | 1601 strong 1602 | 1602 structur 1603 | 1603 studi 1604 | 1604 stuff 1605 | 1605 stupid 1606 | 1606 style 1607 | 1607 subject 1608 | 1608 submit 1609 | 1609 subscrib 1610 | 1610 subscript 1611 | 1611 substanti 1612 | 1612 success 1613 | 1613 such 1614 | 1614 suffer 1615 | 1615 suggest 1616 | 1616 suit 1617 | 1617 sum 1618 | 1618 summari 1619 | 1619 summer 1620 | 1620 sun 1621 | 
1621 super 1622 | 1622 suppli 1623 | 1623 support 1624 | 1624 suppos 1625 | 1625 sure 1626 | 1626 surpris 1627 | 1627 suse 1628 | 1628 suspect 1629 | 1629 sweet 1630 | 1630 switch 1631 | 1631 system 1632 | 1632 tab 1633 | 1633 tabl 1634 | 1634 tablet 1635 | 1635 tag 1636 | 1636 take 1637 | 1637 taken 1638 | 1638 talk 1639 | 1639 tape 1640 | 1640 target 1641 | 1641 task 1642 | 1642 tax 1643 | 1643 teach 1644 | 1644 team 1645 | 1645 tech 1646 | 1646 technic 1647 | 1647 techniqu 1648 | 1648 technolog 1649 | 1649 tel 1650 | 1650 telecom 1651 | 1651 telephon 1652 | 1652 tell 1653 | 1653 temperatur 1654 | 1654 templ 1655 | 1655 ten 1656 | 1656 term 1657 | 1657 termin 1658 | 1658 terror 1659 | 1659 terrorist 1660 | 1660 test 1661 | 1661 texa 1662 | 1662 text 1663 | 1663 than 1664 | 1664 thank 1665 | 1665 that 1666 | 1666 the 1667 | 1667 thei 1668 | 1668 their 1669 | 1669 them 1670 | 1670 themselv 1671 | 1671 then 1672 | 1672 theori 1673 | 1673 there 1674 | 1674 therefor 1675 | 1675 these 1676 | 1676 thi 1677 | 1677 thing 1678 | 1678 think 1679 | 1679 thinkgeek 1680 | 1680 third 1681 | 1681 those 1682 | 1682 though 1683 | 1683 thought 1684 | 1684 thousand 1685 | 1685 thread 1686 | 1686 threat 1687 | 1687 three 1688 | 1688 through 1689 | 1689 thu 1690 | 1690 thursdai 1691 | 1691 ti 1692 | 1692 ticket 1693 | 1693 tim 1694 | 1694 time 1695 | 1695 tip 1696 | 1696 tire 1697 | 1697 titl 1698 | 1698 tm 1699 | 1699 to 1700 | 1700 todai 1701 | 1701 togeth 1702 | 1702 token 1703 | 1703 told 1704 | 1704 toll 1705 | 1705 tom 1706 | 1706 toner 1707 | 1707 toni 1708 | 1708 too 1709 | 1709 took 1710 | 1710 tool 1711 | 1711 top 1712 | 1712 topic 1713 | 1713 total 1714 | 1714 touch 1715 | 1715 toward 1716 | 1716 track 1717 | 1717 trade 1718 | 1718 tradit 1719 | 1719 traffic 1720 | 1720 train 1721 | 1721 transact 1722 | 1722 transfer 1723 | 1723 travel 1724 | 1724 treat 1725 | 1725 tree 1726 | 1726 tri 1727 | 1727 trial 1728 | 1728 trick 1729 | 1729 trip 1730 | 1730 troubl 1731 | 1731 true 1732 | 1732 truli 1733 | 1733 trust 1734 | 1734 truth 1735 | 1735 try 1736 | 1736 tue 1737 | 1737 tuesdai 1738 | 1738 turn 1739 | 1739 tv 1740 | 1740 two 1741 | 1741 type 1742 | 1742 uk 1743 | 1743 ultim 1744 | 1744 un 1745 | 1745 under 1746 | 1746 understand 1747 | 1747 unfortun 1748 | 1748 uniqu 1749 | 1749 unison 1750 | 1750 unit 1751 | 1751 univers 1752 | 1752 unix 1753 | 1753 unless 1754 | 1754 unlik 1755 | 1755 unlimit 1756 | 1756 unseen 1757 | 1757 unsolicit 1758 | 1758 unsubscrib 1759 | 1759 until 1760 | 1760 up 1761 | 1761 updat 1762 | 1762 upgrad 1763 | 1763 upon 1764 | 1764 urgent 1765 | 1765 url 1766 | 1766 us 1767 | 1767 usa 1768 | 1768 usag 1769 | 1769 usb 1770 | 1770 usd 1771 | 1771 usdollarnumb 1772 | 1772 useless 1773 | 1773 user 1774 | 1774 usr 1775 | 1775 usual 1776 | 1776 util 1777 | 1777 vacat 1778 | 1778 valid 1779 | 1779 valu 1780 | 1780 valuabl 1781 | 1781 var 1782 | 1782 variabl 1783 | 1783 varieti 1784 | 1784 variou 1785 | 1785 ve 1786 | 1786 vendor 1787 | 1787 ventur 1788 | 1788 veri 1789 | 1789 verifi 1790 | 1790 version 1791 | 1791 via 1792 | 1792 video 1793 | 1793 view 1794 | 1794 virtual 1795 | 1795 visa 1796 | 1796 visit 1797 | 1797 visual 1798 | 1798 vnumber 1799 | 1799 voic 1800 | 1800 vote 1801 | 1801 vs 1802 | 1802 vulner 1803 | 1803 wa 1804 | 1804 wai 1805 | 1805 wait 1806 | 1806 wake 1807 | 1807 walk 1808 | 1808 wall 1809 | 1809 want 1810 | 1810 war 1811 | 1811 warm 1812 | 1812 warn 1813 | 1813 warranti 1814 | 1814 washington 1815 | 1815 wasn 1816 | 1816 wast 1817 | 1817 watch 1818 | 1818 
water 1819 | 1819 we 1820 | 1820 wealth 1821 | 1821 weapon 1822 | 1822 web 1823 | 1823 weblog 1824 | 1824 websit 1825 | 1825 wed 1826 | 1826 wednesdai 1827 | 1827 week 1828 | 1828 weekli 1829 | 1829 weight 1830 | 1830 welcom 1831 | 1831 well 1832 | 1832 went 1833 | 1833 were 1834 | 1834 west 1835 | 1835 what 1836 | 1836 whatev 1837 | 1837 when 1838 | 1838 where 1839 | 1839 whether 1840 | 1840 which 1841 | 1841 while 1842 | 1842 white 1843 | 1843 whitelist 1844 | 1844 who 1845 | 1845 whole 1846 | 1846 whose 1847 | 1847 why 1848 | 1848 wi 1849 | 1849 wide 1850 | 1850 width 1851 | 1851 wife 1852 | 1852 will 1853 | 1853 william 1854 | 1854 win 1855 | 1855 window 1856 | 1856 wing 1857 | 1857 winner 1858 | 1858 wireless 1859 | 1859 wish 1860 | 1860 with 1861 | 1861 within 1862 | 1862 without 1863 | 1863 wnumberp 1864 | 1864 woman 1865 | 1865 women 1866 | 1866 won 1867 | 1867 wonder 1868 | 1868 word 1869 | 1869 work 1870 | 1870 worker 1871 | 1871 world 1872 | 1872 worldwid 1873 | 1873 worri 1874 | 1874 worst 1875 | 1875 worth 1876 | 1876 would 1877 | 1877 wouldn 1878 | 1878 write 1879 | 1879 written 1880 | 1880 wrong 1881 | 1881 wrote 1882 | 1882 www 1883 | 1883 ximian 1884 | 1884 xml 1885 | 1885 xp 1886 | 1886 yahoo 1887 | 1887 ye 1888 | 1888 yeah 1889 | 1889 year 1890 | 1890 yesterdai 1891 | 1891 yet 1892 | 1892 york 1893 | 1893 you 1894 | 1894 young 1895 | 1895 your 1896 | 1896 yourself 1897 | 1897 zdnet 1898 | 1898 zero 1899 | 1899 zip 1900 | -------------------------------------------------------------------------------- /ex6 Support Vector Machines/ex6.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from scipy.io import loadmat 8 | from plotData import plot_data 9 | from sklearn.svm import SVC 10 | 11 | from gaussianKernel import gaussian_kernel 12 | 13 | 14 | def pause_func(): 15 | while input() != '': 16 | pass 17 | 18 | 19 | def load_mat_file(filename): 20 | return loadmat(filename) 21 | 22 | 23 | if __name__ == '__main__': 24 | # =============== Part 1: Loading and Visualizing Data ================ 25 | print('Loading and Visualizing Data ...\n') 26 | # Load from ex6data1: 27 | # You will have X, y in your environment 28 | data = load_mat_file('./data/ex6data1.mat') 29 | X = data['X'] 30 | y = data['y'] 31 | plt.ion() 32 | plt.figure() 33 | plot_data(X, y) 34 | plt.pause(1) 35 | plt.close() 36 | print('Program paused. 
Press enter to continue.\n') 37 | # pause_func() 38 | 39 | # ==================== Part 2: Training Linear SVM ==================== 40 | # You should try to change the C value below and see how the decision 41 | # boundary varies (e.g., try C = 1000) 42 | plt.figure() 43 | plot_data(X, y) 44 | 45 | C = 1 46 | Classification = SVC(C=C, kernel='linear') 47 | # fit(X, y, sample_weight=None), y : array-like, shape (n_samples,) 48 | Classification.fit(X, y.ravel()) 49 | 50 | plot_pad = 0.5 51 | plot_x_min, plot_x_max = X[:, 0].min() - plot_pad, X[:, 0].max() + plot_pad 52 | plot_y_min, plot_y_max = X[:, 1].min() - plot_pad, X[:, 1].max() + plot_pad 53 | 54 | plot_step = 0.01 55 | plot_x, plot_y = np.meshgrid(np.arange(plot_x_min, plot_x_max, plot_step), 56 | np.arange(plot_y_min, plot_y_max, plot_step)) 57 | plot_z = Classification.predict(np.c_[plot_x.ravel(), plot_y.ravel()]).reshape(plot_x.shape) 58 | plt.contourf(plot_x, plot_y, plot_z, cmap="Wistia", alpha=0.2) 59 | 60 | plt.pause(1) 61 | plt.close() 62 | 63 | print('Program paused. Press enter to continue.\n') 64 | # pause_func() 65 | 66 | # =============== Part 3: Implementing Gaussian Kernel =============== 67 | print('\nEvaluating the Gaussian Kernel ...\n') 68 | x1 = np.array([1, 2, 1]) 69 | x2 = np.array([0, 4, -1]) 70 | sigma = 2 71 | sim = gaussian_kernel(x1, x2, sigma) 72 | print('Gaussian Kernel between x1 = [1; 2; 1], x2 = [0; 4; -1], sigma = %f :' 73 | '\n\t%f\n(for sigma = 2, this value should be about 0.324652)\n' % (sigma, sim)) 74 | print('Program paused. Press enter to continue.\n') 75 | # pause_func() 76 | 77 | # =============== Part 4: Visualizing Dataset 2 ================ 78 | print('Loading and Visualizing Data ...\n') 79 | data = load_mat_file('./data/ex6data2.mat') 80 | X = data['X'] 81 | y = data['y'] 82 | plt.figure() 83 | plot_data(X, y) 84 | plt.pause(1) 85 | plt.close() 86 | print('Program paused. Press enter to continue.\n') 87 | # pause_func() 88 | 89 | # ========== Part 5: Training SVM with RBF Kernel (Dataset 2) ========== 90 | plt.figure() 91 | plot_data(X, y) 92 | 93 | Classification = SVC(C=100, kernel='rbf', gamma=6) 94 | # fit(X, y, sample_weight=None), y : array-like, shape (n_samples,) 95 | Classification.fit(X, y.ravel()) 96 | 97 | plot_pad = 0.5 98 | plot_x_min, plot_x_max = X[:, 0].min() - plot_pad, X[:, 0].max() + plot_pad 99 | plot_y_min, plot_y_max = X[:, 1].min() - plot_pad, X[:, 1].max() + plot_pad 100 | 101 | plot_step = 0.01 102 | plot_x, plot_y = np.meshgrid(np.arange(plot_x_min, plot_x_max, plot_step), 103 | np.arange(plot_y_min, plot_y_max, plot_step)) 104 | plot_z = Classification.predict(np.c_[plot_x.ravel(), plot_y.ravel()]).reshape(plot_x.shape) 105 | plt.contourf(plot_x, plot_y, plot_z, cmap="Wistia", alpha=0.2) 106 | plt.axis([-0.1, 1.1, 0.3, 1.05]) 107 | plt.pause(1) 108 | plt.close() 109 | 110 | print('Program paused. Press enter to continue.\n') 111 | # pause_func() 112 | 113 | # =============== Part 6: Visualizing Dataset 3 ================ 114 | print('Loading and Visualizing Data ...\n') 115 | data = load_mat_file('./data/ex6data3.mat') 116 | X = data['X'] 117 | y = data['y'] 118 | plt.figure() 119 | plot_data(X, y) 120 | plt.pause(1) 121 | plt.close() 122 | print('Program paused. 
Press enter to continue.\n') 123 | # pause_func() 124 | 125 | # ========== Part 7: Training SVM with Polynomial Kernel (Dataset 3) ========== 126 | plt.figure() 127 | plot_data(X, y) 128 | 129 | Classification = SVC(C=1, kernel='poly', degree=3, gamma=10) 130 | # fit(X, y, sample_weight=None), y : array-like, shape (n_samples,) 131 | Classification.fit(X, y.ravel()) 132 | 133 | plot_pad = 0.5 134 | plot_x_min, plot_x_max = X[:, 0].min() - plot_pad, X[:, 0].max() + plot_pad 135 | plot_y_min, plot_y_max = X[:, 1].min() - plot_pad, X[:, 1].max() + plot_pad 136 | 137 | plot_step = 0.01 138 | plot_x, plot_y = np.meshgrid(np.arange(plot_x_min, plot_x_max, plot_step), 139 | np.arange(plot_y_min, plot_y_max, plot_step)) 140 | plot_z = Classification.predict(np.c_[plot_x.ravel(), plot_y.ravel()]).reshape(plot_x.shape) 141 | plt.contourf(plot_x, plot_y, plot_z, cmap="Wistia", alpha=0.2) 142 | plt.axis([-0.8, 0.4, -0.8, 0.8]) 143 | plt.pause(1) 144 | plt.close() 145 | 146 | print('Program paused. Press enter to continue.\n') 147 | # pause_func() 148 | -------------------------------------------------------------------------------- /ex6 Support Vector Machines/gaussianKernel.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | 5 | import numpy as np 6 | import numpy.linalg as linalg 7 | 8 | 9 | def gaussian_kernel(x1, x2, sigma): 10 | return np.exp(-((linalg.norm(x1 - x2)) ** 2) / (2 * (sigma ** 2))) 11 | -------------------------------------------------------------------------------- /ex6 Support Vector Machines/plotData.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | 8 | 9 | def plot_data(x, y): 10 | pos = np.where(y[:, 0] == 1)[0] 11 | neg = np.where(y[:, 0] == 0)[0] 12 | plt.scatter(x[pos, 0], x[pos, 1], marker='+') 13 | plt.scatter(x[neg, 0], x[neg, 1], marker='o') 14 | -------------------------------------------------------------------------------- /ex7 K-means Clustering and Principal Component Analysis/computeCentroids.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | 5 | import numpy as np 6 | 7 | 8 | def compute_centroids(x, idx, k): 9 | # Useful variables 10 | m, n = x.shape 11 | centroids = np.zeros((k, n)) 12 | idx = np.ravel(idx) 13 | for i in range(k): 14 | centroids[i] = np.mean(x[np.where(idx == i + 1)], axis=0) 15 | return centroids 16 | -------------------------------------------------------------------------------- /ex7 K-means Clustering and Principal Component Analysis/data/bird_small.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/ex7 K-means Clustering and Principal Component Analysis/data/bird_small.mat -------------------------------------------------------------------------------- /ex7 K-means Clustering and Principal Component Analysis/data/bird_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/ex7 K-means Clustering and Principal Component Analysis/data/bird_small.png
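(Editor's note, not a file from the original repository.) gaussianKernel.py above and the SVC calls in ex6.py use two different parameterizations of the same kernel: scikit-learn's 'rbf' kernel is exp(-gamma * ||x1 - x2||^2), while the course's Gaussian kernel is exp(-||x1 - x2||^2 / (2 * sigma^2)), so the two coincide exactly when gamma = 1 / (2 * sigma^2). The minimal sketch below shows the conversion (the helper name sigma_to_gamma is ours, not part of the repo); with it, the course's cross-validated parameters such as C = 1, sigma = 0.1 for dataset 2 translate directly to gamma = 50, instead of the hand-picked gamma values used in ex6.py.

from sklearn.svm import SVC

def sigma_to_gamma(sigma):
    # gamma for sklearn's 'rbf' kernel that reproduces gaussian_kernel(..., sigma)
    return 1.0 / (2.0 * sigma ** 2)

clf = SVC(C=1, kernel='rbf', gamma=sigma_to_gamma(0.1))  # gamma = 50

As a sanity check, this matches Part 3 of ex6.py: for sigma = 2 and ||x1 - x2||^2 = 9, both forms give exp(-9 / 8) = 0.324652.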
-------------------------------------------------------------------------------- /ex7 K-means Clustering and Principal Component Analysis/data/ex7data1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/ex7 K-means Clustering and Principal Component Analysis/data/ex7data1.mat -------------------------------------------------------------------------------- /ex7 K-means Clustering and Principal Component Analysis/data/ex7data2.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/ex7 K-means Clustering and Principal Component Analysis/data/ex7data2.mat -------------------------------------------------------------------------------- /ex7 K-means Clustering and Principal Component Analysis/data/ex7faces.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/ex7 K-means Clustering and Principal Component Analysis/data/ex7faces.mat -------------------------------------------------------------------------------- /ex7 K-means Clustering and Principal Component Analysis/displayData.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | 7 | 8 | def display_data(x, example_width=None): 9 | m, n = x.shape 10 | # Set example_width automatically if not passed in 11 | if not example_width: 12 | example_width = int(np.round(np.sqrt(n))) 13 | example_height = int((n / example_width)) 14 | 15 | # Compute number of items to display 16 | display_rows = int(np.floor(np.sqrt(m))) 17 | display_cols = int(np.ceil(m / display_rows)) 18 | 19 | # Between images padding 20 | pad = 1 21 | # Setup blank display 22 | display_array = - np.ones((pad + display_rows * (example_height + pad), 23 | pad + display_cols * (example_width + pad))) 24 | 25 | # Copy each example into a patch on the display array 26 | curr_ex = 0 27 | for j in range(display_rows): 28 | for i in range(display_cols): 29 | if curr_ex >= m:  # was 'curr_ex > m', which could read one example past the end 30 | break 31 | # Get the max value of the patch 32 | max_val = np.max(np.abs(x[curr_ex, :])) 33 | 34 | wait_set_temp = np.reshape(x[curr_ex, :], 35 | (example_height, example_width), order='F') / max_val 36 | height_min_temp = pad + (j - 0) * (example_height + pad) 37 | height_max_temp = height_min_temp + example_height 38 | width_min_temp = pad + (i - 0) * (example_width + pad) 39 | width_max_temp = width_min_temp + example_width 40 | display_array[height_min_temp:height_max_temp, width_min_temp:width_max_temp] = wait_set_temp 41 | curr_ex = curr_ex + 1 42 | if curr_ex >= m: 43 | break 44 | plt.ion() 45 | plt.imshow(display_array, cmap="gray") # pick a nice colormap 46 | plt.pause(1) 47 | -------------------------------------------------------------------------------- /ex7 K-means Clustering and Principal Component Analysis/drawLine.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | 8 | 9 | def draw_line(p1, p2, plot_kwargs=""): 10 | # tip: in plot(x, y), each element of x is 
placed on the x-axis, 11 | # so use this function to draw a line between two points. 12 | k0 = np.array(([p1[0], p2[0]])) 13 | k1 = np.array(([p1[1], p2[1]])) 14 | plt.plot(k0, k1, plot_kwargs) 15 | -------------------------------------------------------------------------------- /ex7 K-means Clustering and Principal Component Analysis/ex7.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from scipy.io import loadmat 8 | from scipy.misc import imread  # note: removed in SciPy >= 1.2; imageio.imread is a close substitute 9 | 10 | from findClosestCentroids import find_closest_centroids 11 | from computeCentroids import compute_centroids 12 | from runkMeans import run_k_means 13 | from kMeansInitCentroids import k_means_init_centroids 14 | 15 | 16 | def pause_func(): 17 | while input() != '': 18 | pass 19 | 20 | 21 | def load_mat_file(filename): 22 | return loadmat(filename) 23 | 24 | 25 | if __name__ == '__main__': 26 | # plt.ioff() 27 | # ================= Part 1: Find Closest Centroids ==================== 28 | print('Finding closest centroids.\n\n') 29 | # Load an example dataset that we will be using 30 | data = load_mat_file('./data/ex7data2.mat') 31 | # Visualize the example dataset 32 | X = data['X'] 33 | # Select an initial set of centroids 34 | K = 3 # 3 Centroids 35 | initial_centroids = [[3, 3], [6, 2], [8, 5]] 36 | initial_centroids = np.array(initial_centroids) 37 | # Find the closest centroids for the examples using the initial_centroids 38 | idx = find_closest_centroids(X, initial_centroids) 39 | print('Closest centroids for the first 3 examples: \n {}'.format(idx[0: 3])) 40 | print('\n(the closest centroids should be 1, 3, 2 respectively)\n') 41 | print('Program paused. Press enter to continue.\n') 42 | # pause_func() 43 | 44 | # ===================== Part 2: Compute Means ========================= 45 | # After implementing the closest centroids function, you should now 46 | # complete the computeCentroids function. 47 | print('\nComputing centroids means.\n\n') 48 | # Compute means based on the closest centroids found in the previous part. 49 | centroids = compute_centroids(X, idx, K) 50 | print('Centroids computed after initial finding of closest centroids: \n') 51 | print(' %s \n' % centroids) 52 | print('\n(the centroids should be\n') 53 | print(' [ 2.428301 3.157924 ]\n') 54 | print(' [ 5.813503 2.633656 ]\n') 55 | print(' [ 7.119387 3.616684 ]\n\n') 56 | print('Program paused. Press enter to continue.\n') 57 | # pause_func() 58 | 59 | # =================== Part 3: K-Means Clustering ====================== 60 | # After you have completed the two functions computeCentroids and 61 | # findClosestCentroids, you have all the necessary pieces to run the 62 | # kMeans algorithm. In this part, you will run the K-Means algorithm on 63 | # the example dataset we have provided. 64 | 65 | print('\nRunning K-Means clustering on example dataset.\n\n') 66 | max_iters = 10 67 | # Run K-Means algorithm. The 'True' at the end tells our function to plot 68 | centroids, idx = run_k_means(X, initial_centroids, max_iters, True) 69 | print('\nK-Means Done.\n\n') 70 | print('Program paused. 
Press enter to continue.\n') 71 | # pause_func() 72 | 73 | # ============= Part 4: K-Means Clustering on Pixels =============== 74 | print('\nRunning K-Means clustering on pixels from an image.\n\n') 75 | A = imread('./data/bird_small.png') 76 | # If imread does not work for you, you can try instead 77 | # load_mat_file('bird_small.mat') 78 | 79 | A = A / 255 # Divide by 255 so that all values are in the range 0 - 1 80 | 81 | # Size of the image 82 | img_size = A.shape 83 | 84 | # Reshape the image into an Nx3 matrix where N = number of pixels. 85 | # Each row will contain the Red, Green and Blue pixel values 86 | # This gives us our dataset matrix X that we will use K-Means on. 87 | X = np.reshape(A, (img_size[0] * img_size[1], 3), order='F') 88 | 89 | # Run your K-Means algorithm on this data 90 | # You should try different values of K and max_iters here 91 | K = 16 92 | pixels_iters = 10 93 | 94 | # When using K-Means, it is important to initialize the centroids 95 | # randomly. 96 | # You should complete the code in kMeansInitCentroids.py before proceeding 97 | initial_centroids = k_means_init_centroids(X, K) 98 | 99 | # Run K-Means 100 | centroids_img, idx_img = run_k_means(X, initial_centroids, pixels_iters) 101 | print('Program paused. Press enter to continue.\n') 102 | # pause_func() 103 | 104 | # ================= Part 5: Image Compression ====================== 105 | print('\nApplying K-Means to compress an image.\n\n') 106 | 107 | # Find closest cluster members 108 | idx_img_2 = find_closest_centroids(X, centroids_img) 109 | X_recovered = np.zeros((idx_img_2.shape[0], X.shape[1])) 110 | for i in range(idx_img_2.shape[0]): 111 | X_recovered[i] = centroids_img[idx_img_2[i] - 1] 112 | X_recovered = np.reshape(X_recovered, (img_size[0], img_size[1], 3), order='F') 113 | 114 | plt.figure() 115 | plt.ion() 116 | plt.subplot(121) 117 | plt.imshow(A) 118 | plt.title('Original') 119 | plt.subplot(122) 120 | plt.imshow(X_recovered) 121 | plt.title('Compressed, with {} colors.'.format(K)) 122 | plt.pause(5) 123 | 124 | print('Program paused. 
Press enter to continue.\n') 125 | # pause_func() 126 | -------------------------------------------------------------------------------- /ex7 K-means Clustering and Principal Component Analysis/ex7_pca.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding=utf-8 -*- 3 | 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | import matplotlib.colors as mcolors 8 | from mpl_toolkits.mplot3d import Axes3D 9 | 10 | from scipy.io import loadmat 11 | from scipy.misc import imread 12 | 13 | from featureNormalize import feature_normalize 14 | from pca import pca 15 | from projectData import project_data 16 | from recoverData import recover_data 17 | from displayData import display_data 18 | from runkMeans import run_k_means 19 | from kMeansInitCentroids import k_means_init_centroids 20 | from drawLine import draw_line 21 | 22 | 23 | def pause_func(): 24 | while input() != '': 25 | pass 26 | 27 | 28 | def load_mat_file(filename): 29 | return loadmat(filename) 30 | 31 | 32 | if __name__ == '__main__': 33 | # ================== Part 1: Load Example Dataset =================== 34 | print('Visualizing example dataset for PCA.\n\n') 35 | data = load_mat_file('./data/ex7data1.mat') 36 | X = data['X'] 37 | # Visualize the example dataset 38 | plt.ion() 39 | plt.figure() 40 | plt.scatter(X[:, 0], X[:, 1]) 41 | # plt.axis([0.5, 6.5, 2, 8]) 42 | plt.axis("square") 43 | plt.pause(0.8) 44 | print('Program paused. Press enter to continue.\n') 45 | # pause_func() 46 | 47 | # =============== Part 2: Principal Component Analysis =============== 48 | print('\nRunning PCA on example dataset.\n\n') 49 | # Before running PCA, it is important to first normalize X 50 | X_norm, mu, sigma = feature_normalize(X) 51 | U, S = pca(X_norm) 52 | # Draw the eigenvectors centered at mean of data. 53 | # These lines show the directions of maximum variations in the dataset. 54 | 55 | draw_line(mu, (mu + 1.5 * np.dot(S[0], U[:, 0].T)), "-k") 56 | draw_line(mu, (mu + 1.5 * np.dot(S[1], U[:, 1].T)), "-k") 57 | 58 | plt.pause(0.8) 59 | print('Top eigenvector: \n') 60 | print(' U(:,1) = %s \n' % U[:, 0]) 61 | print('\n(you should expect to see -0.707107 -0.707107)\n') 62 | 63 | print('Program paused. Press enter to continue.\n') 64 | # pause_func() 65 | 66 | # =================== Part 3: Dimension Reduction =================== 67 | # You should now implement the projection step to map the data onto the 68 | # first k eigenvectors. The code will then plot the data in this reduced 69 | # dimensional space. This will show you what the data looks like when 70 | # using only the corresponding eigenvectors to reconstruct it. 
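# ---- Editor's aside: the lines below are an addition, not part of the original ex7_pca.py; delete freely. ----
# pca() returns the singular values S of the data's covariance matrix, i.e. the
# variance along each principal direction, so a K-dimensional projection keeps
# sum(S[:K]) / sum(S) of the total variance. A one-line check for this 2-D dataset:
print('Variance retained by the first component: %.4f' % (S[0] / np.sum(S)))
# ----------------------------------------------------------------------------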
71 | print('\nDimension reduction on example dataset.\n\n') 72 | # Plot the normalized dataset (returned from pca) 73 | plt.close() 74 | plt.figure() 75 | plt.scatter(X_norm[:, 0], X_norm[:, 1]) 76 | plt.axis("square") 77 | plt.pause(0.5) 78 | 79 | # Project the data onto K = 1 dimension 80 | K = 1 81 | Z = project_data(X_norm, U, K) 82 | print('Projection of the first example: %s\n' % Z[0]) 83 | print('\n(this value should be about 1.481274)\n\n') 84 | 85 | X_rec = recover_data(Z, U, K) 86 | print('Approximation of the first example: %s \n' % X_rec[0]) 87 | print('\n(this value should be about -1.047419 -1.047419)\n\n') 88 | 89 | # Draw lines connecting the projected points to the original points 90 | plt.scatter(X_rec[:, 0], X_rec[:, 1], c="r") 91 | plt.pause(0.5) 92 | for i in range(X_norm.shape[0]): 93 | draw_line(X_norm[i], X_rec[i], "--k") 94 | plt.pause(0.1) 95 | 96 | print('Program paused. Press enter to continue.\n') 97 | # pause_func() 98 | 99 | # =============== Part 4: Loading and Visualizing Face Data ============= 100 | # We start the exercise by first loading and visualizing the dataset. 101 | # The following code will load the dataset into your environment 102 | 103 | # Load Face dataset 104 | face_data = load_mat_file('./data/ex7faces.mat') 105 | X = face_data['X'] 106 | plt.close() 107 | plt.figure() 108 | display_data(X[0: 100, :]) 109 | print('Program paused. Press enter to continue.\n') 110 | # pause_func() 111 | 112 | # =========== Part 5: PCA on Face Data: Eigenfaces =================== 113 | # Run PCA and visualize the eigenvectors which are in this case eigenfaces 114 | # We display the first 36 eigenfaces. 115 | print('\nRunning PCA on face dataset.\n(this might take a minute or two ...)\n\n') 116 | # Before running PCA, it is important to first normalize X by subtracting 117 | # the mean value from each feature 118 | X_norm, mu, sigma = feature_normalize(X) 119 | 120 | # Run PCA 121 | U, S = pca(X_norm) 122 | # Visualize the top 36 eigenvectors found 123 | plt.close() 124 | plt.figure() 125 | display_data(U[:, 0:36].T) 126 | print('Program paused. Press enter to continue.\n') 127 | # pause_func() 128 | 129 | # ============= Part 6: Dimension Reduction for Faces ================= 130 | # Project images to the eigenspace using the top k eigenvectors 131 | # If you are applying a machine learning algorithm, you can use the projected data instead of the original images 132 | print('\nDimension reduction for face dataset.\n\n') 133 | 134 | K = 100 135 | Z = project_data(X_norm, U, K) 136 | 137 | print('The projected data Z has a size of: ') 138 | print(Z.shape) 139 | 140 | print('Program paused. Press enter to continue.\n') 141 | # pause_func() 142 | 143 | # ==== Part 7: Visualization of Faces after PCA Dimension Reduction ==== 144 | # Project images to the eigenspace using the top K eigenvectors and 145 | # visualize only using those K dimensions 146 | # Compare to the original input, which is also displayed 147 | 148 | print('\nVisualizing the projected (reduced dimension) faces.\n\n') 149 | K = 100 150 | X_rec = recover_data(Z, U, K) 151 | # Display normalized data 152 | plt.close() 153 | plt.figure() 154 | plt.subplot(1, 2, 1) 155 | plt.title('Original faces') 156 | display_data(X_norm[0:100, :]) 157 | 158 | # Display reconstructed data from only k eigenfaces 159 | plt.subplot(1, 2, 2) 160 | plt.title('Recovered faces') 161 | display_data(X_rec[0:100, :]) 162 | plt.close() 163 | 164 | print('Program paused. 
Press enter to continue.\n') 165 | # pause_func() 166 | 167 | # === Part 8(a): Optional (ungraded) Exercise: PCA for Visualization === 168 | A = imread('./data/bird_small.png') 169 | A = A / 255 170 | img_size = A.shape 171 | X = np.reshape(A, (img_size[0] * img_size[1], 3), order='F') 172 | K = 16 173 | pixels_iters = 10 174 | initial_centroids = k_means_init_centroids(X, K) 175 | centroids_img, idx_img = run_k_means(X, initial_centroids, pixels_iters) 176 | # Sample 1000 random indexes (since working with all the data is 177 | # too expensive; if you have a fast computer, you may increase this). 178 | sel = np.floor(np.random.rand(1000, 1) * X.shape[0]).astype(np.int32)  # 0-based indices (the original '+ 1' was a MATLAB leftover and could index past the end of X) 179 | 180 | _tab20_data = ( 181 | (0.12156862745098039, 0.4666666666666667, 0.7058823529411765), # 1f77b4 182 | (0.6823529411764706, 0.7803921568627451, 0.9098039215686274), # aec7e8 183 | (1.0, 0.4980392156862745, 0.054901960784313725), # ff7f0e 184 | (1.0, 0.7333333333333333, 0.47058823529411764), # ffbb78 185 | (0.17254901960784313, 0.6274509803921569, 0.17254901960784313), # 2ca02c 186 | (0.596078431372549, 0.8745098039215686, 0.5411764705882353), # 98df8a 187 | (0.8392156862745098, 0.15294117647058825, 0.1568627450980392), # d62728 188 | (1.0, 0.596078431372549, 0.5882352941176471), # ff9896 189 | (0.5803921568627451, 0.403921568627451, 0.7411764705882353), # 9467bd 190 | (0.7725490196078432, 0.6901960784313725, 0.8352941176470589), # c5b0d5 191 | (0.5490196078431373, 0.33725490196078434, 0.29411764705882354), # 8c564b 192 | (0.7686274509803922, 0.611764705882353, 0.5803921568627451), # c49c94 193 | (0.8901960784313725, 0.4666666666666667, 0.7607843137254902), # e377c2 194 | (0.9686274509803922, 0.7137254901960784, 0.8235294117647058), # f7b6d2 195 | (0.4980392156862745, 0.4980392156862745, 0.4980392156862745), # 7f7f7f 196 | (0.7803921568627451, 0.7803921568627451, 0.7803921568627451), # c7c7c7 197 | (0.7372549019607844, 0.7411764705882353, 0.13333333333333333), # bcbd22 198 | (0.8588235294117647, 0.8588235294117647, 0.5529411764705883), # dbdb8d 199 | (0.09019607843137255, 0.7450980392156863, 0.8117647058823529), # 17becf 200 | (0.6196078431372549, 0.8549019607843137, 0.8980392156862745), # 9edae5 201 | ) 202 | palette = mcolors.ListedColormap(_tab20_data, N=K).colors 203 | colors = [] 204 | idx = np.ravel(idx_img[np.ravel(sel)]) 205 | for i in range(len(idx)): 206 | colors.append(palette[idx[i] - 1]) 207 | 208 | fig = plt.figure() 209 | ax = fig.add_subplot(111, projection='3d') 210 | ax.scatter(X[sel, 0], X[sel, 1], X[sel, 2], c=colors, marker="o") 211 | ax.set_title('Pixel dataset plotted in 3D. Color shows centroid memberships') 212 | plt.pause(1) 213 | plt.close() 214 | 215 | print('Program paused. Press enter to continue.\n') 216 | # pause_func() 217 | 218 | # === Part 8(b): Optional (ungraded) Exercise: PCA for Visualization === 219 | # Use PCA to project this cloud to 2D for visualization 220 | X_norm, mu, sigma = feature_normalize(X) 221 | # PCA and project the data to 2D 222 | U, S = pca(X_norm) 223 | Z = project_data(X_norm, U, 2) 224 | 225 | plt.figure() 226 | 227 | x_temp = Z[(np.ravel(sel)), :] 228 | 229 | # Create palette 230 | 231 | palette = mcolors.ListedColormap(_tab20_data, N=K).colors 232 | colors = [] 233 | for i in range(len(idx)): 234 | colors.append(palette[idx[i] - 1]) 235 | plt.scatter(x_temp[:, 0], x_temp[:, 1], c=colors) 236 | 237 | plt.title('Pixel dataset plotted in 2D, using PCA for dimensionality reduction') 238 | plt.pause(1) 239 | print('Program paused. 
/ex7 K-means Clustering and Principal Component Analysis/featureNormalize.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding=utf-8 -*-
3 | 
4 | import numpy as np
5 | 
6 | 
7 | def feature_normalize(x):
8 |     x_norm = np.zeros(x.shape)
9 |     mu = np.mean(x, axis=0)
10 |     sigma = np.std(x, axis=0, ddof=1)  # sample standard deviation, as in MATLAB's std
11 |     for i in range(np.shape(x)[0]):
12 |         x_norm[i] = (x[i] - mu) / sigma
13 |     return x_norm, mu, sigma
14 | 
--------------------------------------------------------------------------------
/ex7 K-means Clustering and Principal Component Analysis/findClosestCentroids.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding=utf-8 -*-
3 | 
4 | 
5 | import numpy as np
6 | 
7 | 
8 | def find_closest_centroids(x, centroids):
9 |     # Set k
10 |     k = centroids.shape[0]
11 | 
12 |     m = x.shape[0]
13 | 
14 |     # You need to return the following variables correctly.
15 |     idx = np.zeros((m, 1), dtype=np.int32)  # 1-indexed memberships, matching the MATLAB original
16 | 
17 |     for i in range(m):
18 |         idx[i] = 1
19 |         min_distance = np.linalg.norm(x[i, :] - centroids[0, :]) ** 2
20 |         for j in range(2, k + 1):
21 |             distance = np.linalg.norm(x[i, :] - centroids[j - 1, :]) ** 2
22 |             if distance < min_distance:
23 |                 min_distance = distance
24 |                 idx[i] = j
25 |     return idx
26 | 
--------------------------------------------------------------------------------
/ex7 K-means Clustering and Principal Component Analysis/kMeansInitCentroids.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding=utf-8 -*-
3 | 
4 | 
5 | import numpy as np
6 | 
7 | 
8 | def k_means_init_centroids(x, k):
9 |     # Randomly reorder the indices of examples
10 |     m = x.shape[0]
11 |     randidx = np.random.permutation(m)
12 |     centroids = x[randidx[0:k], :]
13 |     return centroids
14 | 
--------------------------------------------------------------------------------
/ex7 K-means Clustering and Principal Component Analysis/pca.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding=utf-8 -*-
3 | 
4 | import numpy as np
5 | 
6 | 
7 | def pca(x):
8 |     m = x.shape[0]
9 |     sigma = (1 / m) * (np.dot(x.T, x))  # covariance matrix of the (normalized) data
10 |     u, s, v = np.linalg.svd(sigma)
11 |     return u, s
12 | 
--------------------------------------------------------------------------------
/ex7 K-means Clustering and Principal Component Analysis/plotDataPoints.py:
--------------------------------------------------------------------------------
1 | # !/usr/bin/env python
2 | # -*- coding=utf-8 -*-
3 | 
4 | 
5 | import numpy as np
6 | import matplotlib.pyplot as plt
7 | import matplotlib.colors as mcolors
8 | 
9 | _my_Set1_data = (
10 |     (0.89411764705882357, 0.10196078431372549, 0.10980392156862745),
11 |     (0.21568627450980393, 0.49411764705882355, 0.72156862745098038),
12 |     (0.30196078431372547, 0.68627450980392157, 0.29019607843137257),
13 |     (0.59607843137254901, 0.30588235294117649, 0.63921568627450975),
14 |     (1.0, 0.49803921568627452, 0.0),
15 |     (1.0, 1.0, 0.2),
16 |     (0.65098039215686276, 0.33725490196078434, 0.15686274509803921),
17 |     (0.96862745098039216, 0.50588235294117645, 0.74901960784313726),
18 |     (0.6, 0.6, 0.6),
19 | )
20 | 
21 | 
22 | def plot_data_points(x, idx, k):
23 |     idx = np.ravel(idx)
24 | 
25 |     # Create palette
26 |     if k > 9:
27 |         print("WARN. function: plot_data_points. there aren't enough colors\n")
28 |     palette = mcolors.ListedColormap(_my_Set1_data, N=k).colors
29 |     colors = []
30 |     for i in range(len(idx)):
31 |         colors.append(palette[idx[i] - 1])
32 |     plt.scatter(x[:, 0], x[:, 1], c=colors)
33 | 
--------------------------------------------------------------------------------
/ex7 K-means Clustering and Principal Component Analysis/plotProgresskMeans.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding=utf-8 -*-
3 | 
4 | 
5 | import matplotlib.pyplot as plt
6 | from plotDataPoints import plot_data_points
7 | from drawLine import draw_line
8 | 
9 | 
10 | def plot_progressk_means(x, centroids, previous, idx, k, i):
11 |     # Plot the examples
12 |     plot_data_points(x, idx, k)
13 |     plt.scatter(centroids[:, 0], centroids[:, 1], s=90, c='black', marker='x')
14 |     for j in range(centroids.shape[0]):
15 |         draw_line(centroids[j, :], previous[j, :])
16 |     plt.title('Iteration number {}'.format(i + 1))
17 |     plt.pause(1)
18 | 
--------------------------------------------------------------------------------
/ex7 K-means Clustering and Principal Component Analysis/projectData.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding=utf-8 -*-
3 | 
4 | import numpy as np
5 | 
6 | 
7 | def project_data(x, u, k):
8 |     # Keep only the first k principal directions
9 |     u_reduce = u[:, 0:k]
10 |     return np.dot(x, u_reduce)
11 | 
--------------------------------------------------------------------------------
/ex7 K-means Clustering and Principal Component Analysis/recoverData.py:
--------------------------------------------------------------------------------
1 | # !/usr/bin/env python
2 | # -*- coding=utf-8 -*-
3 | 
4 | import numpy as np
5 | 
6 | 
7 | def recover_data(z, u, k):
8 |     # Keep only the first k principal directions
9 |     u_reduce = u[:, 0:k]
10 |     return np.dot(z, u_reduce.T)
11 | 
--------------------------------------------------------------------------------
/ex7 K-means Clustering and Principal Component Analysis/runkMeans.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding=utf-8 -*-
3 | 
4 | 
5 | import matplotlib.pyplot as plt
6 | from computeCentroids import compute_centroids
7 | from findClosestCentroids import find_closest_centroids
8 | from plotProgresskMeans import plot_progressk_means
9 | 
10 | 
11 | def run_k_means(x, initial_centroids, max_iters, plot_progress=False):
12 |     if plot_progress:
13 |         plt.ion()
14 |         plt.figure()
15 |     centroids = initial_centroids
16 |     previous_centroids = centroids
17 |     k = initial_centroids.shape[0]
18 | 
19 |     idx = 0
20 | 
21 |     for i in range(max_iters):
22 |         # Output progress
23 |         print('K-Means iteration {}/{}...\n'.format(i + 1, max_iters), flush=True)
24 |         # For each example in X, assign it to the closest centroid
25 |         idx = find_closest_centroids(x, centroids)
26 |         # Optionally, plot progress here
27 |         if plot_progress:
28 |             plot_progressk_means(x, centroids, previous_centroids, idx, k, i)
29 |             previous_centroids = centroids
30 | 
31 |         # Given the memberships, compute new centroids
32 |         centroids = compute_centroids(x, idx, k)
33 |     if plot_progress:
34 |         plt.close()
35 |     return centroids, idx
36 | 
--------------------------------------------------------------------------------
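# --- Aside: using run_k_means (a sketch, not part of the original code) ---
# A minimal usage example on made-up 2-D data, assuming the modules above:
#
#     import numpy as np
#     from kMeansInitCentroids import k_means_init_centroids
#     from runkMeans import run_k_means
#
#     toy = np.vstack((np.random.randn(50, 2), np.random.randn(50, 2) + 5))
#     init = k_means_init_centroids(toy, 2)
#     centroids, idx = run_k_means(toy, init, 10)
#     # centroids holds the 2 final centers; idx holds 1-indexed memberships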
/ex8 Anomaly Detection and Recommender/checkGradients.py:
--------------------------------------------------------------------------------
1 | # !/usr/bin/env python
2 | # -*- coding=utf-8 -*-
3 | 
4 | 
5 | import numpy as np
6 | from computeNumericalGradient import compute_numerical_gradient
7 | from cofiCostFunc import cofi_cost_func
8 | 
9 | 
10 | def disp(disp_list):
11 |     size_no_0 = disp_list[0].size
12 |     disp_num = len(disp_list)
13 |     for i in range(disp_num):
14 |         if disp_list[0].shape != disp_list[i].shape:
15 |             return False
16 |     for i in range(size_no_0):
17 |         print("\n", end='')
18 |         for j in range(disp_num):
19 |             print("{: >17.11f}".format(disp_list[j][i]), end=' ')
20 |     print("\n")
21 | 
22 | 
23 | def check_gradients(check_nn_lambda=0):
24 |     # Create small problem
25 |     x_t = np.random.rand(4, 3)
26 |     theta_t = np.random.rand(5, 3)
27 | 
28 |     # Zap out most entries
29 |     y = np.dot(x_t, theta_t.T)
30 |     rand_x_axis, rand_y_axis = np.where(np.random.rand(y.shape[0], y.shape[1]) > 0.5)
31 |     y[rand_x_axis, rand_y_axis] = 0
32 |     y_zero_x_axis, y_zero_y_axis = np.where(y == 0)  # entries to treat as unrated
33 | 
34 |     r = np.ones(y.shape)
35 |     r[y_zero_x_axis, y_zero_y_axis] = 0
36 | 
37 |     # (the zapped entries of y are already zero, so y needs no further change)
38 | 
39 |     # Run Gradient Checking
40 |     x = np.random.randn(x_t.shape[0], x_t.shape[1])
41 |     theta = np.random.randn(theta_t.shape[0], theta_t.shape[1])
42 |     num_users = y.shape[1]
43 |     num_movies = y.shape[0]
44 |     num_features = theta_t.shape[1]
45 | 
46 |     numgrad = compute_numerical_gradient(
47 |         cofi_cost_func,
48 |         np.hstack((np.ravel(x, order='F'), np.ravel(theta, order='F'))),
49 |         y, r,
50 |         num_users, num_movies, num_features, check_nn_lambda
51 |     )
52 |     cost, grad = cofi_cost_func(
53 |         np.hstack((np.ravel(x, order='F'), np.ravel(theta, order='F'))),
54 |         y, r, num_users,
55 |         num_movies, num_features, check_nn_lambda
56 |     )
57 |     disp([numgrad, grad])
58 |     print(
59 |         'The above two columns you get should be very similar.\n'
60 |         '(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n')
61 | 
62 |     diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
63 |     print(
64 |         'If your cost function implementation is correct, then \n'
65 |         'the relative difference will be small (less than 1e-9). \n\n'
66 |         'Relative Difference: %s\n' % diff)
67 | 
--------------------------------------------------------------------------------
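# --- Aside: the numbers behind the check (a sketch, not part of the code) ---
# check_gradients relies on the two-sided difference
#     dJ/dtheta_p ~= (J(theta + e*u_p) - J(theta - e*u_p)) / (2 * e)
# and on the relative-difference metric ||a - b|| / ||a + b||. A tiny
# self-contained illustration on J(t) = sum(t**2), whose gradient is 2t:
#
#     import numpy as np
#     t = np.array([1.0, -2.0, 3.0])
#     e = 1e-4
#     num = np.zeros_like(t)
#     for p in range(t.size):
#         step = np.zeros_like(t)
#         step[p] = e
#         num[p] = (np.sum((t + step) ** 2) - np.sum((t - step) ** 2)) / (2 * e)
#     print(np.linalg.norm(num - 2 * t) / np.linalg.norm(num + 2 * t))  # ~1e-12 or smaller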
/ex8 Anomaly Detection and Recommender/cofiCostFunc.py:
--------------------------------------------------------------------------------
1 | # !/usr/bin/env python
2 | # -*- coding=utf-8 -*-
3 | 
4 | import numpy as np
5 | 
6 | 
7 | def cofi_cost_func(params, y, r, num_users, num_movies, num_features, lambda_co_fi):
8 |     x = np.reshape(params[0:num_movies * num_features], (num_movies, num_features), order='F')
9 |     theta = np.reshape(params[num_movies * num_features:], (num_users, num_features), order='F')
10 | 
11 |     # You need to return the following values correctly
12 |     # j = 0
13 |     # x_grad = np.zeros(x.shape)
14 |     # theta_grad = np.zeros(theta.shape)
15 | 
16 |     diff = np.dot(x, theta.T) - y
17 | 
18 |     # unregularized cost, summed over rated entries only (r masks the rest)
19 |     j = (1 / 2) * np.sum((diff * r) ** 2)
20 |     # regularization term for Theta
21 |     j += (lambda_co_fi / 2) * np.sum(theta ** 2)
22 |     # regularization term for X
23 |     j += (lambda_co_fi / 2) * np.sum(x ** 2)
24 | 
25 |     # unregularized gradient w.r.t. X
26 |     x_grad = np.dot(diff * r, theta)
27 | 
28 |     # unregularized gradient w.r.t. Theta
29 |     theta_grad = np.dot((diff * r).T, x)
30 | 
31 |     # regularization for the X gradient
32 |     x_grad += lambda_co_fi * x
33 | 
34 |     # regularization for the Theta gradient
35 |     theta_grad += lambda_co_fi * theta
36 | 
37 |     grad = np.hstack((np.ravel(x_grad, order='F'), np.ravel(theta_grad, order='F')))
38 | 
39 |     return j, grad
40 | 
--------------------------------------------------------------------------------
/ex8 Anomaly Detection and Recommender/computeNumericalGradient.py:
--------------------------------------------------------------------------------
1 | # !/usr/bin/env python
2 | # -*- coding=utf-8 -*-
3 | 
4 | 
5 | import numpy as np
6 | 
7 | 
8 | def compute_numerical_gradient(j, theta, *args):
9 |     numgrad = np.zeros(theta.shape)
10 |     perturb = np.zeros(theta.shape)
11 |     e = 1e-4
12 |     for p in range(theta.size):
13 |         # Set perturbation vector
14 |         perturb[p] = e
15 |         loss1 = j(theta - perturb, *args)[0]
16 |         loss2 = j(theta + perturb, *args)[0]
17 |         # Compute Numerical Gradient
18 |         numgrad[p] = (loss2 - loss1) / (2 * e)
19 |         perturb[p] = 0
20 |     return numgrad
21 | 
--------------------------------------------------------------------------------
/ex8 Anomaly Detection and Recommender/data/ex8_movieParams.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/ex8 Anomaly Detection and Recommender/data/ex8_movieParams.mat
--------------------------------------------------------------------------------
/ex8 Anomaly Detection and Recommender/data/ex8_movies.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/ex8 Anomaly Detection and Recommender/data/ex8_movies.mat
--------------------------------------------------------------------------------
/ex8 Anomaly Detection and Recommender/data/ex8data1.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/ex8 Anomaly Detection and Recommender/data/ex8data1.mat
--------------------------------------------------------------------------------
/ex8 Anomaly Detection and Recommender/data/ex8data2.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/ex8 Anomaly Detection and Recommender/data/ex8data2.mat
--------------------------------------------------------------------------------
/ex8 Anomaly Detection and Recommender/data/movie_ids.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/X-21/Coursera-Machine-Learning-Python-Code/c9c1e9d73e2cac5ba4648d5765a5f5e6b69139f2/ex8 Anomaly Detection and Recommender/data/movie_ids.txt
--------------------------------------------------------------------------------
/ex8 Anomaly Detection and Recommender/estimateGaussian.py:
--------------------------------------------------------------------------------
1 | # !/usr/bin/env python
2 | # -*- coding=utf-8 -*-
3 | 
4 | import numpy as np
5 | 
6 | 
7 | def estimate_gaussian(x):
8 |     m = x.shape[0]
9 |     mu = np.mean(x, axis=0)
10 |     err = x - mu
11 |     sigma2 = (np.sum(err ** 2, axis=0)) / m
12 |     return mu, sigma2
13 | 
--------------------------------------------------------------------------------
/ex8 Anomaly Detection and Recommender/ex8.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding=utf-8 -*-
3 | 
4 | 
5 | import numpy as np
6 | import matplotlib.pyplot as plt
7 | 
8 | from scipy.io import loadmat
9 | 
10 | from estimateGaussian import estimate_gaussian
11 | from visualizeFit import visualize_fit
12 | from multivariateGaussian import multivariate_gaussian
13 | from selectThreshold import select_threshold
14 | 
15 | 
16 | def pause_func():
17 |     while input() != '':
18 |         pass
19 | 
20 | 
21 | def load_mat_file(filename):
22 |     return loadmat(filename)
23 | 
24 | 
25 | if __name__ == '__main__':
26 |     # ================== Part 1: Load Example Dataset ===================
27 |     # We start this exercise by using a small dataset that is easy to
28 |     # visualize.
29 |     #
30 |     # Our example case consists of 2 network server statistics across
31 |     # several machines: the latency and throughput of each machine.
32 |     # This exercise will help us find possibly faulty (or very fast) machines.
33 | 
34 |     print('Visualizing example dataset for outlier detection.\n\n')
35 |     data = load_mat_file('./data/ex8data1.mat')
36 |     # Visualize the example dataset
37 |     X = data['X']
38 |     Xval = data['Xval']
39 |     yval = data['yval']
40 |     plt.ion()
41 |     plt.figure()
42 |     plt.scatter(X[:, 0], X[:, 1], marker="x", s=20)
43 |     plt.axis([0, 30, 0, 30])
44 |     plt.xlabel('Latency (ms)')
45 |     plt.ylabel('Throughput (mb/s)')
46 |     plt.pause(0.5)
47 | 
48 |     print('Program paused. Press enter to continue.\n')
49 |     # pause_func()
50 | 
51 |     # ================== Part 2: Estimate the dataset statistics ===================
52 |     # For this exercise, we assume a Gaussian distribution for the dataset.
53 |     #
54 |     # We first estimate the parameters of our assumed Gaussian distribution,
55 |     # then compute the probabilities for each of the points and then visualize
56 |     # both the overall distribution and where each of the points falls in
57 |     # terms of that distribution.
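# --- Aside: the diagonal-covariance density (a sketch, not part of the code) ---
# With a diagonal covariance (sigma2 a vector, as estimate_gaussian returns),
# the density factorizes over features:
#     p(x) = prod_j (1 / sqrt(2*pi*sigma2_j)) * exp(-(x_j - mu_j)**2 / (2*sigma2_j))
# A minimal sketch equivalent to multivariate_gaussian in that case:
#
#     def diag_gaussian(x, mu, sigma2):
#         norm = 1.0 / np.sqrt(2.0 * np.pi * sigma2)
#         return np.prod(norm * np.exp(-(x - mu) ** 2 / (2.0 * sigma2)), axis=1)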
58 | 
59 |     print('Visualizing Gaussian fit.\n\n')
60 |     # Estimate mu and sigma2
61 |     mu, sigma2 = estimate_gaussian(X)
62 | 
63 |     p = multivariate_gaussian(X, mu, sigma2)
64 |     # Visualize the fit
65 |     visualize_fit(X, mu, sigma2)
66 |     plt.pause(0.8)
67 | 
68 |     print('Program paused. Press enter to continue.\n')
69 |     # pause_func()
70 | 
71 |     # ================== Part 3: Find Outliers ===================
72 |     # Now you will find a good epsilon threshold using the cross-validation
73 |     # set's probabilities under the estimated Gaussian distribution
74 | 
75 |     pval = multivariate_gaussian(Xval, mu, sigma2)
76 |     epsilon, F1 = select_threshold(yval, pval)
77 |     print('Best epsilon found using cross-validation: %s\n' % epsilon)
78 |     print('Best F1 on Cross Validation Set: %s\n' % F1)
79 |     print(' (you should see a value epsilon of about 8.99e-05)\n')
80 |     print(' (you should see a Best F1 value of 0.875000)\n\n')
81 | 
82 |     # Find the outliers in the training set and plot them
83 |     outliers = np.where(p < epsilon)[0]
84 |     plt.scatter(X[outliers, 0], X[outliers, 1], s=40, marker='o', facecolors='none', edgecolors='r')
85 |     plt.pause(0.8)
86 |     plt.close()
87 | 
88 |     print('Program paused. Press enter to continue.\n')
89 |     # pause_func()
90 | 
91 |     # ================== Part 4: Multidimensional Outliers ===================
92 |     # Loads the second dataset. You should now have the
93 |     # variables X, Xval, yval in your environment
94 |     data = load_mat_file('./data/ex8data2.mat')
95 |     X = data['X']
96 |     Xval = data['Xval']
97 |     yval = data['yval']
98 | 
99 |     mu, sigma2 = estimate_gaussian(X)
100 | 
101 |     p = multivariate_gaussian(X, mu, sigma2)
102 |     pval = multivariate_gaussian(Xval, mu, sigma2)
103 |     epsilon, F1 = select_threshold(yval, pval)
104 | 
105 |     print('Best epsilon found using cross-validation: %e\n' % epsilon)
106 |     print('Best F1 on Cross Validation Set: %s\n' % F1)
107 |     print(' (you should see a value epsilon of about 1.38e-18)\n')
108 |     print(' (you should see a Best F1 value of 0.615385)\n')
109 |     print('# Outliers found: %s\n\n' % np.sum(p < epsilon))
110 | 
--------------------------------------------------------------------------------
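# --- Aside: the F1 score used above (a sketch, not part of the code) ---
# select_threshold scores each candidate epsilon by F1 on the validation set.
# A tiny worked example of the metric itself, on made-up labels/predictions:
#
#     import numpy as np
#     yval = np.array([1, 1, 0, 0, 1])
#     pred = np.array([1, 0, 1, 0, 1])
#     tp = np.sum((yval == 1) & (pred == 1))       # 2
#     fp = np.sum((yval == 0) & (pred == 1))       # 1
#     fn = np.sum((yval == 1) & (pred == 0))       # 1
#     prec, rec = tp / (tp + fp), tp / (tp + fn)   # 2/3, 2/3
#     print(2 * prec * rec / (prec + rec))         # F1 = 2/3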
/ex8 Anomaly Detection and Recommender/ex8_cofi.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding=utf-8 -*-
3 | 
4 | 
5 | import numpy as np
6 | import matplotlib.pyplot as plt
7 | 
8 | from scipy.io import loadmat
9 | 
10 | from cofiCostFunc import cofi_cost_func
11 | from checkGradients import check_gradients
12 | from loadMovieList import load_movie_list
13 | from normalizeRatings import normalize_ratings
14 | from fminunc_recommender import my_fminunc_rcmd
15 | 
16 | 
17 | def pause_func():
18 |     while input() != '':
19 |         pass
20 | 
21 | 
22 | def load_mat_file(filename):
23 |     return loadmat(filename)
24 | 
25 | 
26 | if __name__ == '__main__':
27 |     # =============== Part 1: Loading movie ratings dataset ================
28 |     print('Loading movie ratings dataset.\n\n')
29 |     data = load_mat_file('./data/ex8_movies.mat')
30 |     # Y is a 1682x943 matrix, containing ratings (1-5) of 1682 movies by
31 |     # 943 users
32 | 
33 |     # R is a 1682x943 matrix, where R(i,j) = 1 if and only if user j gave a
34 |     # rating to movie i
35 |     R = data['R']
36 |     Y = data['Y']
37 |     print('Average rating for movie 1 (Toy Story): %s / 5\n\n' % np.mean(Y[0, R[0, :] == 1]))
38 | 
39 |     # We can "visualize" the ratings matrix by plotting it with imshow (MATLAB's imagesc)
40 |     plt.ion()
41 |     # plt.figure()
42 |     # plt.imshow(Y, aspect='auto')
43 |     # plt.xlabel("Users")
44 |     # plt.ylabel("Movies")
45 |     # plt.pause(0.8)
46 | 
47 |     print('Program paused. Press enter to continue.\n')
48 |     # pause_func()
49 | 
50 |     # ============ Part 2: Collaborative Filtering Cost Function ===========
51 |     print('Loading saved movie parameters.\n\n')
52 |     data = load_mat_file('./data/ex8_movieParams.mat')
53 |     X = data['X']
54 |     Theta = data['Theta']
55 |     num_users = data['num_users']
56 |     num_movies = data['num_movies']
57 |     num_features = data['num_features']
58 | 
59 |     # Reduce the data set size so that this runs faster
60 |     num_users_reduce = 4
61 |     num_movies_reduce = 5
62 |     num_features_reduce = 3
63 | 
64 |     X_reduce = X[np.arange(num_movies_reduce), :]
65 |     X_reduce = X_reduce[:, np.arange(num_features_reduce)]
66 | 
67 |     Theta_reduce = Theta[np.arange(num_users_reduce), :]
68 |     Theta_reduce = Theta_reduce[:, np.arange(num_features_reduce)]
69 | 
70 |     Y_reduce = Y[np.arange(num_movies_reduce), :]
71 |     Y_reduce = Y_reduce[:, np.arange(num_users_reduce)]
72 | 
73 |     R_reduce = R[np.arange(num_movies_reduce), :]
74 |     R_reduce = R_reduce[:, np.arange(num_users_reduce)]
75 | 
76 |     # Evaluate cost function
77 |     J, Grad = cofi_cost_func(np.hstack((np.ravel(X_reduce, order='F'), np.ravel(Theta_reduce, order='F'))), Y_reduce,
78 |                              R_reduce,
79 |                              num_users_reduce, num_movies_reduce, num_features_reduce, 0)
80 |     print('Cost at loaded parameters: %s \n(this value should be about 22.22)\n' % J)
81 |     print('Program paused. Press enter to continue.\n')
82 |     # pause_func()
83 | 
84 |     # ============== Part 3: Collaborative Filtering Gradient ==============
85 |     print('\nChecking Gradients (without regularization) ... \n')
86 |     check_gradients()
87 |     print('Program paused. Press enter to continue.\n')
88 |     # pause_func()
89 | 
90 |     # ========= Part 4: Collaborative Filtering Cost Regularization ========
91 |     J_reg, Grad_reg = cofi_cost_func(
92 |         np.hstack((np.ravel(X_reduce, order='F'), np.ravel(Theta_reduce, order='F'))),
93 |         Y_reduce, R_reduce,
94 |         num_users_reduce, num_movies_reduce, num_features_reduce, 1.5
95 |     )
96 |     print('Cost at loaded parameters (lambda = 1.5): %s \n(this value should be about 31.34)\n' % J_reg)
97 |     print('Program paused. Press enter to continue.\n')
98 |     # pause_func()
99 | 
100 |     # ======= Part 5: Collaborative Filtering Gradient Regularization ======
101 |     print('\nChecking Gradients (with regularization) ... \n')
102 | 
103 |     # Check gradients by running check_gradients
104 |     check_gradients(1.5)
105 |     print('Program paused. Press enter to continue.\n')
106 |     # pause_func()
107 | 
108 |     # ============== Part 6: Entering ratings for a new user ===============
109 |     # Before we train the collaborative filtering model, we first add
110 |     # ratings that correspond to a new user that we just observed. This
111 |     # part of the code will also allow you to put in your own ratings for the
112 |     # movies in our dataset.
113 | 
114 |     movieList = load_movie_list()
115 |     # Initialize my ratings
116 |     my_ratings = np.zeros((1682, 1))
117 |     # Check the file movie_ids.txt for the id of each movie in our dataset
118 |     # For example, Toy Story (1995) has ID 1, so to rate it "4", you can set
119 |     my_ratings[1 - 1] = 4
120 |     # Or, if you did not enjoy Silence of the Lambs (1991), you can set
121 |     my_ratings[98 - 1] = 2
122 | 
123 |     # We have selected a few movies we liked / did not like and the ratings we
124 |     # gave are as follows:
125 |     my_ratings[7 - 1] = 3
126 |     my_ratings[12 - 1] = 5
127 |     my_ratings[54 - 1] = 4
128 |     my_ratings[64 - 1] = 5
129 |     my_ratings[66 - 1] = 3
130 |     my_ratings[69 - 1] = 5
131 |     my_ratings[183 - 1] = 4
132 |     my_ratings[226 - 1] = 5
133 |     my_ratings[355 - 1] = 5
134 | 
135 |     print('\n\nNew user ratings:\n')
136 |     for i in range(my_ratings.size):
137 |         if my_ratings[i] > 0:
138 |             print('Rated %.1f for %s\n' % (my_ratings[i][0], movieList[i]))
139 | 
140 |     print('Program paused. Press enter to continue.\n')
141 |     # pause_func()
142 | 
143 |     # ================== Part 7: Learning Movie Ratings ====================
144 |     print('\nTraining collaborative filtering...\n')
145 |     data = load_mat_file('./data/ex8_movies.mat')
146 |     # Y is a 1682x943 matrix, containing ratings (1-5) of 1682 movies by
147 |     # 943 users
148 |     #
149 |     # R is a 1682x943 matrix, where R(i,j) = 1 if and only if user j gave a
150 |     # rating to movie i
151 |     Y = data['Y']
152 |     R = data['R']
153 |     # Add our own ratings to the data matrix
154 |     Y = np.hstack((my_ratings, Y))
155 |     R = np.hstack(((my_ratings != 0) + 0, R))
156 | 
157 |     # Normalize Ratings
158 |     Ynorm, Ymean = normalize_ratings(Y, R)
159 | 
160 |     # Useful Values
161 |     num_users = Y.shape[1]
162 |     num_movies = Y.shape[0]
163 |     num_features = 10
164 | 
165 |     # Set Initial Parameters (Theta, X)
166 |     X = np.random.randn(num_movies, num_features)
167 |     Theta = np.random.randn(num_users, num_features)
168 | 
169 |     initial_parameters = np.hstack(
170 |         (np.ravel(X, order='F'), np.ravel(Theta, order='F'))
171 |     )
172 | 
173 |     # Set Regularization
174 |     movie_lambda = 10
175 |     result = my_fminunc_rcmd(
176 |         initial_parameters, Ynorm, R, num_users, num_movies, num_features, movie_lambda
177 |     )
178 |     result_X = result['x']
179 |     X = np.reshape(result_X[0:num_movies * num_features], (num_movies, num_features), order='F')
180 |     Theta = np.reshape(result_X[num_movies * num_features:], (num_users, num_features), order='F')
181 | 
182 |     print('Recommender system learning completed.\n')
183 |     print('Program paused. Press enter to continue.\n')
184 |     # pause_func()
185 | 
186 |     # ================== Part 8: Recommendation for you ====================
187 |     # After training the model, you can now make recommendations by computing
188 |     # the predictions matrix.
189 | 
190 |     p = np.dot(X, Theta.T)
191 |     my_predictions = p[:, 0].reshape((p.shape[0], 1)) + Ymean
192 |     ix = np.argsort(-my_predictions, axis=0)
193 |     print('\nTop recommendations for you:\n')
194 |     for i in range(10):
195 |         j = ix[i, 0]  # row index of the i-th highest prediction
196 |         print('Predicting rating %.1f for movie %s\n' %
197 |               (my_predictions[j, 0], movieList[j]))
198 | 
199 |     print('\n\nOriginal ratings provided:\n')
200 |     for i in range(len(my_ratings)):
201 |         if my_ratings[i] > 0:
202 |             print('Rated %d for %s\n' % (my_ratings[i][0], movieList[i]))
203 | 
--------------------------------------------------------------------------------
/ex8 Anomaly Detection and Recommender/fminunc_recommender.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding=utf-8 -*-
3 | 
4 | import numpy as np
5 | import scipy.optimize as sciopt
6 | 
7 | static_grad = np.arange(0)  # cache for the gradient computed by cost_function
8 | 
9 | 
10 | def my_fminunc_rcmd(params, y, r, num_users, num_movies, num_features, lambda_co_fi):
11 |     return sciopt.minimize(
12 |         fun=cost_function, x0=params,
13 |         args=(y, r, num_users, num_movies, num_features, lambda_co_fi), method="TNC",
14 |         jac=gradient
15 |     )
16 | 
17 | 
18 | def gradient(*args):
19 |     global static_grad
20 |     return static_grad  # gradient cached by the most recent cost_function call
21 | 
22 | 
23 | def cost_function(params, y, r, num_users, num_movies, num_features, lambda_co_fi):
24 |     x = np.reshape(params[0:num_movies * num_features], (num_movies, num_features), order='F')
25 |     theta = np.reshape(params[num_movies * num_features:], (num_users, num_features), order='F')
26 | 
27 |     # Same cost and gradient as cofi_cost_func; the gradient is stashed in
28 |     # static_grad because sciopt.minimize takes fun and jac separately
29 |     # (with this setup it evaluates fun before jac at the same point, so the
30 |     # cached value stays in sync)
31 | 
32 |     diff = np.dot(x, theta.T) - y
33 | 
34 |     # unregularized cost, summed over rated entries only (r masks the rest)
35 |     j = (1 / 2) * np.sum((diff * r) ** 2)
36 |     # regularization term for Theta
37 |     j += (lambda_co_fi / 2) * np.sum(theta ** 2)
38 |     # regularization term for X
39 |     j += (lambda_co_fi / 2) * np.sum(x ** 2)
40 | 
41 |     # unregularized gradient w.r.t. X
42 |     x_grad = np.dot(diff * r, theta)
43 | 
44 |     # unregularized gradient w.r.t. Theta
45 |     theta_grad = np.dot((diff * r).T, x)
46 | 
47 |     # regularization for the X gradient
48 |     x_grad += lambda_co_fi * x
49 | 
50 |     # regularization for the Theta gradient
51 |     theta_grad += lambda_co_fi * theta
52 | 
53 |     global static_grad
54 |     static_grad = np.hstack((np.ravel(x_grad, order='F'), np.ravel(theta_grad, order='F')))
55 | 
56 |     return j
57 | 
--------------------------------------------------------------------------------
/ex8 Anomaly Detection and Recommender/loadMovieList.py:
--------------------------------------------------------------------------------
1 | # !/usr/bin/env python
2 | # -*- coding=utf-8 -*-
3 | 
4 | 
5 | import numpy as np
6 | 
7 | 
8 | def load_movie_list():
9 |     file = open('./data/movie_ids.txt', 'r', encoding='ISO-8859-1')
10 |     n = 1682  # number of movies in the list (kept for reference)
11 |     movie_list = []
12 |     ctx = ''
13 |     idx = 0
14 |     while 1:
15 |         try:
16 |             ctx = file.readline()
17 |         except UnicodeDecodeError:
18 |             pass
19 |         if not ctx:
20 |             break
21 |         idx, ctx = ctx.split(' ', 1)
22 |         movie_list.append(ctx.strip())  # drop the trailing newline from each title
23 |     movie_list = np.array(movie_list)
24 |     return movie_list
25 | 
--------------------------------------------------------------------------------
/ex8 Anomaly Detection and Recommender/multivariateGaussian.py:
--------------------------------------------------------------------------------
1 | # !/usr/bin/env python
2 | # -*- coding=utf-8 -*-
3 | 
4 | import numpy as np
5 | 
6 | 
7 | def multivariate_gaussian(x, mu, sigma2):
8 |     # p = multivariate_gaussian(X, mu, Sigma2) computes the probability
9 |     # density function of the examples X under the multivariate Gaussian
10 |     # distribution with parameters mu and Sigma2. If Sigma2 is a matrix, it is
11 |     # treated as the covariance matrix. If Sigma2 is a vector, it is treated
12 |     # as the \sigma^2 values of the variances in each dimension (a diagonal
13 |     # covariance matrix)
14 |     k = mu.size
15 |     if len(sigma2.shape) == 1:
16 |         sigma2 = np.diag(sigma2)
17 |     err = x - mu
18 |     p = (1 / (np.power(2 * np.pi, k / 2) * np.power(np.linalg.det(sigma2), 1 / 2))) \
19 |         * np.exp((-1 / 2) * np.sum(np.dot(err, np.linalg.pinv(sigma2)) * err, axis=1))
20 |     return p
21 | 
--------------------------------------------------------------------------------
/ex8 Anomaly Detection and Recommender/normalizeRatings.py:
--------------------------------------------------------------------------------
1 | # !/usr/bin/env python
2 | # -*- coding=utf-8 -*-
3 | 
4 | import numpy as np
5 | 
6 | 
7 | def normalize_ratings(y, r):
8 |     m, n = y.shape
9 |     y_mean = np.zeros((m, 1))
10 |     y_norm = np.zeros((m, n))
11 |     for i in range(m):
12 |         idx = np.where(r[i] == 1)[0]
13 |         y_mean[i] = np.mean(y[i, idx])
14 |         y_norm[i, idx] = y[i, idx] - y_mean[i]
15 |     return y_norm, y_mean
16 | 
--------------------------------------------------------------------------------
/ex8 Anomaly Detection and Recommender/selectThreshold.py:
--------------------------------------------------------------------------------
1 | # !/usr/bin/env python
2 | # -*- coding=utf-8 -*-
3 | 
4 | import numpy as np
5 | 
6 | 
7 | def select_threshold(yval, pval):
8 |     yval = np.ravel(yval)
9 |     pval = np.ravel(pval)
10 |     best_epsilon = 0
11 |     best_f1 = 0
12 |     _f1 = 0
13 | 
14 |     step_size = (pval.max() - pval.min()) / 1000
15 |     for epsilon in np.arange(pval.min(), pval.max(), step_size):
16 |         predictions = (pval < epsilon)
17 | 
18 |         tp = np.sum(np.logical_and(yval == 1, predictions == 1))
19 |         fp = np.sum(np.logical_and(yval == 0, predictions == 1))
20 |         fn = np.sum(np.logical_and(yval == 1, predictions == 0))
21 | 
22 |         if tp:
23 |             precision = tp / (tp + fp)
24 |             recall = tp / (tp + fn)
25 |             _f1 = (2 * precision * recall) / (precision + recall)
26 |         else:
27 |             _f1 = 0
28 | 
29 |         if _f1 > best_f1:
30 |             best_f1 = _f1
31 |             best_epsilon = epsilon
32 |     return best_epsilon, best_f1
33 | 
--------------------------------------------------------------------------------
/ex8 Anomaly Detection and Recommender/visualizeFit.py:
--------------------------------------------------------------------------------
1 | # !/usr/bin/env python
2 | # -*- coding=utf-8 -*-
3 | 
4 | import numpy as np
5 | import matplotlib.pyplot as plt
6 | from multivariateGaussian import multivariate_gaussian
7 | 
8 | 
9 | def visualize_fit(x, mu, sigma2):
10 |     x1, x2 = np.meshgrid(np.arange(0, 35, 0.5), np.arange(0, 35, 0.5))
11 |     x_temp = np.vstack((np.ravel(x1), np.ravel(x2))).T
12 |     z = multivariate_gaussian(x_temp, mu, sigma2)
13 |     z = np.reshape(z, x1.shape)
14 |     if not np.sum(np.isinf(z)):
15 |         plt.contour(x1, x2, z, np.logspace(-20, -2, 7))
16 |     return x
17 | 
--------------------------------------------------------------------------------
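# --- Aside: the ex8 pieces end to end (a sketch, not part of the original code) ---
# The anomaly-detection functions above compose into a tiny detector. A minimal
# sketch on made-up data, using the modules defined in this folder:
#
#     import numpy as np
#     from estimateGaussian import estimate_gaussian
#     from multivariateGaussian import multivariate_gaussian
#     from selectThreshold import select_threshold
#
#     X = np.random.randn(300, 2)                    # "normal" training data
#     Xval = np.vstack((np.random.randn(50, 2),      # normal validation points
#                       np.random.randn(5, 2) + 6))  # injected anomalies
#     yval = np.hstack((np.zeros(50), np.ones(5)))
#     mu, sigma2 = estimate_gaussian(X)
#     pval = multivariate_gaussian(Xval, mu, sigma2)
#     eps, f1 = select_threshold(yval, pval)
#     anomalies = np.where(multivariate_gaussian(X, mu, sigma2) < eps)[0]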