├── README.md
├── Week1
│   ├── O2O-1
│   │   ├── o2o-1.ipynb
│   │   └── submit1.csv
│   ├── README.md
│   ├── hw1
│   │   ├── PerceptronLinearAlgorithm.py
│   │   ├── data
│   │   │   ├── hw1_15_train.dat
│   │   │   ├── hw1_18_test.dat
│   │   │   └── hw1_18_train.dat
│   │   ├── hw1.ipynb
│   │   └── loadfile.py
│   └── reference_hw1
│       ├── PerceptronLinearAlgorithm.py
│       ├── hw1.ipynb
│       └── loadfile.py
├── Week10
│   ├── README.md
│   └── hw7
│       ├── Homework7.png
│       ├── hw3_test.dat
│       ├── hw3_train.dat
│       └── hw7.ipynb
├── Week11
│   ├── README.md
│   └── hw8
│       ├── Homework8.png
│       ├── hw4_kmeans_train.dat
│       ├── hw4_knn_test.dat
│       ├── hw4_knn_train.dat
│       ├── hw4_nnet_test.dat
│       ├── hw4_nnet_train.dat
│       └── hw8.ipynb
├── Week12
│   ├── Code
│   │   └── o2o-2.ipynb
│   └── README.md
├── Week2
│   ├── README.md
│   ├── hw2
│   │   ├── DecisionStump.py
│   │   ├── data
│   │   │   ├── hw2_test.dat
│   │   │   └── hw2_train.dat
│   │   ├── hw2.ipynb
│   │   └── preprocess.py
│   └── reference_hw2
│       ├── DecisionStump.py
│       ├── hw2.ipynb
│       └── preprocess.py
├── Week3
│   ├── README.md
│   ├── hw3
│   │   ├── LR.py
│   │   ├── data
│   │   │   ├── hw3_test.dat
│   │   │   └── hw3_train.dat
│   │   └── hw3.ipynb
│   └── reference_hw3
│       ├── LR.py
│       └── hw3.ipynb
├── Week4
│   ├── README.md
│   ├── hw4
│   │   ├── RidgeRegression.py
│   │   ├── data
│   │   │   ├── hw4_test.dat
│   │   │   └── hw4_train.dat
│   │   └── hw4.ipynb
│   └── reference_hw4
│       ├── RidgeRegression.py
│       └── hw4.ipynb
├── Week5
│   ├── Code
│   │   └── o2o-1.ipynb
│   └── README.md
├── Week6
│   └── README.md
├── Week7
│   ├── README.md
│   └── hw5
│       ├── data
│       │   ├── features_test.dat
│       │   └── features_train.dat
│       └── hw5.ipynb
├── Week8
│   ├── README.md
│   └── SMO
│       ├── SMO.ipynb
│       └── digits.zip
└── Week9
    ├── README.md
    └── hw6
        ├── Homework6.png
        ├── hw2_adaboost_test.dat
        ├── hw2_adaboost_train.dat
        ├── hw2_lssvm_all.dat
        └── hw6.ipynb

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

# HsuanTienLin-ML-Camp

## Course Materials

- [Curated resources for Hsuan-Tien Lin's courses](https://github.com/RedstoneWill/NTU-HsuanTienLin-MachineLearning)

- [Liao Xuefeng's Python 3 tutorial](https://www.liaoxuefeng.com/wiki/0014316089557264a6b348958f449949df42a6d3a2e542c000)

- Li Hang, *Statistical Learning Methods* (link: https://pan.baidu.com/s/1MSx407RuPCJt5KSej0Yqlg, password: h74l)

- Zhou Zhihua, *Machine Learning* (link: https://pan.baidu.com/s/1wyqhvJHkI1zHph8RRsm9iw, password: 1475)

- [How should I get started with machine learning?](https://www.zhihu.com/question/20691338/answer/446610161)

## Syllabus

The camp follows the order of Hsuan-Tien Lin's Machine Learning Foundations and Machine Learning Techniques courses, over 12 weeks in total.

### Week 1: When Can Machines Learn?

- The learning problem

- Learning to answer yes/no

- Types of learning

- Feasibility of learning

- Homework 1

### Week 2: Why Can Machines Learn?

- Training versus testing

- Theory of generalization

- The VC dimension

- Noise and error

- Homework 2

### Week 3: How Can Machines Learn?

- Linear regression

- Logistic regression

- Linear models for classification

- Nonlinear feature transforms

- Homework 3

### Week 4: How Can Machines Learn Better?

- The hazard of overfitting

- Regularization

- Validation

- Three learning principles

- Homework 4

### Week 5: Guided Tianchi O2O competition (beginner level)

### Week 6: Support Vector Machines (SVM)

- Linear SVM

- Dual SVM

- Kernel SVM

- Soft-margin SVM

### Week 7: Extending the SVM Kernel Trick

- Kernel logistic regression

- Support vector regression

- Homework 5

### Week 8: SMO, Principles and Implementation

- How the SMO algorithm works

- Implementing SMO in Python

- Handwritten digit recognition

### Week 9: Ensemble Learning, Bagging and AdaBoost

- Blending and Bagging

- The AdaBoost boosting algorithm

- Decision trees

- Homework 6

### Week 10: Random Forest and GBDT

- Random Forest

- The GBDT algorithm

- Homework 7

### Week 11: Neural Networks

- Neural networks

- Deep learning

- RBF networks

- Matrix factorization

- Homework 8

- Course wrap-up

### Week 12: Guided Tianchi O2O competition (advanced level)

Follow my WeChat official account, AI有道 (ID: redstonewill), where I publish more and better articles!

![image](https://img-blog.csdn.net/20180422232547537?)

**Personal pages:**

> My website: [http://redstonewill.com/](http://redstonewill.com/)

> My CSDN blog: [http://blog.csdn.net/red_stone1](http://blog.csdn.net/red_stone1)

> My Zhihu: [https://www.zhihu.com/people/red_stone_wl](https://www.zhihu.com/people/red_stone_wl)

> My WeChat official account: AI有道 (ID: redstonewill)

--------------------------------------------------------------------------------
/Week1/README.md:
--------------------------------------------------------------------------------

## Included Files

- hw1: Week 1 homework

- reference_hw1: reference solutions for the Week 1 homework

- O2O-1: source code for the Tianchi O2O competition (beginner level), plus a sample submission file

## Main Content

1. [100 machine-learning written-test questions for self-assessment](https://mp.weixin.qq.com/s/zdnQeoG6YzupWA9ZBCG5fA)

2. [Jupyter Notebook beginner tutorial (part 1)](https://mp.weixin.qq.com/s/O2nTGOtqGR-V33-YJgPgJQ)

3. [Jupyter Notebook beginner tutorial (part 2)](https://mp.weixin.qq.com/s/AwSzkjlpwvdUzh6CmHq6AQ)

4. Video links:

https://www.bilibili.com/video/av36731342

https://www.bilibili.com/video/av36731342/?p=2

https://www.bilibili.com/video/av36731342/?p=3

https://www.bilibili.com/video/av36731342/?p=4

https://www.bilibili.com/video/av36731342/?p=5

https://www.bilibili.com/video/av36731342/?p=6

https://www.bilibili.com/video/av36731342/?p=7

https://www.bilibili.com/video/av36731342/?p=8

https://www.bilibili.com/video/av36731342/?p=9

https://www.bilibili.com/video/av36731342/?p=10

https://www.bilibili.com/video/av36731342/?p=11

https://www.bilibili.com/video/av36731342/?p=12

https://www.bilibili.com/video/av36731342/?p=13

https://www.bilibili.com/video/av36731342/?p=14

https://www.bilibili.com/video/av36731342/?p=15

https://www.bilibili.com/video/av36731342/?p=16

https://www.bilibili.com/video/av36731342/?p=17

5. References:

[Notes on Hsuan-Tien Lin's Machine Learning Foundations, Lecture 1 — The Learning Problem](https://redstonewill.com/65/)

[Notes on Hsuan-Tien Lin's Machine Learning Foundations, Lecture 2 — Learning to Answer Yes/No](https://redstonewill.com/70/)

[Notes on Hsuan-Tien Lin's Machine Learning Foundations, Lecture 3 — Types of Learning](https://redstonewill.com/73/)

[Notes on Hsuan-Tien Lin's Machine Learning Foundations, Lecture 4 — Feasibility of Learning](https://redstonewill.com/77/)

--------------------------------------------------------------------------------
/Week1/hw1/PerceptronLinearAlgorithm.py:
--------------------------------------------------------------------------------

# First, we use an artificial data set to study PLA.
# The data set is in https://www.csie.ntu.edu.tw/~htlin/course/ml15fall/hw1/hw1_15_train.dat
# Each line of the data set contains one (x_n, y_n) with x_n ∈ R^4.
# The first 4 numbers of each line are the components of x_n, in order; the last number is y_n.
# Please initialize your algorithm with w = 0 and take sign(0) as -1.
# As a friendly reminder, remember to add x_0 = 1 as always!

import numpy as np

class pla(object):

    def __init__(self):
        pass
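
    # NOTE: this helper is an added illustration, not part of the original
    # assignment skeleton. It is a minimal, hedged sketch of the machinery the
    # Q1-Q3 skeletons below build on: one pass of PLA updates over the examples
    # in a given order. It assumes X already carries the bias column x_0 = 1
    # and takes sign(0) as -1, as the assignment requires. For Q1, one would
    # repeat such passes until a full pass makes no update.
    def _pla_single_pass(self, X, Y, W, eta=1.0):
        updates = 0
        for i in range(X.shape[0]):
            pred = 1 if np.dot(X[i], W) > 0 else -1  # sign(0) counts as -1
            if pred != Y[i]:                         # misclassified example
                W = W + eta * Y[i] * X[i]            # the PLA update rule
                updates += 1
        return W, updates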
    # Q1. Implement a version of PLA by visiting examples in the naive cycle using the order of examples in the data set.
    # Run the algorithm on the data set.
    # What is the number of updates before the algorithm halts?
    def pla_1(self, X, Y):

        # weights initialization
        W = np.zeros(X.shape[1])

        # PLA iteration
        halt = 0  # number of updates before the algorithm halts

        ### YOUR CODE HERE



        ### END YOUR CODE

        return halt

    # Q2. Implement a version of PLA by visiting examples in fixed, pre-determined random cycles throughout the algorithm.
    # Run the algorithm on the data set. Please repeat your experiment for 2000 times, each with a different random seed.
    # What is the average number of updates before the algorithm halts?
    # Plot a histogram ( https://en.wikipedia.org/wiki/Histogram ) to show the number of updates versus frequency.
    def pla_2(self, X, Y):

        Iteration = 2000  # number of independent runs
        Halts = []        # number of updates in each run
        Accuracies = []   # final training accuracy in each run

        ### YOUR CODE HERE



        ### END YOUR CODE

        # means over all runs
        halt_mean = np.mean(Halts)
        accuracy_mean = np.mean(Accuracies)

        return halt_mean, accuracy_mean

    # Q3. Implement a version of PLA by visiting examples in fixed, pre-determined random cycles throughout the algorithm,
    # while changing the update rule to be
    #     w_{t+1} <- w_t + eta * y_{n(t)} * x_{n(t)}
    # with eta = 0.5. Note that your PLA in the previous problem corresponds to eta = 1.
    # Please repeat your experiment for 2000 times, each with a different random seed. What is the average number of updates before the algorithm halts?
    # Plot a histogram to show the number of updates versus frequency. Compare your result to the previous problem and briefly discuss your findings.
    def pla_3(self, X, Y):

        Iteration = 2000  # number of independent runs
        Halts = []        # number of updates in each run
        Accuracies = []   # final training accuracy in each run

        ### YOUR CODE HERE



        ### END YOUR CODE

        # means over all runs
        halt_mean = np.mean(Halts)
        accuracy_mean = np.mean(Accuracies)

        return halt_mean, accuracy_mean


# Next, we play with the pocket algorithm. Modify your PLA in Problem 16 to visit examples purely randomly,
# and then add the 'pocket' steps to the algorithm. We will use
# https://www.csie.ntu.edu.tw/~htlin/course/ml15fall/hw1/hw1_18_train.dat as the training data set D,
# and https://www.csie.ntu.edu.tw/~htlin/course/ml15fall/hw1/hw1_18_test.dat
# as the test set for "verifying" the g returned by your algorithm (see lecture 4 about verifying).
# The sets are of the same format as the previous one.

class pocket_pla(object):

    def __init__(self):
        pass
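
    # NOTE: this method is an added illustration, not part of the original
    # assignment skeleton. It is a hedged sketch of the single step that
    # distinguishes the pocket algorithm from plain PLA: after each PLA update,
    # keep whichever of the new weights and the best-so-far weights makes fewer
    # mistakes on the training set. It relies on the calError helper defined
    # just below.
    def _pocket_step(self, X, Y, W, W_pocket):
        # W: the freshly updated PLA vector; W_pocket: the best vector so far
        if self.calError(X, Y, W) < self.calError(X, Y, W_pocket):
            W_pocket = W.copy()  # "pocket" the improved weights
        return W_pocket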
    # Q1. Run the pocket algorithm with a total of 50 updates on D, and verify the performance of w_pocket using the test set.
    # Please repeat your experiment for 2000 times, each with a different random seed.
    # What is the average error rate on the test set? Plot a histogram to show error rate versus frequency.

    # count the number of misclassified examples under weights W
    def calError(self, X, Y, W):
        score = np.dot(X, W)
        Y_pred = np.ones_like(Y)
        Y_pred[score <= 0] = -1  # sign(0) is taken as -1, per the assignment
        err_cnt = np.sum(Y_pred != Y)
        return err_cnt

    def pocket_pla_1(self, X_train, Y_train, X_test, Y_test):
        Iteration = 2000  # number of independent runs
        Update = 50       # total number of PLA updates per run
        Errors = []       # test error rate of each run

        ### YOUR CODE HERE



        ### END YOUR CODE

        # mean of errors
        error_mean = np.mean(Errors)

        return error_mean

    # Q2. Modify your algorithm to return w_50 (the PLA vector after 50 updates) instead of w_pocket (the pocket vector) after 50 updates.
    # Run the modified algorithm on D, and verify the performance using the test set.
    # Please repeat your experiment for 2000 times, each with a different random seed.
    # What is the average error rate on the test set? Plot a histogram to show error rate versus frequency.
    # Compare your result to the previous problem and briefly discuss your findings.
    def pocket_pla_2(self, X_train, Y_train, X_test, Y_test):
        Iteration = 2000  # number of independent runs
        Update = 50       # total number of PLA updates per run
        Errors = []       # test error rate of each run


        ### YOUR CODE HERE



        ### END YOUR CODE

        # mean of errors
        error_mean = np.mean(Errors)

        return error_mean

    # Q3. Modify your algorithm in Problem 1 to run for 100 updates instead of 50, and verify the performance of w_pocket using the test set.
    # Please repeat your experiment for 2000 times, each with a different random seed. What is the average error rate on the test set? Plot a
    # histogram to show error rate versus frequency. Compare your result to Problem 18 and briefly discuss your findings.
145 | def pocket_pla_3(self, X_train, Y_train, X_test, Y_test): 146 | Iteration = 2000 # number of iteration 147 | Update = 100 148 | Errors = [] # list store error rate every iteration 149 | 150 | 151 | ### YOUR CODE HERE 152 | 153 | 154 | 155 | ### END YOUR CODE 156 | 157 | # mean of errors 158 | error_mean = np.mean(Errors) 159 | 160 | return error_mean 161 | 162 | -------------------------------------------------------------------------------- /Week1/hw1/data/hw1_15_train.dat: -------------------------------------------------------------------------------- 1 | 0.97681 0.10723 0.64385 0.29556 1 2 | 0.67194 0.2418 0.83075 0.42741 1 3 | 0.20619 0.23321 0.81004 0.98691 1 4 | 0.51583 0.055814 0.92274 0.75797 1 5 | 0.70893 0.10836 0.33951 0.77058 1 6 | 0.55743 0.67804 0.061044 0.72689 1 7 | 0.15654 0.75584 0.01122 0.42598 -1 8 | 0.50462 0.15137 0.33878 0.41881 1 9 | 0.22657 0.59272 0.24103 0.46221 -1 10 | 0.49174 0.65115 0.24622 0.24796 -1 11 | 0.59512 0.26994 0.74692 0.32784 1 12 | 0.32439 0.37294 0.11623 0.94499 1 13 | 0.4475 0.60183 0.41323 0.58492 1 14 | 0.41171 0.098584 0.4795 0.083842 -1 15 | 0.10059 0.37353 0.0057687 0.14313 -1 16 | 0.8182 0.70052 0.67561 0.22231 1 17 | 0.3221 0.95754 0.99328 0.50757 1 18 | 0.41469 0.48406 0.39832 0.53216 1 19 | 0.48364 0.36163 0.14351 0.3153 -1 20 | 0.5323 0.21312 0.40401 0.98252 1 21 | 0.71073 0.29015 0.15557 0.70588 1 22 | 0.68151 0.23617 0.085193 0.58718 1 23 | 0.069048 0.14742 0.92254 0.93918 1 24 | 0.19337 0.29606 0.72589 0.71993 1 25 | 0.62783 0.80021 0.69486 0.41697 1 26 | 0.94658 0.85253 0.75418 0.3027 1 27 | 0.54402 0.73303 0.29073 0.26307 -1 28 | 0.20166 0.96147 0.83956 0.76917 1 29 | 0.8416 0.22036 0.60311 0.34751 1 30 | 0.659 0.40341 0.16311 0.12612 -1 31 | 0.87845 0.46984 0.32142 0.00042772 -1 32 | 0.95971 0.7334 0.45993 0.76215 1 33 | 0.35449 0.22126 0.57224 0.4336 1 34 | 0.34263 0.81404 0.30048 0.1461 -1 35 | 0.7234 0.45707 0.44129 0.40039 1 36 | 0.39538 0.20276 0.67262 0.67505 1 37 | 0.45179 0.78087 0.4938 0.073425 -1 38 | 0.23881 0.7675 0.40806 0.074954 -1 39 | 0.91059 0.18045 0.089421 0.59719 1 40 | 0.30088 0.3124 0.30033 0.1078 -1 41 | 0.20636 0.25969 0.87208 0.075063 -1 42 | 0.84325 0.20161 0.018555 0.58518 1 43 | 0.33334 0.087671 0.078659 0.15274 -1 44 | 0.18111 0.11502 0.73474 0.65718 1 45 | 0.90105 0.69659 0.44014 0.28963 1 46 | 0.76096 0.17909 0.18557 0.86889 1 47 | 0.20359 0.77736 0.2176 0.071641 -1 48 | 0.42406 0.98081 0.99433 0.071268 -1 49 | 0.61642 0.060815 0.10835 0.85805 1 50 | 0.62755 0.47251 0.63101 0.86293 1 51 | 0.55335 0.10757 0.87192 0.8353 1 52 | 0.72356 0.088313 0.69772 0.091611 1 53 | 0.02084 0.66204 0.26704 0.93343 1 54 | 0.15623 0.39914 0.58355 0.9993 1 55 | 0.90115 0.74857 0.6048 0.54481 1 56 | 0.40522 0.34025 0.84438 0.30728 1 57 | 0.69053 0.70505 0.77211 0.50009 1 58 | 0.32972 0.36727 0.038398 0.24515 -1 59 | 0.087565 0.80164 0.10873 0.72862 -1 60 | 0.26626 0.19317 0.83732 0.96563 1 61 | 0.33161 0.23154 0.12297 0.17358 -1 62 | 0.8338 0.22029 0.62198 0.5915 1 63 | 0.38873 0.57979 0.75488 0.12437 -1 64 | 0.093349 0.084263 0.085754 0.19575 -1 65 | 0.3938 0.21727 0.59706 0.36985 1 66 | 0.14047 0.12652 0.89396 0.0056295 -1 67 | 0.34342 0.76697 0.82696 0.43354 1 68 | 0.2665 0.83265 0.28848 0.2337 -1 69 | 0.36046 0.36809 0.32623 0.25556 -1 70 | 0.99778 0.97657 0.674 0.51915 1 71 | 0.22303 0.028847 0.73739 0.41662 1 72 | 0.30179 0.44626 0.17371 0.73116 1 73 | 0.31285 0.25044 0.46658 0.12074 -1 74 | 0.24446 0.51992 0.80413 0.74044 1 75 | 0.31433 0.80511 0.6496 0.56248 1 76 | 0.10521 0.202 0.87425 0.90105 1 77 | 
0.34385 0.5524 0.52835 0.833 1 78 | 0.52791 0.62401 0.56754 0.41641 1 79 | 0.77826 0.57861 0.49655 0.84074 1 80 | 0.26143 0.5512 0.38472 0.18668 -1 81 | 0.87326 0.96009 0.24922 0.65171 1 82 | 0.65069 0.96118 0.36716 0.6302 1 83 | 0.46037 0.98854 0.62971 0.62758 1 84 | 0.11105 0.93171 0.85023 0.022051 -1 85 | 0.32721 0.95939 0.9862 0.92881 1 86 | 0.54203 0.071898 0.79052 0.86281 1 87 | 0.18994 0.76582 0.21911 0.25161 -1 88 | 0.24274 0.9501 0.80862 0.68007 1 89 | 0.36659 0.57376 0.22493 0.94652 1 90 | 0.52105 0.45772 0.7153 0.91306 1 91 | 0.73745 0.045874 0.9518 0.90951 1 92 | 0.0054206 0.5803 0.92465 0.52961 1 93 | 0.61914 0.3734 0.45772 0.56601 1 94 | 0.68483 0.34833 0.6974 0.51117 1 95 | 0.31049 0.58616 0.78657 0.077121 -1 96 | 0.0077248 0.69259 0.98719 0.93702 1 97 | 0.45361 0.47903 0.1331 0.41037 -1 98 | 0.84801 0.7256 0.21409 0.88719 1 99 | 0.29968 0.17497 0.99655 0.15494 1 100 | 0.10789 0.090897 0.013157 0.45712 -1 101 | 0.72711 0.89662 0.048524 0.77902 1 102 | 0.50372 0.14179 0.8632 0.57913 1 103 | 0.22889 0.248 0.5324 0.58705 1 104 | 0.79724 0.4484 0.90201 0.19897 1 105 | 0.10663 0.49593 0.20231 0.05901 -1 106 | 0.15117 0.49039 0.8309 0.91627 1 107 | 0.95409 0.40038 0.82197 0.73251 1 108 | 0.35704 0.014972 0.47835 0.55573 1 109 | 0.4672 0.78532 0.63665 0.80891 1 110 | 0.51268 0.49317 0.37239 0.11229 -1 111 | 0.60983 0.54596 0.30924 0.45368 1 112 | 0.17321 0.67316 0.27675 0.53482 -1 113 | 0.5761 0.36533 0.44297 0.585 1 114 | 0.77885 0.92006 0.51157 0.42738 1 115 | 0.58168 0.7896 0.58292 0.11996 -1 116 | 0.7243 0.19231 0.12572 0.42981 1 117 | 0.27893 0.27538 0.82096 0.92758 1 118 | 0.79986 0.070765 0.099176 0.61674 1 119 | 0.65646 0.042222 0.039717 0.90227 1 120 | 0.2386 0.41482 0.16741 0.26592 -1 121 | 0.84494 0.53851 0.08783 0.74972 1 122 | 0.69721 0.29151 0.14566 0.092551 -1 123 | 0.085241 0.19873 0.11313 0.53704 -1 124 | 0.18871 0.093184 0.55176 0.047211 -1 125 | 0.21583 0.79506 0.30754 0.7987 1 126 | 0.050727 0.19674 0.73473 0.48999 1 127 | 0.077524 0.29589 0.012955 0.93278 1 128 | 0.87063 0.46914 0.22899 0.35294 1 129 | 0.84807 0.60812 0.42088 0.97709 1 130 | 0.045535 0.66219 0.76946 0.71987 1 131 | 0.64344 0.20442 0.20197 0.43431 1 132 | 0.33283 0.78383 0.0097152 0.13798 -1 133 | 0.091392 0.95801 0.30999 0.17345 -1 134 | 0.058002 0.42981 0.92919 0.40967 1 135 | 0.22095 0.66618 0.86801 0.61817 1 136 | 0.018695 0.21615 0.68387 0.069085 -1 137 | 0.79796 0.18841 0.12854 0.50856 1 138 | 0.67478 0.92791 0.025838 0.12608 -1 139 | 0.68964 0.92125 0.65626 0.76319 1 140 | 0.37004 0.0075887 0.99533 0.82581 1 141 | 0.4103 0.22978 0.2938 0.78125 1 142 | 0.46467 0.40583 0.26626 0.17288 -1 143 | 0.27347 0.38493 0.20575 0.80271 1 144 | 0.0037457 0.59585 0.85865 0.037211 -1 145 | 0.45059 0.83556 0.54132 0.21109 -1 146 | 0.055447 0.84199 0.62001 0.80487 1 147 | 0.016285 0.39547 0.12598 0.63249 -1 148 | 0.11982 0.90112 0.55878 0.19737 -1 149 | 0.77264 0.38371 0.61856 0.36306 1 150 | 0.68999 0.42401 0.43875 0.98001 1 151 | 0.057837 0.86126 0.84096 0.6711 1 152 | 0.23792 0.066348 0.44791 0.9972 1 153 | 0.39259 0.89268 0.54155 0.0061404 -1 154 | 0.20604 0.19453 0.31621 0.71208 1 155 | 0.18058 0.37711 0.88283 0.65659 1 156 | 0.80745 0.24562 0.82253 0.98408 1 157 | 0.41828 0.36215 0.8516 0.68281 1 158 | 0.1323 0.39434 0.84215 0.91682 1 159 | 0.61753 0.09773 0.81467 0.40281 1 160 | 0.97318 0.19905 0.26089 0.68696 1 161 | 0.76135 0.65909 0.89342 0.21845 1 162 | 0.58691 0.6069 0.43123 0.042843 -1 163 | 0.34919 0.10586 0.50059 0.082363 -1 164 | 0.37798 0.23626 0.23852 0.14685 -1 165 | 0.9042 0.98451 
0.019088 0.76116 1 166 | 0.84556 0.90166 0.072432 0.079249 -1 167 | 0.84747 0.64503 0.011196 0.53983 1 168 | 0.49067 0.78682 0.15697 0.089691 -1 169 | 0.92475 0.60457 0.64656 0.93019 1 170 | 0.63634 0.80437 0.44479 0.18618 -1 171 | 0.19157 0.60461 0.40676 0.95747 1 172 | 0.5551 0.89083 0.2496 0.65735 1 173 | 0.93298 0.76517 0.25749 0.035361 -1 174 | 0.2199 0.21024 0.10609 0.33801 -1 175 | 0.81888 0.42535 0.37241 0.74882 1 176 | 0.32533 0.40846 0.037799 0.004201 -1 177 | 0.4737 0.14999 0.66915 0.8465 1 178 | 0.16804 0.44428 0.51001 0.66228 1 179 | 0.86743 0.8456 0.17056 0.95574 1 180 | 0.28583 0.93363 0.91645 0.95502 1 181 | 0.83711 0.59571 0.3367 0.97731 1 182 | 0.32174 0.85545 0.71378 0.91737 1 183 | 0.52212 0.36278 0.66123 0.75587 1 184 | 0.21409 0.1191 0.11796 0.75938 1 185 | 0.38188 0.29273 0.27347 0.23086 -1 186 | 0.72916 0.73744 0.90535 0.13761 1 187 | 0.059381 0.25354 0.22097 0.83323 1 188 | 0.36486 0.91348 0.14745 0.57585 -1 189 | 0.68553 0.062004 0.70984 0.66362 1 190 | 0.93301 0.86593 0.17125 0.77453 1 191 | 0.61463 0.4409 0.75333 0.89446 1 192 | 0.12285 0.057161 0.58692 0.49092 1 193 | 0.56427 0.42429 0.41168 0.44017 1 194 | 0.29777 0.69766 0.8302 0.061072 -1 195 | 0.53183 0.69574 0.73405 0.90509 1 196 | 0.61368 0.29695 0.35748 0.841 1 197 | 0.85256 0.0045204 0.85749 0.38761 1 198 | 0.46745 0.45305 0.44254 0.72515 1 199 | 0.71941 0.19092 0.24009 0.89824 1 200 | 0.73892 0.44994 0.78128 0.18219 1 201 | 0.31277 0.92634 0.29642 0.46112 -1 202 | 0.11872 0.89219 0.794 0.28731 -1 203 | 0.54582 0.79468 0.18279 0.048142 -1 204 | 0.83241 0.46586 0.10901 0.048364 -1 205 | 0.89567 0.69597 0.89578 0.10248 1 206 | 0.24917 0.76999 0.20536 0.56092 -1 207 | 0.83858 0.81299 0.95404 0.62472 1 208 | 0.21222 0.21892 0.84233 0.83773 1 209 | 0.31804 0.5679 0.55799 0.15455 -1 210 | 0.81836 0.32376 0.50428 0.2733 1 211 | 0.74487 0.78055 0.18939 0.25642 -1 212 | 0.14736 0.74033 0.48418 0.0015921 -1 213 | 0.80975 0.072057 0.71856 0.86265 1 214 | 0.92345 0.37355 0.34499 0.89149 1 215 | 0.38189 0.089103 0.31269 0.72856 1 216 | 0.49649 0.25659 0.65471 0.94681 1 217 | 0.10242 0.27703 0.52294 0.85126 1 218 | 0.35479 0.17024 0.79189 0.86742 1 219 | 0.70429 0.69697 0.062243 0.964 1 220 | 0.29857 0.77505 0.65087 0.28314 -1 221 | 0.68766 0.51467 0.63235 0.44751 1 222 | 0.15416 0.83044 0.69105 0.027009 -1 223 | 0.83522 0.32071 0.52787 0.10613 1 224 | 0.83811 0.3915 0.57094 0.47851 1 225 | 0.57131 0.88752 0.53706 0.55403 1 226 | 0.93257 0.64968 0.24587 0.81109 1 227 | 0.29608 0.083328 0.74109 0.35551 1 228 | 0.46203 0.18142 0.063792 0.92144 1 229 | 0.41203 0.53101 0.77315 0.62032 1 230 | 0.36268 0.29523 0.71811 0.70884 1 231 | 0.39207 0.53465 0.28893 0.93615 1 232 | 0.95333 0.40831 0.29404 0.41991 1 233 | 0.94916 0.34266 0.87255 0.43527 1 234 | 0.19017 0.47568 0.14256 0.44132 -1 235 | 0.85894 0.9006 0.23357 0.80459 1 236 | 0.67525 0.86288 0.013998 0.28517 -1 237 | 0.88734 0.64802 0.36704 0.54815 1 238 | 0.84748 0.20105 0.89731 0.59314 1 239 | 0.53217 0.98951 0.1954 0.27718 -1 240 | 0.47945 0.30232 0.45604 0.89163 1 241 | 0.99187 0.72996 0.77676 0.72478 1 242 | 0.8889 0.36558 0.82728 0.45772 1 243 | 0.27408 0.7204 0.65677 0.70424 1 244 | 0.52243 0.59938 0.6246 0.11785 -1 245 | 0.76399 0.025814 0.33736 0.20739 1 246 | 0.27187 0.74592 0.21669 0.41116 -1 247 | 0.90839 0.050892 0.67696 0.98549 1 248 | 0.60506 0.54448 0.84372 0.30577 1 249 | 0.10422 0.76155 0.83826 0.5412 1 250 | 0.78474 0.0066151 0.22536 0.50022 1 251 | 0.98582 0.68248 0.28302 0.45186 1 252 | 0.41665 0.81217 0.097022 0.32122 -1 253 | 0.90475 0.46776 
0.88671 0.68763 1 254 | 0.033977 0.048415 0.60235 0.065179 -1 255 | 0.98983 0.48006 0.33899 0.29487 1 256 | 0.85168 0.59711 0.93749 0.35835 1 257 | 0.84725 0.020964 0.39386 0.88603 1 258 | 0.56072 0.91605 0.019558 0.42813 -1 259 | 0.11745 0.060389 0.021678 0.58085 -1 260 | 0.20919 0.79555 0.69939 0.78054 1 261 | 0.7171 0.28297 0.84921 0.74192 1 262 | 0.21242 0.32839 0.56807 0.53329 1 263 | 0.48941 0.0084562 0.51977 0.72383 1 264 | 0.98037 0.2035 0.32161 0.4112 1 265 | 0.35711 0.67505 0.11554 0.47356 -1 266 | 0.68983 0.09837 0.66985 0.62623 1 267 | 0.43838 0.026309 0.51285 0.86236 1 268 | 0.10529 0.68645 0.99395 0.63142 1 269 | 0.53952 0.99271 0.27649 0.9474 1 270 | 0.018782 0.74473 0.99206 0.87102 1 271 | 0.51718 0.67211 0.70828 0.31218 1 272 | 0.41189 0.56691 0.78364 0.67886 1 273 | 0.44772 0.18827 0.71978 0.36447 1 274 | 0.317 0.47494 0.54949 0.55973 1 275 | 0.21139 0.30158 0.65269 0.051723 -1 276 | 0.13736 0.51767 0.28234 0.79935 1 277 | 0.037048 0.10755 0.63398 0.76885 1 278 | 0.44087 0.89808 0.67844 0.48225 1 279 | 0.75841 0.78382 0.24322 0.72986 1 280 | 0.87597 0.89991 0.037972 0.2432 -1 281 | 0.60687 0.32885 0.54284 0.67944 1 282 | 0.43019 0.869 0.60879 0.90864 1 283 | 0.65513 0.39801 0.91845 0.53552 1 284 | 0.88689 0.65472 0.99466 0.69948 1 285 | 0.77567 0.94883 0.8498 0.18626 1 286 | 0.97233 0.1599 0.9329 0.089635 1 287 | 0.94461 0.72613 0.71317 0.46217 1 288 | 0.4605 0.97047 0.76531 0.3996 1 289 | 0.5502 0.37931 0.76456 0.80705 1 290 | 0.5828 0.16063 0.74013 0.11508 1 291 | 0.58966 0.49064 0.99596 0.25634 1 292 | 0.96575 0.2141 0.15024 0.98043 1 293 | 0.29939 0.2934 0.46088 0.74118 1 294 | 0.042301 0.51492 0.105 0.33518 -1 295 | 0.62395 0.45102 0.92252 0.77543 1 296 | 0.36607 0.35256 0.32267 0.3285 -1 297 | 0.96545 0.25132 0.064417 0.51374 1 298 | 0.63056 0.053806 0.14816 0.40033 1 299 | 0.48831 0.76017 0.61242 0.48176 1 300 | 0.5583 0.59146 0.24049 0.22209 -1 301 | 0.94304 0.96431 0.31249 0.10506 -1 302 | 0.011705 0.93889 0.25839 0.21194 -1 303 | 0.97164 0.22943 0.18083 0.88409 1 304 | 0.87546 0.6744 0.75024 0.25818 1 305 | 0.64631 0.32332 0.86857 0.40117 1 306 | 0.4276 0.81183 0.34678 0.98935 1 307 | 0.28472 0.82959 0.40054 0.87363 1 308 | 0.62037 0.31285 0.27722 0.64167 1 309 | 0.70482 0.629 0.6828 0.51672 1 310 | 0.83688 0.18413 0.37164 0.51392 1 311 | 0.19111 0.26472 0.19798 0.76058 1 312 | 0.24988 0.091229 0.19524 0.012353 -1 313 | 0.62081 0.11765 0.98492 0.019084 1 314 | 0.18157 0.22637 0.68213 0.74354 1 315 | 0.7659 0.28888 0.61728 0.1657 1 316 | 0.26463 0.45099 0.14001 0.47823 -1 317 | 0.90022 0.31697 0.73717 0.84918 1 318 | 0.85095 0.7647 0.26824 0.61702 1 319 | 0.33281 0.83714 0.21334 0.27535 -1 320 | 0.29159 0.13184 0.10133 0.33435 -1 321 | 0.46935 0.26674 0.023366 0.21269 -1 322 | 0.6042 0.23026 0.50198 0.67093 1 323 | 0.50244 0.31349 0.564 0.74072 1 324 | 0.12275 0.53116 0.37771 0.27835 -1 325 | 0.12977 0.61848 0.83557 0.087753 -1 326 | 0.60099 0.74051 0.046187 0.79207 1 327 | 0.96669 0.37691 0.014413 0.026769 -1 328 | 0.24756 0.67287 0.053795 0.053087 -1 329 | 0.31767 0.63018 0.37828 0.27766 -1 330 | 0.60216 0.17537 0.1279 0.61092 1 331 | 0.087833 0.99196 0.77303 0.98091 1 332 | 0.36564 0.23189 0.64808 0.78337 1 333 | 0.21106 0.13959 0.20768 0.72656 1 334 | 0.6089 0.20358 0.9282 0.39475 1 335 | 0.079604 0.58299 0.46986 0.69636 1 336 | 0.25485 0.35519 0.26085 0.69246 1 337 | 0.67904 0.41069 0.49872 0.69857 1 338 | 0.40779 0.8325 0.16625 0.47396 -1 339 | 0.46199 0.50523 0.33119 0.92953 1 340 | 0.89327 0.56518 0.21383 0.61029 1 341 | 0.41033 0.38488 0.12862 0.8564 1 
342 | 0.058138 0.62899 0.60946 0.99762 1 343 | 0.0073587 0.54418 0.26272 0.0063957 -1 344 | 0.91431 0.96241 0.89095 0.22206 1 345 | 0.97883 0.69139 0.23555 0.56506 1 346 | 0.79162 0.25942 0.20671 0.081687 -1 347 | 0.1136 0.19133 0.20443 0.44308 -1 348 | 0.5753 0.11082 0.96049 0.44523 1 349 | 0.66688 0.32664 0.058022 0.21483 -1 350 | 0.85187 0.53112 0.29813 0.91085 1 351 | 0.5679 0.7258 0.47001 0.49278 1 352 | 0.35162 0.85285 0.45142 0.22949 -1 353 | 0.2479 0.52952 0.79521 0.44092 1 354 | 0.4693 0.60065 0.90787 0.92907 1 355 | 0.31096 0.052271 0.25236 0.82934 1 356 | 0.55096 0.79786 0.71317 0.8198 1 357 | 0.99279 0.15139 0.27982 0.45122 1 358 | 0.66404 0.096739 0.26582 0.10294 -1 359 | 0.52803 0.1423 0.46639 0.57637 1 360 | 0.99328 0.14342 0.0087678 0.84295 1 361 | 0.5299 0.17308 0.0613 0.99353 1 362 | 0.81762 0.54861 0.87142 0.55873 1 363 | 0.68483 0.65517 0.49261 0.65511 1 364 | 0.24142 0.53478 0.92219 0.53656 1 365 | 0.66164 0.97376 0.61345 0.39626 1 366 | 0.049532 0.54176 0.98792 0.89908 1 367 | 0.038881 0.38398 0.6202 0.25135 -1 368 | 0.61624 0.084068 0.02411 0.65738 1 369 | 0.17096 0.41017 0.78869 0.71301 1 370 | 0.29773 0.63452 0.9311 0.57032 1 371 | 0.041402 0.64972 0.2671 0.15491 -1 372 | 0.28259 0.44665 0.57678 0.98452 1 373 | 0.16068 0.072643 0.31165 0.29832 -1 374 | 0.97714 0.77051 0.54517 0.72295 1 375 | 0.87151 0.86679 0.20841 0.69075 1 376 | 0.34734 0.25215 0.67884 0.69012 1 377 | 0.26408 0.11281 0.021935 0.17689 -1 378 | 0.69426 0.41539 0.27711 0.78669 1 379 | 0.84044 0.29512 0.56474 0.33757 1 380 | 0.39973 0.32958 0.34539 0.66934 1 381 | 0.58272 0.40829 0.30819 0.1299 -1 382 | 0.4527 0.40875 0.045895 0.41199 -1 383 | 0.29341 0.03832 0.7905 0.33916 1 384 | 0.92222 0.51471 0.13331 0.56679 1 385 | 0.18129 0.96248 0.79131 0.58486 1 386 | 0.45696 0.20427 0.69854 0.48235 1 387 | 0.96531 0.27775 0.95255 0.56022 1 388 | 0.50468 0.99699 0.75136 0.51681 1 389 | 0.55852 0.067689 0.666 0.98482 1 390 | 0.83188 0.66817 0.23403 0.72472 1 391 | 0.97959 0.40402 0.96303 0.28133 1 392 | 0.29634 0.4012 0.40266 0.67864 1 393 | 0.34922 0.99751 0.23234 0.52115 -1 394 | 0.65637 0.7181 0.72843 0.93113 1 395 | 0.079695 0.57218 0.70591 0.33812 -1 396 | 0.71206 0.51569 0.18168 0.5557 1 397 | 0.17528 0.2625 0.8306 0.029669 -1 398 | 0.93895 0.93941 0.72496 0.95655 1 399 | 0.046136 0.94413 0.038311 0.26812 -1 400 | 0.072491 0.2242 0.62592 0.67238 1 401 | -------------------------------------------------------------------------------- /Week1/hw1/data/hw1_18_train.dat: -------------------------------------------------------------------------------- 1 | 0.94544 0.42842 0.79833 0.16244 -1 2 | 0.85365 0.084168 0.5682 0.49221 -1 3 | 0.17095 0.82127 0.98444 0.51486 -1 4 | 0.51412 0.92124 0.42323 0.097934 -1 5 | 0.28147 0.71434 0.075309 0.9116 1 6 | 0.46295 0.64512 0.96324 0.31516 -1 7 | 0.97789 0.80155 0.90235 0.74203 -1 8 | 0.41825 0.69419 0.46246 0.31523 -1 9 | 0.75203 0.20264 0.8765 0.47593 -1 10 | 0.31767 0.16814 0.97148 0.75625 1 11 | 0.60639 0.6256 0.69092 0.23644 -1 12 | 0.74736 0.58863 0.85253 0.49688 -1 13 | 0.58935 0.25994 0.4347 0.68396 1 14 | 0.91428 0.64863 0.26845 0.588 -1 15 | 0.82428 0.37408 0.34576 0.36043 -1 16 | 0.27134 0.95245 0.97289 0.32633 1 17 | 0.11337 0.50822 0.38557 0.65094 1 18 | 0.062256 0.87209 0.039721 0.16706 -1 19 | 0.88686 0.95327 0.72667 0.25407 -1 20 | 0.43814 0.0050115 0.71146 0.0035177 1 21 | 0.32039 0.18087 0.36328 0.8082 1 22 | 0.53334 0.4928 0.12447 0.056322 -1 23 | 0.96163 0.44451 0.36669 0.036965 -1 24 | 0.24527 0.98187 0.75223 0.99043 -1 25 | 0.79825 0.61992 0.3143 
0.32149 -1 26 | 0.50254 0.71889 0.93769 0.92041 -1 27 | 0.79394 0.37095 0.38948 0.0074354 -1 28 | 0.68834 0.0069991 0.052548 0.58947 1 29 | 0.24581 0.84714 0.87805 0.47974 -1 30 | 0.73303 0.61519 0.45505 0.79567 -1 31 | 0.99991 0.59968 0.9556 0.065947 -1 32 | 0.29596 0.80898 0.21135 0.62465 -1 33 | 0.43281 0.40818 0.72753 0.036651 -1 34 | 0.016077 0.54539 0.86837 0.92598 -1 35 | 0.87715 0.96472 0.11656 0.67721 -1 36 | 0.71588 0.52111 0.24414 0.37834 -1 37 | 0.19095 0.80957 0.32997 0.22846 -1 38 | 0.95192 0.96849 0.98366 0.52196 -1 39 | 0.82839 0.0056522 0.81113 0.64297 -1 40 | 0.54021 0.743 0.13468 0.65629 -1 41 | 0.99376 0.75706 0.15834 0.086766 -1 42 | 0.10648 0.381 0.71405 0.82389 1 43 | 0.084093 0.76674 0.1923 0.75727 1 44 | 0.46726 0.37486 0.37621 0.42141 -1 45 | 0.91136 0.14701 0.64793 0.51055 1 46 | 0.38033 0.21008 0.12881 0.94164 1 47 | 0.91284 0.29903 0.88588 0.17236 1 48 | 0.19765 0.73064 0.02935 0.21955 -1 49 | 0.55713 0.7849 0.86564 0.49825 -1 50 | 0.91464 0.72806 0.81428 0.069319 -1 51 | 0.88463 0.47266 0.11536 0.1131 -1 52 | 0.6461 0.18499 0.81522 0.94009 1 53 | 0.64541 0.7648 0.32368 0.73249 -1 54 | 0.71777 0.97299 0.0068357 0.23462 -1 55 | 0.75935 0.27755 0.34451 0.97279 1 56 | 0.59766 0.82676 0.54538 0.44152 -1 57 | 0.66124 0.60285 0.15767 0.50997 -1 58 | 0.91434 0.80477 0.5302 0.7179 -1 59 | 0.79207 0.37092 0.43425 0.55487 -1 60 | 0.28981 0.3454 0.78777 0.15946 -1 61 | 0.42121 0.94815 0.45906 0.86798 -1 62 | 0.98778 0.81017 0.8999 0.6688 -1 63 | 0.98924 0.37912 0.57184 0.75316 -1 64 | 0.20123 0.67704 0.26961 0.65225 1 65 | 0.099661 0.69236 0.014422 0.64556 1 66 | 0.3407 0.059772 0.96243 0.021753 -1 67 | 0.92051 0.87893 0.95596 0.61146 -1 68 | 0.25358 0.7837 0.10157 0.84909 1 69 | 0.50234 0.10202 0.47909 0.52893 1 70 | 0.21401 0.5297 0.83508 0.46247 -1 71 | 0.22217 0.76309 0.63363 0.19147 1 72 | 0.059843 0.074491 0.62272 0.33171 1 73 | 0.4254 0.32481 0.37 0.72589 1 74 | 0.79855 0.4113 0.23395 0.60854 -1 75 | 0.74527 0.1973 0.025593 0.12136 1 76 | 0.38965 0.042129 0.72629 0.72746 1 77 | 0.044469 0.38991 0.24453 0.23324 1 78 | 0.48217 0.16906 0.87229 0.83814 1 79 | 0.59366 0.61092 0.79603 0.45029 -1 80 | 0.85701 0.2861 0.63004 0.39647 -1 81 | 0.42987 0.97002 0.96852 0.63568 1 82 | 0.33547 0.76827 0.02346 0.76674 1 83 | 0.63515 0.87719 0.68515 0.79026 -1 84 | 0.24163 0.11254 0.0057335 0.78254 1 85 | 0.46859 0.56278 0.58113 0.38057 -1 86 | 0.30662 0.90384 0.82137 0.71406 -1 87 | 0.81151 0.32455 0.8215 0.859 -1 88 | 0.59745 0.45798 0.68015 0.33713 -1 89 | 0.53703 0.1119 0.47181 0.47458 -1 90 | 0.21971 0.69501 0.78568 0.22207 -1 91 | 0.93699 0.19526 0.70893 0.050585 -1 92 | 0.93906 0.29948 0.10738 0.16498 1 93 | 0.94447 0.49082 0.30976 0.18486 -1 94 | 0.11545 0.45901 0.076574 0.54802 1 95 | 0.082947 0.52343 0.35765 0.97716 1 96 | 0.70326 0.88867 0.89782 0.63029 -1 97 | 0.46631 0.88959 0.030474 0.43322 -1 98 | 0.95113 0.8949 0.19866 0.785 -1 99 | 0.28544 0.82337 0.2345 0.39962 -1 100 | 0.027894 0.9766 0.87988 0.53984 -1 101 | 0.07983 0.29738 0.87245 0.99437 1 102 | 0.82998 0.5035 0.80164 0.17057 -1 103 | 0.028217 0.50685 0.72916 0.85567 1 104 | 0.40388 0.92436 0.56477 0.44963 -1 105 | 0.90187 0.32117 0.74473 0.24326 -1 106 | 0.73095 0.2588 0.43453 0.97059 1 107 | 0.41368 0.89723 0.182 0.47806 -1 108 | 0.11226 0.026238 0.29415 0.37557 1 109 | 0.42866 0.2443 0.49539 0.94608 1 110 | 0.33316 0.50113 0.80768 0.043853 -1 111 | 0.65005 0.53913 0.62971 0.27501 -1 112 | 0.19505 0.83866 0.61309 0.59312 -1 113 | 0.18783 0.2416 0.98059 0.1366 -1 114 | 0.17982 0.53769 0.05188 0.1039 -1 115 | 
0.59292 0.37792 0.18383 0.086961 -1 116 | 0.12219 0.12664 0.3749 0.42526 -1 117 | 0.077696 0.090644 0.19474 0.44012 1 118 | 0.9119 0.12906 0.56118 0.27136 -1 119 | 0.2809 0.19917 0.2599 0.30015 -1 120 | 0.64989 0.48175 0.61955 0.85827 -1 121 | 0.48254 0.40326 0.22209 0.72561 1 122 | 0.81362 0.06319 0.20517 0.71242 1 123 | 0.6766 0.6566 0.96196 0.34791 -1 124 | 0.090456 0.5697 0.71801 0.80353 1 125 | 0.53199 0.58617 0.26177 0.14681 1 126 | 0.68213 0.14501 0.15362 0.79581 1 127 | 0.10793 0.67764 0.046448 0.42205 -1 128 | 0.9974 0.50109 0.52296 0.67443 -1 129 | 0.01671 0.53982 0.76136 0.40655 -1 130 | 0.26095 0.098336 0.32133 0.57666 1 131 | 0.092384 0.80928 0.20406 0.83956 1 132 | 0.34863 0.09403 0.24638 0.62832 1 133 | 0.4658 0.075587 0.47739 0.41045 -1 134 | 0.35384 0.76788 0.034629 0.40548 -1 135 | 0.45331 0.83543 0.70061 0.63152 -1 136 | 0.81968 0.96028 0.27632 0.50381 -1 137 | 0.64954 0.015774 0.74488 0.72422 1 138 | 0.83247 0.77724 0.029681 0.69362 -1 139 | 0.21638 0.19252 0.089856 0.47597 1 140 | 0.61762 0.55406 0.14456 0.79885 1 141 | 0.49154 0.99616 0.87485 0.89607 1 142 | 0.56813 0.32066 0.93349 0.2846 -1 143 | 0.68642 0.21554 0.96518 0.24528 -1 144 | 0.81495 0.067454 0.48273 0.111 -1 145 | 0.28 0.69775 0.21403 0.46569 -1 146 | 0.70653 0.51615 0.15495 0.95157 1 147 | 0.95602 0.63613 0.93635 0.17966 -1 148 | 0.38754 0.73093 0.88863 0.057157 -1 149 | 0.44664 0.088606 0.57233 0.20928 1 150 | 0.58492 0.085617 0.63382 0.75037 1 151 | 0.027497 0.75783 0.36209 0.82204 1 152 | 0.17424 0.89468 0.981 0.082675 1 153 | 0.46793 0.59123 0.76998 0.66763 -1 154 | 0.33792 0.34933 0.99612 0.70088 -1 155 | 0.10496 0.3467 0.058179 0.29768 1 156 | 0.44057 0.86733 0.59267 0.84175 -1 157 | 0.77315 0.64458 0.3609 0.91872 -1 158 | 0.34028 0.2083 0.42855 0.01098 -1 159 | 0.048626 0.21629 0.44467 0.6715 1 160 | 0.16182 0.18726 0.37422 0.74989 1 161 | 0.46159 0.59223 0.69567 0.28574 -1 162 | 0.78292 0.0032437 0.75811 0.83909 1 163 | 0.10193 0.44132 0.14411 0.23565 1 164 | 0.12898 0.72941 0.94921 0.68217 -1 165 | 0.8237 0.28587 0.027655 0.69607 1 166 | 0.22095 0.42793 0.18922 0.57207 1 167 | 0.16095 0.14612 0.57497 0.67227 1 168 | 0.09885 0.68321 0.99774 0.65724 -1 169 | 0.035431 0.51635 0.15898 0.90432 1 170 | 0.84928 0.07628 0.80011 0.13521 -1 171 | 0.23497 0.54927 0.27934 0.67458 1 172 | 0.89329 0.40425 0.89087 0.9544 -1 173 | 0.25647 0.90448 0.98679 0.40448 -1 174 | 0.9549 0.58648 0.13263 0.35258 -1 175 | 0.56105 0.80475 0.27889 0.85465 -1 176 | 0.11478 0.39264 0.38285 0.67149 1 177 | 0.05977 0.11741 0.76248 0.92593 1 178 | 0.39143 0.28659 0.21722 0.43321 1 179 | 0.91179 0.27647 0.34898 0.65393 -1 180 | 0.09443 0.19051 0.41085 0.45381 1 181 | 0.91804 0.39235 0.43304 0.8628 -1 182 | 0.88089 0.7906 0.79615 0.15766 1 183 | 0.87239 0.07267 0.11488 0.30647 -1 184 | 0.1785 0.0045679 0.28736 0.51988 1 185 | 0.27355 0.16534 0.40671 0.70966 1 186 | 0.31004 0.55867 0.55958 0.83113 1 187 | 0.18929 0.30348 0.39635 0.77803 1 188 | 0.771 0.46698 0.61146 0.049329 -1 189 | 0.68171 0.55808 0.15867 0.40752 1 190 | 0.42068 0.72155 0.88432 0.69495 -1 191 | 0.98412 0.026636 0.79746 0.12529 -1 192 | 0.043827 0.31464 0.86038 0.68158 1 193 | 0.51212 0.64513 0.098453 0.23475 -1 194 | 0.089007 0.3716 0.12259 0.94529 1 195 | 0.50772 0.22065 0.59359 0.97939 1 196 | 0.93465 0.36301 0.22942 0.39776 -1 197 | 0.42056 0.29535 0.43695 0.62165 -1 198 | 0.048604 0.7202 0.54842 0.97717 1 199 | 0.50136 0.1145 0.50765 0.87669 1 200 | 0.36174 0.71377 0.64479 0.056333 -1 201 | 0.2777 0.39013 0.93838 0.84906 1 202 | 0.93749 0.45131 0.32837 0.3695 
-1 203 | 0.20373 0.58066 0.071795 0.12352 -1 204 | 0.4725 0.61682 0.48422 0.17611 -1 205 | 0.65798 0.31002 0.60457 0.47883 -1 206 | 0.8993 0.53183 0.67929 0.58757 1 207 | 0.38832 0.25274 0.21567 0.50743 1 208 | 0.51313 0.15745 0.61119 0.67401 1 209 | 0.11358 0.47657 0.24184 0.58755 1 210 | 0.37034 0.36359 0.53112 0.24716 -1 211 | 0.42413 0.057342 0.89212 0.017731 -1 212 | 0.4345 0.56371 0.21414 0.2536 -1 213 | 0.77712 0.75059 0.62399 0.71009 -1 214 | 0.04853 0.48105 0.82719 0.82047 1 215 | 0.19355 0.55776 0.35611 0.85254 1 216 | 0.22617 0.86303 0.74426 0.34111 -1 217 | 0.50795 0.51506 0.18379 0.33261 -1 218 | 0.28944 0.53901 0.83546 0.62305 -1 219 | 0.70436 0.10892 0.17572 0.79067 1 220 | 0.3696 0.09916 0.99783 0.18065 -1 221 | 0.051676 0.52229 0.58675 0.063048 -1 222 | 0.049773 0.38636 0.6454 0.61632 1 223 | 0.43775 0.61545 0.452 0.085732 -1 224 | 0.52391 0.3862 0.36216 0.8966 1 225 | 0.78333 0.7802 0.017248 0.37567 -1 226 | 0.96443 0.46328 0.36682 0.67743 -1 227 | 0.16228 0.84555 0.9954 0.39404 -1 228 | 0.75956 0.56209 0.12159 0.39666 -1 229 | 0.40135 0.26447 0.32038 0.84968 1 230 | 0.60452 0.96625 0.80747 0.95796 -1 231 | 0.24149 0.42608 0.5135 0.79964 1 232 | 0.69923 0.19624 0.052588 0.50421 1 233 | 0.20982 0.56073 0.23733 0.7298 1 234 | 0.38829 0.84218 0.036708 0.27729 -1 235 | 0.89298 0.5224 0.57908 0.32866 -1 236 | 0.04836 0.44043 0.3381 0.40337 1 237 | 0.58797 0.81477 0.89051 0.49763 1 238 | 0.92702 0.16908 0.42873 0.47049 -1 239 | 0.33997 0.056387 0.67732 0.72104 1 240 | 0.29235 0.41816 0.54616 0.37338 -1 241 | 0.82621 0.35915 0.91535 0.52773 -1 242 | 0.93916 0.21437 0.72499 0.09895 1 243 | 0.69561 0.55595 0.54144 0.19081 -1 244 | 0.8787 0.18979 0.52277 0.65395 -1 245 | 0.2716 0.16702 0.66867 0.89177 1 246 | 0.018541 0.2597 0.23497 0.66155 -1 247 | 0.65402 0.44188 0.63458 0.23922 -1 248 | 0.85941 0.95651 0.94928 0.14757 1 249 | 0.53318 0.60877 0.73118 0.82874 1 250 | 0.11015 0.75083 0.66826 0.43597 -1 251 | 0.016778 0.092719 0.18851 0.91858 1 252 | 0.1857 0.61943 0.33343 0.15638 -1 253 | 0.098883 0.51122 0.47688 0.14015 -1 254 | 0.94742 0.46387 0.93815 0.046698 -1 255 | 0.94504 0.72574 0.51539 0.58107 1 256 | 0.55468 0.2202 0.43257 0.7247 1 257 | 0.49326 0.77617 0.51483 0.18184 -1 258 | 0.56779 0.54134 0.30324 0.21922 1 259 | 0.85912 0.031351 0.18672 0.2763 1 260 | 0.33885 0.75368 0.67618 0.12606 -1 261 | 0.13941 0.58348 0.3689 0.96683 1 262 | 0.26842 0.87624 0.41527 0.38335 -1 263 | 0.8252 0.84305 0.014381 0.59557 -1 264 | 0.90833 0.96149 0.76021 0.87398 -1 265 | 0.58498 0.5865 0.061924 0.46478 -1 266 | 0.56257 0.74287 0.58352 0.036336 -1 267 | 0.68061 0.045708 0.28845 0.98 1 268 | 0.23589 0.91737 0.70029 0.94748 -1 269 | 0.053795 0.8503 0.43338 0.1861 -1 270 | 0.83485 0.79077 0.039515 0.29425 -1 271 | 0.025228 0.26938 0.82224 0.051886 -1 272 | 0.56661 0.81062 0.36746 0.65976 -1 273 | 0.34201 0.26074 0.19502 0.51494 -1 274 | 0.78709 0.96186 0.79637 0.87761 -1 275 | 0.93672 0.30279 0.23763 0.1445 -1 276 | 0.70084 0.73074 0.49955 0.60997 -1 277 | 0.095449 0.87436 0.03229 0.32337 -1 278 | 0.032767 0.19899 0.47481 0.61727 1 279 | 0.628 0.46022 0.034758 0.078972 -1 280 | 0.043928 0.47253 0.7023 0.91318 1 281 | 0.39271 0.75308 0.24997 0.37393 -1 282 | 0.14241 0.57104 0.28455 0.84095 1 283 | 0.92955 0.77705 0.92942 0.79626 -1 284 | 0.97141 0.33196 0.68807 0.10602 -1 285 | 0.2255 0.42757 0.23947 0.93753 1 286 | 0.16957 0.57938 0.38032 0.3348 -1 287 | 0.69882 0.65871 0.27148 0.21215 -1 288 | 0.2661 0.199 0.51164 0.55192 1 289 | 0.12523 0.34976 0.35157 0.37661 1 290 | 0.059368 0.83684 
0.054409 0.87858 1 291 | 0.25219 0.65417 0.71273 0.24637 -1 292 | 0.068428 0.14777 0.85266 0.15734 1 293 | 0.78826 0.43742 0.32272 0.14754 -1 294 | 0.93125 0.62768 0.14753 0.70593 -1 295 | 0.056151 0.32494 0.6846 0.23316 -1 296 | 0.063142 0.60313 0.32174 0.87777 1 297 | 0.23769 0.15745 0.70689 0.69297 1 298 | 0.83399 0.87303 0.83481 0.32702 1 299 | 0.05876 0.13989 0.88631 0.47709 -1 300 | 0.48562 0.81802 0.596 0.30847 -1 301 | 0.87224 0.38699 0.25926 0.94772 1 302 | 0.93123 0.55403 0.69714 0.40914 -1 303 | 0.07334 0.71654 0.53738 0.90147 1 304 | 0.19967 0.067767 0.44733 0.97446 -1 305 | 0.76995 0.82885 0.10241 0.22155 -1 306 | 0.71601 0.42943 0.67001 0.81219 -1 307 | 0.56513 0.85928 0.4524 0.32379 1 308 | 0.28782 0.95656 0.59435 0.75684 1 309 | 0.94386 0.98528 0.36511 0.0012337 -1 310 | 0.4703 0.50875 0.15726 0.88635 1 311 | 0.26889 0.23061 0.34074 0.53681 1 312 | 0.63058 0.26073 0.33759 0.19645 -1 313 | 0.78077 0.002525 0.037824 0.70881 1 314 | 0.33963 0.051688 0.79924 0.16172 -1 315 | 0.30472 0.30228 0.35446 0.58161 1 316 | 0.24093 0.2747 0.82408 0.19372 -1 317 | 0.8592 0.81663 0.9595 0.1779 -1 318 | 0.85398 0.30959 0.53588 0.54926 -1 319 | 0.4833 0.7314 0.32524 0.71049 -1 320 | 0.614 0.38803 0.67951 0.70156 1 321 | 0.98824 0.94792 0.74455 0.13349 -1 322 | 0.42402 0.16833 0.94189 0.29365 -1 323 | 0.74407 0.54713 0.54192 0.12071 -1 324 | 0.063134 0.18254 0.29053 0.038356 1 325 | 0.65632 0.53947 0.16126 0.32511 -1 326 | 0.9393 0.85986 0.68543 0.52104 1 327 | 0.71479 0.48179 0.60225 0.15613 -1 328 | 0.44205 0.6306 0.90769 0.97106 -1 329 | 0.85049 0.048915 0.2332 0.017383 -1 330 | 0.87906 0.25269 0.46236 0.56512 1 331 | 0.93955 0.96214 0.30794 0.99416 -1 332 | 0.012749 0.68618 0.16968 0.85224 1 333 | 0.045195 0.63691 0.19143 0.62263 1 334 | 0.012138 0.10272 0.55453 0.98919 1 335 | 0.71185 0.61347 0.50344 0.20019 -1 336 | 0.41225 0.76667 0.35031 0.5021 -1 337 | 0.69191 0.83291 0.99819 0.30203 -1 338 | 0.49165 0.76016 0.42874 0.27776 -1 339 | 0.80428 0.31712 0.85327 0.96486 1 340 | 0.5598 0.46229 0.78144 0.71341 -1 341 | 0.27162 0.77556 0.84894 0.64938 -1 342 | 0.82086 0.47713 0.98303 0.43151 -1 343 | 0.3513 0.52658 0.84636 0.0064609 -1 344 | 0.40952 0.64755 0.63748 0.30576 -1 345 | 0.38456 0.36008 0.066319 0.94909 1 346 | 0.86577 0.24435 0.76425 0.16517 -1 347 | 0.35595 0.73159 0.067512 0.717 1 348 | 0.30959 0.13269 0.66714 0.70887 1 349 | 0.60087 0.12507 0.5814 0.79926 1 350 | 0.43133 0.23505 0.0255 0.26809 1 351 | 0.62997 0.12028 0.3059 0.7856 1 352 | 0.47713 0.73178 0.4905 0.2034 -1 353 | 0.24023 0.14217 0.87103 0.95436 1 354 | 0.4416 0.14914 0.90971 0.73073 1 355 | 0.65308 0.52754 0.46611 0.45209 -1 356 | 0.78063 0.6998 0.93154 0.13919 -1 357 | 0.25823 0.89672 0.78748 0.55453 -1 358 | 0.66235 0.89901 0.025559 0.60776 -1 359 | 0.018733 0.17002 0.094589 0.41247 -1 360 | 0.031237 0.10522 0.46865 0.35616 1 361 | 0.91349 0.38781 0.82556 0.86227 -1 362 | 0.094865 0.033995 0.3056 0.8688 1 363 | 0.77287 0.92994 0.37936 0.33073 -1 364 | 0.66504 0.51028 0.52946 0.67426 -1 365 | 0.11013 0.75305 0.71427 0.00056762 -1 366 | 0.51981 0.17903 0.3893 0.67221 1 367 | 0.10114 0.21765 0.46143 0.22902 1 368 | 0.010339 0.15346 0.83746 0.81257 1 369 | 0.8632 0.011808 0.19987 0.6472 1 370 | 0.54008 0.82482 0.84166 0.56496 -1 371 | 0.3482 0.8905 0.51233 0.91889 -1 372 | 0.77469 0.93583 0.4624 0.73994 -1 373 | 0.9745 0.56708 0.060375 0.66342 -1 374 | 0.14142 0.30387 0.21102 0.19947 1 375 | 0.39872 0.55039 0.44443 0.26098 -1 376 | 0.34177 0.60486 0.90462 0.86823 1 377 | 0.83967 0.71719 0.30083 0.39151 -1 378 | 
0.81773 0.29067 0.35061 0.55061 -1 379 | 0.66948 0.36054 0.041649 0.64083 1 380 | 0.85696 0.7028 0.092982 0.63155 -1 381 | 0.53649 0.43733 0.35184 0.22402 -1 382 | 0.19458 0.7009 0.082374 0.83509 1 383 | 0.3575 0.28107 0.1875 0.085435 1 384 | 0.48013 0.38729 0.10979 0.26772 -1 385 | 0.85196 0.19375 0.50782 0.36685 -1 386 | 0.234 0.71013 0.098888 0.26843 -1 387 | 0.81637 0.049073 0.36264 0.59693 1 388 | 0.21186 0.38381 0.060465 0.21239 1 389 | 0.71556 0.72849 0.6179 0.18003 -1 390 | 0.26447 0.13603 0.64078 0.52565 1 391 | 0.75369 0.95215 0.15934 0.39296 -1 392 | 0.073749 0.14393 0.18983 0.51306 1 393 | 0.81873 0.24105 0.92729 0.94901 -1 394 | 0.62371 0.68504 0.8073 0.95978 -1 395 | 0.43277 0.28914 0.058256 0.91954 1 396 | 0.58683 0.41957 0.050398 0.85571 1 397 | 0.1523 0.25312 0.44469 0.52488 1 398 | 0.87089 0.15239 0.65042 0.50816 -1 399 | 0.63169 0.79572 0.82213 0.28511 -1 400 | 0.79702 0.8672 0.037235 0.71756 -1 401 | 0.09008 0.48571 0.25841 0.4297 1 402 | 0.12958 0.17459 0.095275 0.6383 1 403 | 0.23153 0.70622 0.89636 0.72335 -1 404 | 0.75807 0.1275 0.93011 0.18538 -1 405 | 0.71421 0.37544 0.79022 0.42783 -1 406 | 0.59602 0.037279 0.70431 0.21939 -1 407 | 0.41592 0.040164 0.74832 0.97646 1 408 | 0.12855 0.23937 0.58989 0.64235 1 409 | 0.65212 0.21828 0.0083334 0.77747 1 410 | 0.28586 0.46605 0.91566 0.33282 -1 411 | 0.78176 0.35581 0.88115 0.091832 -1 412 | 0.10607 0.69497 0.87662 0.1209 -1 413 | 0.3088 1.5614e-05 0.65955 0.4316 -1 414 | 0.87959 0.89579 0.40481 0.75755 -1 415 | 0.17627 0.091288 0.74384 0.86473 1 416 | 0.74549 0.8077 0.18775 0.81805 -1 417 | 0.9317 0.242 0.26646 0.88595 1 418 | 0.88149 0.25451 0.90232 0.69041 -1 419 | 0.62955 0.57953 0.38087 0.44537 -1 420 | 0.10859 0.71253 0.65669 0.092201 -1 421 | 0.12763 0.68387 0.12442 0.86436 1 422 | 0.23332 0.62202 0.51503 0.34624 -1 423 | 0.25611 0.7191 0.01862 0.64031 1 424 | 0.29705 0.34509 0.93597 0.18984 -1 425 | 0.2496 0.73978 0.72371 0.10923 -1 426 | 0.049175 0.11706 0.49745 0.39921 1 427 | 0.48114 0.25889 0.74817 0.23936 -1 428 | 0.4659 0.72012 0.65165 0.48187 1 429 | 0.4343 0.76832 0.71398 0.37773 -1 430 | 0.3524 0.66063 0.87535 0.39566 -1 431 | 0.57184 0.40824 0.098993 0.051089 -1 432 | 0.31597 0.91646 0.30136 0.44274 -1 433 | 0.1384 0.42918 0.012166 0.63011 1 434 | 0.76093 0.68485 0.069427 0.69864 -1 435 | 0.74892 0.11612 0.35644 0.61754 1 436 | 0.13922 0.68474 0.81931 0.54694 -1 437 | 0.94023 0.42769 0.55768 0.32668 -1 438 | 0.68091 0.23228 0.34034 0.72082 1 439 | 0.87453 0.48672 0.97173 0.040737 -1 440 | 0.49237 0.13036 0.61508 0.86846 1 441 | 0.33983 0.87492 0.56542 0.49186 -1 442 | 0.65083 0.78996 0.69556 0.99073 -1 443 | 0.59199 0.19262 0.72098 0.33655 -1 444 | 0.14436 0.34911 0.55038 0.23322 -1 445 | 0.815 0.84924 0.16249 0.17836 -1 446 | 0.66821 0.76756 0.084265 0.49902 -1 447 | 0.080257 0.55313 0.45666 0.78816 1 448 | 0.31555 0.24986 0.93284 0.15411 -1 449 | 0.89131 0.45232 0.29465 0.70323 -1 450 | 0.87804 0.10965 0.82546 0.28059 -1 451 | 0.45884 0.87073 0.68606 0.46454 -1 452 | 0.22361 0.74029 0.92432 0.79606 -1 453 | 0.1483 0.14499 0.96228 0.52377 1 454 | 0.90739 0.54535 0.50465 0.48081 -1 455 | 0.065069 0.24443 0.56713 0.8638 1 456 | 0.023888 0.36313 0.85114 0.81391 1 457 | 0.82311 0.2624 0.73697 0.8167 -1 458 | 0.8728 0.69841 0.42519 0.062268 -1 459 | 0.67062 0.55296 0.062903 0.80634 1 460 | 0.37663 0.22243 0.34317 0.15066 1 461 | 0.071037 0.94222 0.6525 0.090855 1 462 | 0.96671 0.80279 0.89601 0.46875 -1 463 | 0.78693 0.50915 0.87117 0.17317 -1 464 | 0.79814 0.508 0.67991 0.61601 -1 465 | 0.61392 0.73486 
0.51633 0.56495 -1 466 | 0.38121 0.36322 0.77206 0.51608 -1 467 | 0.5954 0.91567 0.75785 0.5009 -1 468 | 0.50379 0.37611 0.85467 0.31313 -1 469 | 0.71569 0.23432 0.5401 0.97112 -1 470 | 0.60397 0.66904 0.44689 0.065277 -1 471 | 0.85894 0.73859 0.25248 0.73152 -1 472 | 0.62537 0.70175 0.87938 0.3264 -1 473 | 0.50515 0.45676 0.88166 0.022922 -1 474 | 0.15647 0.38701 0.40518 0.97207 1 475 | 0.13852 0.80626 0.59088 0.36689 -1 476 | 0.80128 0.61836 0.11954 0.76606 -1 477 | 0.98429 0.53256 0.42958 0.85868 -1 478 | 0.075177 0.38532 0.63336 0.48658 1 479 | 0.59246 0.30028 0.059762 0.97234 1 480 | 0.31957 0.79177 0.65774 0.012287 -1 481 | 0.28574 0.52854 0.89194 0.53445 -1 482 | 0.39154 0.35027 0.9611 0.28562 -1 483 | 0.26419 0.82448 0.91928 0.46473 -1 484 | 0.77511 0.22871 0.81792 0.036291 -1 485 | 0.54754 0.41152 0.024828 0.81809 1 486 | 0.22222 0.46347 0.32722 0.64281 1 487 | 0.12237 0.87681 0.016255 0.29233 -1 488 | 0.70996 0.8453 0.96215 0.11639 -1 489 | 0.42764 0.36548 0.2229 0.58804 1 490 | 0.15212 0.7592 0.92622 0.25391 -1 491 | 0.12955 0.5786 0.79714 0.31898 -1 492 | 0.53495 0.072288 0.62715 0.57982 1 493 | 0.83327 0.93204 0.5718 0.035657 -1 494 | 0.32058 0.82525 0.26048 0.21956 1 495 | 0.21605 0.063647 0.37849 0.76708 1 496 | 0.54296 0.95995 0.38446 0.11145 -1 497 | 0.37438 0.12874 0.7528 0.97768 1 498 | 0.078827 0.3823 0.13965 0.82677 1 499 | 0.92288 0.86313 0.22939 0.92214 -1 500 | 0.50315 0.41884 0.026094 0.91623 1 501 | -------------------------------------------------------------------------------- /Week1/hw1/hw1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Homework 1-1: PLA" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "First, we use an artificial data set to study PLA. The data set is in https://www.csie.ntu.edu.tw/~htlin/course/ml15fall/hw1/hw1_15_train.dat\n", 15 | "\n", 16 | "Each line of the data set contains one (x n ,y n ) with x n ∈ R 4 . The first 4 numbers of the line contains\n", 17 | "the components of x n orderly, the last number is y n . Please initialize your algorithm with w = 0 and\n", 18 | "take sign(0) as −1. As a friendly reminder, remember to add x 0 = 1 as always!" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "**1. Implement a version of PLA by visiting examples in the na¨ıve cycle using the order of examples in the data set. Run the algorithm on the data set. What is the number of updates before the algorithm halts? 
**" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "Load the dataset" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 1, 38 | "metadata": { 39 | "collapsed": true 40 | }, 41 | "outputs": [], 42 | "source": [ 43 | "from loadfile import *\n", 44 | "from PerceptronLinearAlgorithm import *" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 2, 50 | "metadata": { 51 | "collapsed": true 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "X_data, Y_data = loadfile('./data/hw1_15_train.dat')" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 3, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "data": { 65 | "text/plain": [ 66 | "array([[ 0.97681 , 0.10723 , 0.64385 , 0.29556 ],\n", 67 | " [ 0.67194 , 0.2418 , 0.83075 , 0.42741 ],\n", 68 | " [ 0.20619 , 0.23321 , 0.81004 , 0.98691 ],\n", 69 | " ..., \n", 70 | " [ 0.93895 , 0.93941 , 0.72496 , 0.95655 ],\n", 71 | " [ 0.046136, 0.94413 , 0.038311, 0.26812 ],\n", 72 | " [ 0.072491, 0.2242 , 0.62592 , 0.67238 ]])" 73 | ] 74 | }, 75 | "execution_count": 3, 76 | "metadata": {}, 77 | "output_type": "execute_result" 78 | } 79 | ], 80 | "source": [ 81 | "X_data" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 4, 87 | "metadata": {}, 88 | "outputs": [ 89 | { 90 | "data": { 91 | "text/plain": [ 92 | "array([ 1., 1., 1., 1., 1., 1., -1., 1., -1., -1., 1., 1., 1.,\n", 93 | " -1., -1., 1., 1., 1., -1., 1., 1., 1., 1., 1., 1., 1.,\n", 94 | " -1., 1., 1., -1., -1., 1., 1., -1., 1., 1., -1., -1., 1.,\n", 95 | " -1., -1., 1., -1., 1., 1., 1., -1., -1., 1., 1., 1., 1.,\n", 96 | " 1., 1., 1., 1., 1., -1., -1., 1., -1., 1., -1., -1., 1.,\n", 97 | " -1., 1., -1., -1., 1., 1., 1., -1., 1., 1., 1., 1., 1.,\n", 98 | " 1., -1., 1., 1., 1., -1., 1., 1., -1., 1., 1., 1., 1.,\n", 99 | " 1., 1., 1., -1., 1., -1., 1., 1., -1., 1., 1., 1., 1.,\n", 100 | " -1., 1., 1., 1., 1., -1., 1., -1., 1., 1., -1., 1., 1.,\n", 101 | " 1., 1., -1., 1., -1., -1., -1., 1., 1., 1., 1., 1., 1.,\n", 102 | " 1., -1., -1., 1., 1., -1., 1., -1., 1., 1., 1., -1., 1.,\n", 103 | " -1., -1., 1., -1., -1., 1., 1., 1., 1., -1., 1., 1., 1.,\n", 104 | " 1., 1., 1., 1., 1., -1., -1., -1., 1., -1., 1., -1., 1.,\n", 105 | " -1., 1., 1., -1., -1., 1., -1., 1., 1., 1., 1., 1., 1.,\n", 106 | " 1., 1., -1., 1., 1., -1., 1., 1., 1., 1., 1., -1., 1.,\n", 107 | " 1., 1., 1., 1., 1., -1., -1., -1., -1., 1., -1., 1., 1.,\n", 108 | " -1., 1., -1., -1., 1., 1., 1., 1., 1., 1., 1., -1., 1.,\n", 109 | " -1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., -1.,\n", 110 | " 1., -1., 1., 1., -1., 1., 1., 1., 1., -1., 1., -1., 1.,\n", 111 | " 1., 1., 1., 1., -1., 1., -1., 1., 1., 1., -1., -1., 1.,\n", 112 | " 1., 1., 1., 1., -1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", 113 | " 1., -1., 1., 1., 1., 1., -1., 1., 1., 1., 1., 1., 1.,\n", 114 | " 1., 1., 1., 1., 1., 1., 1., -1., 1., -1., 1., 1., 1.,\n", 115 | " -1., -1., -1., 1., 1., 1., 1., 1., 1., 1., 1., 1., -1.,\n", 116 | " 1., 1., 1., -1., 1., 1., -1., -1., -1., 1., 1., -1., -1.,\n", 117 | " 1., -1., -1., -1., 1., 1., 1., 1., 1., 1., 1., 1., -1.,\n", 118 | " 1., 1., 1., 1., -1., 1., 1., -1., -1., 1., -1., 1., 1.,\n", 119 | " -1., 1., 1., 1., 1., 1., -1., 1., 1., 1., 1., 1., 1.,\n", 120 | " 1., 1., -1., 1., 1., 1., -1., 1., -1., 1., 1., 1., -1.,\n", 121 | " 1., 1., 1., -1., -1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", 122 | " 1., 1., -1., 1., -1., 1., -1., 1., -1., 1.])" 123 | ] 124 | }, 125 | "execution_count": 4, 126 | "metadata": {}, 127 | 
"output_type": "execute_result" 128 | } 129 | ], 130 | "source": [ 131 | "Y_data" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "Data preprocess" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 5, 144 | "metadata": { 145 | "collapsed": true 146 | }, 147 | "outputs": [], 148 | "source": [ 149 | "# Add bias\n", 150 | "X_data = np.hstack((np.ones((X_data.shape[0],1)), X_data))\n", 151 | "# weights initialization\n", 152 | "W = np.zeros(X_data.shape[1])" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 6, 158 | "metadata": {}, 159 | "outputs": [ 160 | { 161 | "data": { 162 | "text/plain": [ 163 | "array([[ 1. , 0.97681 , 0.10723 , 0.64385 , 0.29556 ],\n", 164 | " [ 1. , 0.67194 , 0.2418 , 0.83075 , 0.42741 ],\n", 165 | " [ 1. , 0.20619 , 0.23321 , 0.81004 , 0.98691 ],\n", 166 | " ..., \n", 167 | " [ 1. , 0.93895 , 0.93941 , 0.72496 , 0.95655 ],\n", 168 | " [ 1. , 0.046136, 0.94413 , 0.038311, 0.26812 ],\n", 169 | " [ 1. , 0.072491, 0.2242 , 0.62592 , 0.67238 ]])" 170 | ] 171 | }, 172 | "execution_count": 6, 173 | "metadata": {}, 174 | "output_type": "execute_result" 175 | } 176 | ], 177 | "source": [ 178 | "X_data" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 7, 184 | "metadata": {}, 185 | "outputs": [ 186 | { 187 | "data": { 188 | "text/plain": [ 189 | "array([ 0., 0., 0., 0., 0.])" 190 | ] 191 | }, 192 | "execution_count": 7, 193 | "metadata": {}, 194 | "output_type": "execute_result" 195 | } 196 | ], 197 | "source": [ 198 | "W" 199 | ] 200 | }, 201 | { 202 | "cell_type": "markdown", 203 | "metadata": {}, 204 | "source": [ 205 | "PLA iteration" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 8, 211 | "metadata": {}, 212 | "outputs": [ 213 | { 214 | "name": "stdout", 215 | "output_type": "stream", 216 | "text": [ 217 | "37\n" 218 | ] 219 | } 220 | ], 221 | "source": [ 222 | "PLA = pla()\n", 223 | "halt = PLA.pla_1(X_data, Y_data)\n", 224 | "print(halt)" 225 | ] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": {}, 230 | "source": [ 231 | "**2. Implement a version of PLA by visiting examples in fixed, pre-determined random cycles\n", 232 | "throughout the algorithm. Run the algorithm on the data set. Please repeat your experiment for\n", 233 | "2000 times, each with a different random seed. What is the average number of updates before the\n", 234 | "algorithm halts?**" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 9, 240 | "metadata": {}, 241 | "outputs": [ 242 | { 243 | "name": "stdout", 244 | "output_type": "stream", 245 | "text": [ 246 | "34.2895\n" 247 | ] 248 | } 249 | ], 250 | "source": [ 251 | "halt, accuracy = PLA.pla_2(X_data, Y_data)\n", 252 | "print(halt)" 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": {}, 258 | "source": [ 259 | "** 3. Implement a version of PLA by visiting examples in fixed, pre-determined random cycles throughout the algorithm, while changing the update rule to be:**\n", 260 | "\n", 261 | "$$W_{t+1}\\rightarrow W_t+\\eta y_{n(t)}x_{n(t)}$$\n", 262 | "\n", 263 | "**with $\\eta=0.5$. Note that your PLA in the previous problem corresponds to $\\eta=1$. Please repeat your experiment for 2000 times, each with a different random seed. What is the average number of updates before the algorithm halts? 
**" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 10, 269 | "metadata": {}, 270 | "outputs": [ 271 | { 272 | "name": "stdout", 273 | "output_type": "stream", 274 | "text": [ 275 | "34.2895\n" 276 | ] 277 | } 278 | ], 279 | "source": [ 280 | "halt, accuracy = PLA.pla_3(X_data, Y_data)\n", 281 | "print(halt)" 282 | ] 283 | }, 284 | { 285 | "cell_type": "markdown", 286 | "metadata": {}, 287 | "source": [ 288 | "# Homework 1-2: Pocket PLA" 289 | ] 290 | }, 291 | { 292 | "cell_type": "markdown", 293 | "metadata": {}, 294 | "source": [ 295 | "Next, we play with the pocket algorithm. Modify your PLA in Problem 16 to visit examples purely randomly, and then add the ‘pocket’ steps to the algorithm. We will use https://www.csie.ntu.edu.tw/~htlin/course/ml15fall/hw1/hw1_18_train.dat as the training data set D, and https://www.csie.ntu.edu.tw/~htlin/course/ml15fall/hw1/hw1_18_test.dat as the test set for “verifying” the g returned by your algorithm (see lecture 4 about verifying). The sets are of the same format as the previous one." 296 | ] 297 | }, 298 | { 299 | "cell_type": "markdown", 300 | "metadata": {}, 301 | "source": [ 302 | "**1. Run the pocket algorithm with a total of 50 updates on D, and verify the performance of w pocket using the test set. Please repeat your experiment for 2000 times, each with a different random seed. What is the average error rate on the test set? **" 303 | ] 304 | }, 305 | { 306 | "cell_type": "markdown", 307 | "metadata": {}, 308 | "source": [ 309 | "Load the dataset" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": 11, 315 | "metadata": { 316 | "collapsed": true 317 | }, 318 | "outputs": [], 319 | "source": [ 320 | "# train data\n", 321 | "X_train, Y_train = loadfile('./data/hw1_18_train.dat')\n", 322 | "# test data\n", 323 | "X_test, Y_test = loadfile('./data/hw1_18_test.dat')" 324 | ] 325 | }, 326 | { 327 | "cell_type": "markdown", 328 | "metadata": {}, 329 | "source": [ 330 | "Data preprocess" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": 12, 336 | "metadata": { 337 | "collapsed": true 338 | }, 339 | "outputs": [], 340 | "source": [ 341 | "# Add bias\n", 342 | "X_train = np.hstack((np.ones((X_train.shape[0],1)), X_train))\n", 343 | "X_test = np.hstack((np.ones((X_test.shape[0],1)), X_test))" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": 13, 349 | "metadata": {}, 350 | "outputs": [ 351 | { 352 | "data": { 353 | "text/plain": [ 354 | "array([[ 1. , 0.94544 , 0.42842 , 0.79833 , 0.16244 ],\n", 355 | " [ 1. , 0.85365 , 0.084168, 0.5682 , 0.49221 ],\n", 356 | " [ 1. , 0.17095 , 0.82127 , 0.98444 , 0.51486 ],\n", 357 | " ..., \n", 358 | " [ 1. , 0.078827, 0.3823 , 0.13965 , 0.82677 ],\n", 359 | " [ 1. , 0.92288 , 0.86313 , 0.22939 , 0.92214 ],\n", 360 | " [ 1. , 0.50315 , 0.41884 , 0.026094, 0.91623 ]])" 361 | ] 362 | }, 363 | "execution_count": 13, 364 | "metadata": {}, 365 | "output_type": "execute_result" 366 | } 367 | ], 368 | "source": [ 369 | "X_train" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": 14, 375 | "metadata": {}, 376 | "outputs": [ 377 | { 378 | "data": { 379 | "text/plain": [ 380 | "array([[ 1. , 0.62926 , 0.32783 , 0.010417, 0.73102 ],\n", 381 | " [ 1. , 0.32368 , 0.61439 , 0.42097 , 0.025626],\n", 382 | " [ 1. , 0.15968 , 0.83346 , 0.97515 , 0.32762 ],\n", 383 | " ..., \n", 384 | " [ 1. , 0.34445 , 0.30929 , 0.049222, 0.49242 ],\n", 385 | " [ 1. 
, 0.040789, 0.28334 , 0.55817 , 0.60612 ],\n", 386 | " [ 1. , 0.36302 , 0.34961 , 0.12651 , 0.54315 ]])" 387 | ] 388 | }, 389 | "execution_count": 14, 390 | "metadata": {}, 391 | "output_type": "execute_result" 392 | } 393 | ], 394 | "source": [ 395 | "X_test" 396 | ] 397 | }, 398 | { 399 | "cell_type": "markdown", 400 | "metadata": {}, 401 | "source": [ 402 | "Pocket PLA iteration" 403 | ] 404 | }, 405 | { 406 | "cell_type": "code", 407 | "execution_count": 15, 408 | "metadata": {}, 409 | "outputs": [ 410 | { 411 | "name": "stdout", 412 | "output_type": "stream", 413 | "text": [ 414 | "0.287786\n" 415 | ] 416 | } 417 | ], 418 | "source": [ 419 | "PocketPLA = pocket_pla()\n", 420 | "error_mean = PocketPLA.pocket_pla_1(X_train, Y_train, X_test, Y_test)\n", 421 | "print(error_mean)" 422 | ] 423 | }, 424 | { 425 | "cell_type": "markdown", 426 | "metadata": {}, 427 | "source": [ 428 | "**2. Modify your algorithm to return $w_{50}$(the PLA vector after 50 updates) instead of w (the pocket vector) after 50 updates. Run the modified algorithm on D, and verify the performance using the test set. Please repeat your experiment for 2000 times, each with a different random seed. What is the average error rate on the test set? Compare your result to the previous problem and briefly discuss your findings.**" 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": 16, 434 | "metadata": {}, 435 | "outputs": [ 436 | { 437 | "name": "stdout", 438 | "output_type": "stream", 439 | "text": [ 440 | "0.300748\n" 441 | ] 442 | } 443 | ], 444 | "source": [ 445 | "error_mean = PocketPLA.pocket_pla_2(X_train, Y_train, X_test, Y_test)\n", 446 | "print(error_mean)" 447 | ] 448 | }, 449 | { 450 | "cell_type": "markdown", 451 | "metadata": {}, 452 | "source": [ 453 | "**3. Modify your algorithm in Problem 18 to run for 100 updates instead of 50, and verify the performance of w pocket using the test set. Please repeat your experiment for 2000 times, each with a different random seed. 
What is the average error rate on the test set?**" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": 17, 459 | "metadata": {}, 460 | "outputs": [ 461 | { 462 | "name": "stdout", 463 | "output_type": "stream", 464 | "text": [ 465 | "0.287875\n" 466 | ] 467 | } 468 | ], 469 | "source": [ 470 | "error_mean = PocketPLA.pocket_pla_3(X_train, Y_train, X_test, Y_test)\n", 471 | "print(error_mean)" 472 | ] 473 | }, 474 | { 475 | "cell_type": "code", 476 | "execution_count": null, 477 | "metadata": { 478 | "collapsed": true 479 | }, 480 | "outputs": [], 481 | "source": [] 482 | } 483 | ], 484 | "metadata": { 485 | "kernelspec": { 486 | "display_name": "Python 3", 487 | "language": "python", 488 | "name": "python3" 489 | }, 490 | "language_info": { 491 | "codemirror_mode": { 492 | "name": "ipython", 493 | "version": 3 494 | }, 495 | "file_extension": ".py", 496 | "mimetype": "text/x-python", 497 | "name": "python", 498 | "nbconvert_exporter": "python", 499 | "pygments_lexer": "ipython3", 500 | "version": "3.6.3" 501 | } 502 | }, 503 | "nbformat": 4, 504 | "nbformat_minor": 2 505 | } 506 | -------------------------------------------------------------------------------- /Week1/hw1/loadfile.py: -------------------------------------------------------------------------------- 1 | # homework 1 2 | 3 | #Load the dataset 4 | 5 | import numpy as np 6 | 7 | def loadfile(file): 8 | X = [] # features, shape = (samples, features) 9 | Y = [] # labels, shape = (sample,) 10 | for line in open(file).readlines(): 11 | items = line.strip().split('\t') # features and label split by Tab 12 | y = items[1].strip() 13 | y = float(y) # str to float 14 | Y.append(y) 15 | x = items[0].strip().split(' ') 16 | x = list(map(float, x)) # str to float 17 | X.append(x) 18 | X = np.array(X) # list to array 19 | Y = np.array(Y) # list to array 20 | return X, Y -------------------------------------------------------------------------------- /Week1/reference_hw1/PerceptronLinearAlgorithm.py: -------------------------------------------------------------------------------- 1 | # First, we use an artificial data set to study PLA. 2 | # The data set is in https://www.csie.ntu.edu.tw/~htlin/course/ml15fall/hw1/hw1_15_train.dat 3 | # Each line of the data set contains one (xn ,yn ) with xn ∈ R4 . 4 | # The first 4 numbers of the line contains the components of x n orderly, the last number is y n . 5 | # Please initialize your algorithm with w = 0 and take sign(0) as −1. 6 | # As a friendly reminder, remember to add x0 = 1 as always! 7 | 8 | import numpy as np 9 | 10 | class pla(object): 11 | 12 | def __init__(self): 13 | pass 14 | 15 | # Q1. Implement a version of PLA by visiting examples in the naive cycle using the order of examples in the data set. 16 | # Run the algorithm on the data set. 17 | # What is the number of updates before the algorithm halts? 18 | def pla_1(self, X, Y): 19 | 20 | # weights initialization 21 | W = np.zeros(X.shape[1]) 22 | 23 | # PLA iteration 24 | halt = 0 # number of iteration before halt 25 | for i in range(X.shape[0]): 26 | score = np.dot(X[i,:], W) # score 27 | if score*Y[i] <= 0: # classification error 28 | W = W + np.dot(X[i,:].T, Y[i]) 29 | halt = halt + 1 30 | 31 | return halt 32 | 33 | # Q2. Implement a version of PLA by visiting examples in fixed, pre-determined random cycles throughout the algorithm. 34 | # Run the algorithm on the data set. Please repeat your experiment for 2000 times, each with a different random seed. 
35 | # What is the average number of updates before the algorithm halts?
36 | # Plot a histogram ( https://en.wikipedia.org/wiki/Histogram ) to show the number of updates versus frequency.
37 |     def pla_2(self, X, Y):
38 | 
39 |         Iteration = 2000 # number of iteration
40 |         Halts = [] # list store halt every iteration
41 |         Accuracys = [] # list store accuracy every iteration
42 | 
43 |         for iter in range(Iteration):
44 |             np.random.seed(iter) # set random seed, different by iteration
45 |             permutation = np.random.permutation(X.shape[0]) # random select index
46 |             X = X[permutation] # random order X
47 |             Y = Y[permutation] # random order Y, as the same as X
48 | 
49 |             # look through the entire data set
50 |             W = np.zeros(X.shape[1]) # weights initialization
51 |             halt = 0 # number of iteration before halt
52 |             for i in range(X.shape[0]):
53 |                 score = np.dot(X[i,:], W) # score
54 |                 if score*Y[i] <= 0: # classification error
55 |                     W = W + np.dot(X[i,:].T, Y[i])
56 |                     halt = halt + 1
57 | 
58 |             # accuracy
59 |             Y_pred = np.dot(X, W)
60 |             Y_pred[Y_pred > 0] = 1
61 |             Y_pred[Y_pred < 0] = -1
62 |             accuracy = np.mean(Y_pred == Y)
63 | 
64 |             # store Halts & Accuracys
65 |             Halts.append(halt)
66 |             Accuracys.append(accuracy)
67 | 
68 |         # mean
69 |         halt_mean = np.mean(Halts)
70 |         accuracy_mean = np.mean(Accuracys)
71 | 
72 |         return halt_mean, accuracy_mean
73 | 
74 | # Q3. Implement a version of PLA by visiting examples in fixed, pre-determined random cycles throughout the algorithm, while changing the update rule to be:
75 | # W_{t+1} → W_t + η y_{n(t)} x_{n(t)} with η = 0.5. Note that your PLA in the previous problem corresponds to η = 1.
76 | # Please repeat your experiment for 2000 times, each with a different random seed. What is the average number of updates before the algorithm halts?
77 | # Plot a histogram to show the number of updates versus frequency. Compare your result to the previous problem and briefly discuss your findings.
78 |     def pla_3(self, X, Y):
79 | 
80 |         Iteration = 2000 # number of iteration
81 |         Halts = [] # list store halt every iteration
82 |         Accuracys = [] # list store accuracy every iteration
83 | 
84 |         for iter in range(Iteration):
85 |             np.random.seed(iter) # set random seed, different by iteration
86 |             permutation = np.random.permutation(X.shape[0]) # random select index
87 |             X = X[permutation] # random order X_data
88 |             Y = Y[permutation] # random order Y_data, as the same as X_data
89 | 
90 |             # look through the entire data set
91 |             W = np.zeros(X.shape[1]) # weights initialization
92 |             halt = 0 # number of iteration before halt
93 |             for i in range(X.shape[0]):
94 |                 score = np.dot(X[i,:], W) # score
95 |                 if score*Y[i] <= 0: # classification error
96 |                     W = W + 0.5 * np.dot(X[i,:].T, Y[i])
97 |                     halt = halt + 1
98 | 
99 |             # accuracy
100 |             Y_pred = np.dot(X, W)
101 |             Y_pred[Y_pred > 0] = 1
102 |             Y_pred[Y_pred < 0] = -1
103 |             accuracy = np.mean(Y_pred == Y)
104 | 
105 |             # store Halts & Accuracys
106 |             Halts.append(halt)
107 |             Accuracys.append(accuracy)
108 | 
109 |         # mean
110 |         halt_mean = np.mean(Halts)
111 |         accuracy_mean = np.mean(Accuracys)
112 |         return halt_mean, accuracy_mean
113 | 
114 | 
115 | # Next, we play with the pocket algorithm. Modify your PLA in Problem 16 to visit examples purely randomly,
116 | # and then add the ‘pocket’ steps to the algorithm. We will use
117 | # https://www.csie.ntu.edu.tw/~htlin/course/ml15fall/hw1/hw1_18_train.dat as the training data set D,
118 | # and https://www.csie.ntu.edu.tw/~htlin/course/ml15fall/hw1/hw1_18_test.dat
119 | # as the test set for “verifying” the g returned by your algorithm (see lecture 4 about verifying).
120 | # The sets are of the same format as the previous one.
121 | 
122 | class pocket_pla(object):
123 | 
124 |     def __init__(self):
125 |         pass
126 | 
127 | 
128 | # Q1. Run the pocket algorithm with a total of 50 updates on D, and verify the performance of w pocket using the test set.
129 | # Please repeat your experiment for 2000 times, each with a different random seed.
130 | # What is the average error rate on the test set? Plot a histogram to show error rate versus frequency.
131 | 
132 |     # calculate error count
133 |     def calError(self, X, Y, W):
134 |         score = np.dot(X, W)
135 |         Y_pred = np.ones_like(Y)
136 |         Y_pred[score < 0] = -1
137 |         err_cnt = np.sum(Y_pred != Y)
138 |         return err_cnt
139 | 
140 |     def pocket_pla_1(self, X_train, Y_train, X_test, Y_test):
141 |         Iteration = 2000 # number of iteration
142 |         Update = 50
143 |         Errors = [] # list store error rate every iteration
144 | 
145 | 
146 |         for iter in range(Iteration):
147 |             np.random.seed(iter) # set random seed, different by iteration
148 |             permutation = np.random.permutation(X_train.shape[0]) # random select index
149 |             X_train = X_train[permutation] # random order X_train
150 |             Y_train = Y_train[permutation] # random order Y_train, as the same as X_train
151 | 
152 |             # look through the 50 iterations
153 |             W = np.zeros(X_train.shape[1]) # weights initialization
154 |             min_err = self.calError(X_train, Y_train, W) # set initial W can make minimal error
155 |             for i in range(Update):
156 |                 score = np.dot(X_train[i,:], W) # score
157 |                 if score*Y_train[i] <= 0: # classification error
158 |                     tmp = W + np.dot(X_train[i,:].T, Y_train[i]) # new tmp, wait to decide replace W
159 |                     tmp_err = self.calError(X_train, Y_train, tmp) # calculate new error
160 |                     if tmp_err < min_err:
161 |                         W = tmp # update W
162 |                         min_err = tmp_err # update min_err
163 | 
164 |             # get W to test data
165 |             Y_pred_test = np.dot(X_test, W) # calculate score
166 |             Y_pred_test[Y_pred_test > 0] = 1 # positive
167 |             Y_pred_test[Y_pred_test < 0] = -1 # negative
168 |             error = np.mean(Y_pred_test != Y_test)
169 |             Errors.append(error) # store error to list
170 | 
171 |         # mean of errors
172 |         error_mean = np.mean(Errors)
173 | 
174 |         return error_mean
175 | 
176 | # Q2. Modify your algorithm to return w50 (the PLA vector after 50 updates) instead of w (the pocket vector) after 50 updates.
177 | # Run the modified algorithm on D, and verify the performance using the test set.
178 | # Please repeat your experiment for 2000 times, each with a different random seed.
179 | # What is the average error rate on the test set? Plot a histogram to show error rate versus frequency.
180 | # Compare your result to the previous problem and briefly discuss your findings.
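# --- Editor's sketch (an addition, not part of the original reference code): a minimal
# side-by-side of the two quantities Q1 and Q2 compare. Plain PLA returns whatever W is
# current after the visits, while the pocket step keeps the best weights seen so far.
# It reuses calError from this class; the method name and the parameter `n_updates`
# are illustrative only, under the same visiting scheme as pocket_pla_1 below.
    def _pocket_vs_final_demo(self, X, Y, n_updates=50):
        W = np.zeros(X.shape[1]) # current PLA vector (what Q2 reports)
        W_pocket = W.copy() # best-so-far pocket vector (what Q1 reports)
        min_err = self.calError(X, Y, W_pocket)
        for i in range(n_updates): # visit the first n_updates randomly-ordered examples
            if np.dot(X[i,:], W)*Y[i] <= 0: # classification error
                W = W + Y[i]*X[i,:] # PLA always moves W
                tmp_err = self.calError(X, Y, W)
                if tmp_err < min_err: # the pocket moves only on improvement
                    W_pocket = W.copy(); min_err = tmp_err
        return W, W_pocket
# --- End of editor's sketch ---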
181 |     def pocket_pla_2(self, X_train, Y_train, X_test, Y_test):
182 |         Iteration = 2000 # number of iteration
183 |         Update = 50
184 |         Errors = [] # list store error rate every iteration
185 | 
186 |         for iter in range(Iteration):
187 |             np.random.seed(iter) # set random seed, different by iteration
188 |             permutation = np.random.permutation(X_train.shape[0]) # random select index
189 |             X_train = X_train[permutation] # random order X_train
190 |             Y_train = Y_train[permutation] # random order Y_train, as the same as X_train
191 | 
192 |             # look through the 50 updates
193 |             W = np.zeros(X_train.shape[1]) # weights initialization
194 |             for i in range(Update):
195 |                 score = np.dot(X_train[i,:], W) # score
196 |                 if score*Y_train[i] <= 0: # classification error
197 |                     W = W + np.dot(X_train[i,:].T, Y_train[i])
198 | 
199 |             # get W to test data
200 |             Y_pred_test = np.dot(X_test, W) # calculate score
201 |             Y_pred_test[Y_pred_test > 0] = 1 # positive
202 |             Y_pred_test[Y_pred_test < 0] = -1 # negative
203 |             error = np.mean(Y_pred_test != Y_test)
204 |             Errors.append(error) # store error to list
205 | 
206 |         # mean of error
207 |         error_mean = np.mean(Errors)
208 | 
209 |         return error_mean
210 | 
211 | # Q3. Modify your algorithm in Problem 1 to run for 100 updates instead of 50, and verify the performance of w pocket using the test set.
212 | # Please repeat your experiment for 2000 times, each with a different random seed. What is the average error rate on the test set? Plot a
213 | # histogram to show error rate versus frequency. Compare your result to Problem 18 and briefly discuss your findings.
214 |     def pocket_pla_3(self, X_train, Y_train, X_test, Y_test):
215 |         Iteration = 2000 # number of iteration
216 |         Update = 100
217 |         Errors = [] # list store error rate every iteration
218 | 
219 | 
220 |         for iter in range(Iteration):
221 |             np.random.seed(iter) # set random seed, different by iteration
222 |             permutation = np.random.permutation(X_train.shape[0]) # random select index
223 |             X_train = X_train[permutation] # random order X_train
224 |             Y_train = Y_train[permutation] # random order Y_train, as the same as X_train
225 | 
226 |             # look through the 100 updates
227 |             W = np.zeros(X_train.shape[1]) # weights initialization
228 |             min_err = self.calError(X_train, Y_train, W) # set initial W can make minimal error
229 |             for i in range(Update):
230 |                 score = np.dot(X_train[i,:], W) # score
231 |                 if score*Y_train[i] <= 0: # classification error
232 |                     tmp = W + np.dot(X_train[i,:].T, Y_train[i]) # new tmp, wait to decide replace W
233 |                     tmp_err = self.calError(X_train, Y_train, tmp) # calculate new error
234 |                     if tmp_err < min_err:
235 |                         W = tmp # update W
236 |                         min_err = tmp_err # update min_err
237 | 
238 |             # get W to test data
239 |             Y_pred_test = np.dot(X_test, W) # calculate score
240 |             Y_pred_test[Y_pred_test > 0] = 1 # positive
241 |             Y_pred_test[Y_pred_test < 0] = -1 # negative
242 |             error = np.mean(Y_pred_test != Y_test)
243 |             Errors.append(error) # store error to list
244 | 
245 |         # mean of errors
246 |         error_mean = np.mean(Errors)
247 | 
248 |         return error_mean
249 | 
250 | 
--------------------------------------------------------------------------------
/Week1/reference_hw1/hw1.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "markdown",
5 |    "metadata": {},
6 |    "source": [
7 |     "# Homework 1-1: PLA"
8 |    ]
9 |   },
10 |   {
11 |    "cell_type": "markdown",
12 |    "metadata": {},
13 |    "source": [
14 |     "First, we use an artificial data set to study PLA. The data set is in https://www.csie.ntu.edu.tw/~htlin/course/ml15fall/hw1/hw1_15_train.dat\n",
15 |     "\n",
16 |     "Each line of the data set contains one $(x_n, y_n)$ with $x_n \\in R^4$. The first 4 numbers of the line contain\n",
17 |     "the components of $x_n$ orderly, the last number is $y_n$. Please initialize your algorithm with w = 0 and\n",
18 |     "take sign(0) as −1. As a friendly reminder, remember to add $x_0 = 1$ as always!"
19 |    ]
20 |   },
21 |   {
22 |    "cell_type": "markdown",
23 |    "metadata": {},
24 |    "source": [
25 |     "**1. Implement a version of PLA by visiting examples in the naïve cycle using the order of examples in the data set. Run the algorithm on the data set. What is the number of updates before the algorithm halts? **"
26 |    ]
27 |   },
28 |   {
29 |    "cell_type": "markdown",
30 |    "metadata": {},
31 |    "source": [
32 |     "Load the dataset"
33 |    ]
34 |   },
35 |   {
36 |    "cell_type": "code",
37 |    "execution_count": 1,
38 |    "metadata": {
39 |     "collapsed": true
40 |    },
41 |    "outputs": [],
42 |    "source": [
43 |     "from loadfile import *\n",
44 |     "from PerceptronLinearAlgorithm import *"
45 |    ]
46 |   },
47 |   {
48 |    "cell_type": "code",
49 |    "execution_count": 2,
50 |    "metadata": {
51 |     "collapsed": true
52 |    },
53 |    "outputs": [],
54 |    "source": [
55 |     "X_data, Y_data = loadfile('./data/hw1_15_train.dat')"
56 |    ]
57 |   },
58 |   {
59 |    "cell_type": "code",
60 |    "execution_count": 3,
61 |    "metadata": {},
62 |    "outputs": [
63 |     {
64 |      "data": {
65 |       "text/plain": [
66 |        "array([[ 0.97681 , 0.10723 , 0.64385 , 0.29556 ],\n",
67 |        " [ 0.67194 , 0.2418 , 0.83075 , 0.42741 ],\n",
68 |        " [ 0.20619 , 0.23321 , 0.81004 , 0.98691 ],\n",
69 |        " ..., \n",
70 |        " [ 0.93895 , 0.93941 , 0.72496 , 0.95655 ],\n",
71 |        " [ 0.046136, 0.94413 , 0.038311, 0.26812 ],\n",
72 |        " [ 0.072491, 0.2242 , 0.62592 , 0.67238 ]])"
73 |       ]
74 |      },
75 |      "execution_count": 3,
76 |      "metadata": {},
77 |      "output_type": "execute_result"
78 |     }
79 |    ],
80 |    "source": [
81 |     "X_data"
82 |    ]
83 |   },
84 |   {
85 |    "cell_type": "code",
86 |    "execution_count": 4,
87 |    "metadata": {},
88 |    "outputs": [
89 |     {
90 |      "data": {
91 |       "text/plain": [
92 |        "array([ 1., 1., 1., 1., 1., 1., -1., 1., -1., -1., 1., 1., 1.,\n",
93 |        " -1., -1., 1., 1., 1., -1., 1., 1., 1., 1., 1., 1., 1.,\n",
94 |        " -1., 1., 1., -1., -1., 1., 1., -1., 1., 1., -1., -1., 1.,\n",
95 |        " -1., -1., 1., -1., 1., 1., 1., -1., -1., 1., 1., 1., 1.,\n",
96 |        " 1., 1., 1., 1., 1., -1., -1., 1., -1., 1., -1., -1., 1.,\n",
97 |        " -1., 1., -1., -1., 1., 1., 1., -1., 1., 1., 1., 1., 1.,\n",
98 |        " 1., -1., 1., 1., 1., -1., 1., 1., -1., 1., 1., 1., 1.,\n",
99 |        " 1., 1., 1., -1., 1., -1., 1., 1., -1., 1., 1., 1., 1.,\n",
100 |        " -1., 1., 1., 1., 1., -1., 1., -1., 1., 1., -1., 1., 1.,\n",
101 |        " 1., 1., -1., 1., -1., -1., -1., 1., 1., 1., 1., 1., 1.,\n",
102 |        " 1., -1., -1., 1., 1., -1., 1., -1., 1., 1., 1., -1., 1.,\n",
103 |        " -1., -1., 1., -1., -1., 1., 1., 1., 1., -1., 1., 1., 1.,\n",
104 |        " 1., 1., 1., 1., 1., -1., -1., -1., 1., -1., 1., -1., 1.,\n",
105 |        " -1., 1., 1., -1., -1., 1., -1., 1., 1., 1., 1., 1., 1.,\n",
106 |        " 1., 1., -1., 1., 1., -1., 1., 1., 1., 1., 1., -1., 1.,\n",
107 |        " 1., 1., 1., 1., 1., -1., -1., -1., -1., 1., -1., 1., 1.,\n",
108 |        " -1., 1., -1., -1., 1., 1., 1., 1., 1., 1., 1., -1., 1.,\n",
109 |        " -1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., -1.,\n",
110 |        " 1., -1., 1., 1., -1., 1., 1., 1., 1., -1., 1., -1., 1.,\n",
111 |        " 1., 1., 1., 1., -1., 1., -1., 1., 1., 1., -1., -1., 1.,\n",
112 |        " 1., 1., 1., 1., -1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
113 | " 1., -1., 1., 1., 1., 1., -1., 1., 1., 1., 1., 1., 1.,\n", 114 | " 1., 1., 1., 1., 1., 1., 1., -1., 1., -1., 1., 1., 1.,\n", 115 | " -1., -1., -1., 1., 1., 1., 1., 1., 1., 1., 1., 1., -1.,\n", 116 | " 1., 1., 1., -1., 1., 1., -1., -1., -1., 1., 1., -1., -1.,\n", 117 | " 1., -1., -1., -1., 1., 1., 1., 1., 1., 1., 1., 1., -1.,\n", 118 | " 1., 1., 1., 1., -1., 1., 1., -1., -1., 1., -1., 1., 1.,\n", 119 | " -1., 1., 1., 1., 1., 1., -1., 1., 1., 1., 1., 1., 1.,\n", 120 | " 1., 1., -1., 1., 1., 1., -1., 1., -1., 1., 1., 1., -1.,\n", 121 | " 1., 1., 1., -1., -1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", 122 | " 1., 1., -1., 1., -1., 1., -1., 1., -1., 1.])" 123 | ] 124 | }, 125 | "execution_count": 4, 126 | "metadata": {}, 127 | "output_type": "execute_result" 128 | } 129 | ], 130 | "source": [ 131 | "Y_data" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "Data preprocess" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 5, 144 | "metadata": { 145 | "collapsed": true 146 | }, 147 | "outputs": [], 148 | "source": [ 149 | "# Add bias\n", 150 | "X_data = np.hstack((np.ones((X_data.shape[0],1)), X_data))\n", 151 | "# weights initialization\n", 152 | "W = np.zeros(X_data.shape[1])" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 6, 158 | "metadata": {}, 159 | "outputs": [ 160 | { 161 | "data": { 162 | "text/plain": [ 163 | "array([[ 1. , 0.97681 , 0.10723 , 0.64385 , 0.29556 ],\n", 164 | " [ 1. , 0.67194 , 0.2418 , 0.83075 , 0.42741 ],\n", 165 | " [ 1. , 0.20619 , 0.23321 , 0.81004 , 0.98691 ],\n", 166 | " ..., \n", 167 | " [ 1. , 0.93895 , 0.93941 , 0.72496 , 0.95655 ],\n", 168 | " [ 1. , 0.046136, 0.94413 , 0.038311, 0.26812 ],\n", 169 | " [ 1. , 0.072491, 0.2242 , 0.62592 , 0.67238 ]])" 170 | ] 171 | }, 172 | "execution_count": 6, 173 | "metadata": {}, 174 | "output_type": "execute_result" 175 | } 176 | ], 177 | "source": [ 178 | "X_data" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 7, 184 | "metadata": {}, 185 | "outputs": [ 186 | { 187 | "data": { 188 | "text/plain": [ 189 | "array([ 0., 0., 0., 0., 0.])" 190 | ] 191 | }, 192 | "execution_count": 7, 193 | "metadata": {}, 194 | "output_type": "execute_result" 195 | } 196 | ], 197 | "source": [ 198 | "W" 199 | ] 200 | }, 201 | { 202 | "cell_type": "markdown", 203 | "metadata": {}, 204 | "source": [ 205 | "PLA iteration" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 8, 211 | "metadata": {}, 212 | "outputs": [ 213 | { 214 | "name": "stdout", 215 | "output_type": "stream", 216 | "text": [ 217 | "37\n" 218 | ] 219 | } 220 | ], 221 | "source": [ 222 | "PLA = pla()\n", 223 | "halt = PLA.pla_1(X_data, Y_data)\n", 224 | "print(halt)" 225 | ] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": {}, 230 | "source": [ 231 | "**2. Implement a version of PLA by visiting examples in fixed, pre-determined random cycles\n", 232 | "throughout the algorithm. Run the algorithm on the data set. Please repeat your experiment for\n", 233 | "2000 times, each with a different random seed. 
What is the average number of updates before the\n", 234 | "algorithm halts?**" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 9, 240 | "metadata": {}, 241 | "outputs": [ 242 | { 243 | "name": "stdout", 244 | "output_type": "stream", 245 | "text": [ 246 | "34.2895\n" 247 | ] 248 | } 249 | ], 250 | "source": [ 251 | "halt, accuracy = PLA.pla_2(X_data, Y_data)\n", 252 | "print(halt)" 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": {}, 258 | "source": [ 259 | "** 3. Implement a version of PLA by visiting examples in fixed, pre-determined random cycles throughout the algorithm, while changing the update rule to be:**\n", 260 | "\n", 261 | "$$W_{t+1}\\rightarrow W_t+\\eta y_{n(t)}x_{n(t)}$$\n", 262 | "\n", 263 | "**with $\\eta=0.5$. Note that your PLA in the previous problem corresponds to $\\eta=1$. Please repeat your experiment for 2000 times, each with a different random seed. What is the average number of updates before the algorithm halts? **" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 10, 269 | "metadata": {}, 270 | "outputs": [ 271 | { 272 | "name": "stdout", 273 | "output_type": "stream", 274 | "text": [ 275 | "34.2895\n" 276 | ] 277 | } 278 | ], 279 | "source": [ 280 | "halt, accuracy = PLA.pla_3(X_data, Y_data)\n", 281 | "print(halt)" 282 | ] 283 | }, 284 | { 285 | "cell_type": "markdown", 286 | "metadata": {}, 287 | "source": [ 288 | "# Homework 1-2: Pocket PLA" 289 | ] 290 | }, 291 | { 292 | "cell_type": "markdown", 293 | "metadata": {}, 294 | "source": [ 295 | "Next, we play with the pocket algorithm. Modify your PLA in Problem 16 to visit examples purely randomly, and then add the ‘pocket’ steps to the algorithm. We will use https://www.csie.ntu.edu.tw/~htlin/course/ml15fall/hw1/hw1_18_train.dat as the training data set D, and https://www.csie.ntu.edu.tw/~htlin/course/ml15fall/hw1/hw1_18_test.dat as the test set for “verifying” the g returned by your algorithm (see lecture 4 about verifying). The sets are of the same format as the previous one." 296 | ] 297 | }, 298 | { 299 | "cell_type": "markdown", 300 | "metadata": {}, 301 | "source": [ 302 | "**1. Run the pocket algorithm with a total of 50 updates on D, and verify the performance of w pocket using the test set. Please repeat your experiment for 2000 times, each with a different random seed. What is the average error rate on the test set? **" 303 | ] 304 | }, 305 | { 306 | "cell_type": "markdown", 307 | "metadata": {}, 308 | "source": [ 309 | "Load the dataset" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": 11, 315 | "metadata": { 316 | "collapsed": true 317 | }, 318 | "outputs": [], 319 | "source": [ 320 | "# train data\n", 321 | "X_train, Y_train = loadfile('./data/hw1_18_train.dat')\n", 322 | "# test data\n", 323 | "X_test, Y_test = loadfile('./data/hw1_18_test.dat')" 324 | ] 325 | }, 326 | { 327 | "cell_type": "markdown", 328 | "metadata": {}, 329 | "source": [ 330 | "Data preprocess" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": 12, 336 | "metadata": { 337 | "collapsed": true 338 | }, 339 | "outputs": [], 340 | "source": [ 341 | "# Add bias\n", 342 | "X_train = np.hstack((np.ones((X_train.shape[0],1)), X_train))\n", 343 | "X_test = np.hstack((np.ones((X_test.shape[0],1)), X_test))" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": 13, 349 | "metadata": {}, 350 | "outputs": [ 351 | { 352 | "data": { 353 | "text/plain": [ 354 | "array([[ 1. 
, 0.94544 , 0.42842 , 0.79833 , 0.16244 ],\n", 355 | " [ 1. , 0.85365 , 0.084168, 0.5682 , 0.49221 ],\n", 356 | " [ 1. , 0.17095 , 0.82127 , 0.98444 , 0.51486 ],\n", 357 | " ..., \n", 358 | " [ 1. , 0.078827, 0.3823 , 0.13965 , 0.82677 ],\n", 359 | " [ 1. , 0.92288 , 0.86313 , 0.22939 , 0.92214 ],\n", 360 | " [ 1. , 0.50315 , 0.41884 , 0.026094, 0.91623 ]])" 361 | ] 362 | }, 363 | "execution_count": 13, 364 | "metadata": {}, 365 | "output_type": "execute_result" 366 | } 367 | ], 368 | "source": [ 369 | "X_train" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": 14, 375 | "metadata": {}, 376 | "outputs": [ 377 | { 378 | "data": { 379 | "text/plain": [ 380 | "array([[ 1. , 0.62926 , 0.32783 , 0.010417, 0.73102 ],\n", 381 | " [ 1. , 0.32368 , 0.61439 , 0.42097 , 0.025626],\n", 382 | " [ 1. , 0.15968 , 0.83346 , 0.97515 , 0.32762 ],\n", 383 | " ..., \n", 384 | " [ 1. , 0.34445 , 0.30929 , 0.049222, 0.49242 ],\n", 385 | " [ 1. , 0.040789, 0.28334 , 0.55817 , 0.60612 ],\n", 386 | " [ 1. , 0.36302 , 0.34961 , 0.12651 , 0.54315 ]])" 387 | ] 388 | }, 389 | "execution_count": 14, 390 | "metadata": {}, 391 | "output_type": "execute_result" 392 | } 393 | ], 394 | "source": [ 395 | "X_test" 396 | ] 397 | }, 398 | { 399 | "cell_type": "markdown", 400 | "metadata": {}, 401 | "source": [ 402 | "Pocket PLA iteration" 403 | ] 404 | }, 405 | { 406 | "cell_type": "code", 407 | "execution_count": 15, 408 | "metadata": {}, 409 | "outputs": [ 410 | { 411 | "name": "stdout", 412 | "output_type": "stream", 413 | "text": [ 414 | "0.287786\n" 415 | ] 416 | } 417 | ], 418 | "source": [ 419 | "PocketPLA = pocket_pla()\n", 420 | "error_mean = PocketPLA.pocket_pla_1(X_train, Y_train, X_test, Y_test)\n", 421 | "print(error_mean)" 422 | ] 423 | }, 424 | { 425 | "cell_type": "markdown", 426 | "metadata": {}, 427 | "source": [ 428 | "**2. Modify your algorithm to return $w_{50}$(the PLA vector after 50 updates) instead of w (the pocket vector) after 50 updates. Run the modified algorithm on D, and verify the performance using the test set. Please repeat your experiment for 2000 times, each with a different random seed. What is the average error rate on the test set? Compare your result to the previous problem and briefly discuss your findings.**" 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": 16, 434 | "metadata": {}, 435 | "outputs": [ 436 | { 437 | "name": "stdout", 438 | "output_type": "stream", 439 | "text": [ 440 | "0.300748\n" 441 | ] 442 | } 443 | ], 444 | "source": [ 445 | "error_mean = PocketPLA.pocket_pla_2(X_train, Y_train, X_test, Y_test)\n", 446 | "print(error_mean)" 447 | ] 448 | }, 449 | { 450 | "cell_type": "markdown", 451 | "metadata": {}, 452 | "source": [ 453 | "**3. Modify your algorithm in Problem 18 to run for 100 updates instead of 50, and verify the performance of w pocket using the test set. Please repeat your experiment for 2000 times, each with a different random seed. 
What is the average error rate on the test set?**" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": 17, 459 | "metadata": {}, 460 | "outputs": [ 461 | { 462 | "name": "stdout", 463 | "output_type": "stream", 464 | "text": [ 465 | "0.287875\n" 466 | ] 467 | } 468 | ], 469 | "source": [ 470 | "error_mean = PocketPLA.pocket_pla_3(X_train, Y_train, X_test, Y_test)\n", 471 | "print(error_mean)" 472 | ] 473 | }, 474 | { 475 | "cell_type": "code", 476 | "execution_count": null, 477 | "metadata": { 478 | "collapsed": true 479 | }, 480 | "outputs": [], 481 | "source": [] 482 | } 483 | ], 484 | "metadata": { 485 | "kernelspec": { 486 | "display_name": "Python 3", 487 | "language": "python", 488 | "name": "python3" 489 | }, 490 | "language_info": { 491 | "codemirror_mode": { 492 | "name": "ipython", 493 | "version": 3 494 | }, 495 | "file_extension": ".py", 496 | "mimetype": "text/x-python", 497 | "name": "python", 498 | "nbconvert_exporter": "python", 499 | "pygments_lexer": "ipython3", 500 | "version": "3.6.3" 501 | } 502 | }, 503 | "nbformat": 4, 504 | "nbformat_minor": 2 505 | } 506 | -------------------------------------------------------------------------------- /Week1/reference_hw1/loadfile.py: -------------------------------------------------------------------------------- 1 | # homework 1 2 | 3 | #Load the dataset 4 | 5 | import numpy as np 6 | 7 | def loadfile(file): 8 | X = [] # features, shape = (samples, features) 9 | Y = [] # labels, shape = (sample,) 10 | for line in open(file).readlines(): 11 | items = line.strip().split('\t') # features and label split by Tab 12 | y = items[1].strip() 13 | y = float(y) # str to float 14 | Y.append(y) 15 | x = items[0].strip().split(' ') 16 | x = list(map(float, x)) # str to float 17 | X.append(x) 18 | X = np.array(X) # list to array 19 | Y = np.array(Y) # list to array 20 | return X, Y -------------------------------------------------------------------------------- /Week10/README.md: -------------------------------------------------------------------------------- 1 | ## 包含文件 2 | 3 | - hw7:第十周作业 4 | 5 | ## 主要内容 6 | 7 | 视频地址: 8 | 9 | https://www.bilibili.com/video/av36760800/?p=38 10 | 11 | https://www.bilibili.com/video/av36760800/?p=39 12 | 13 | https://www.bilibili.com/video/av36760800/?p=40 14 | 15 | https://www.bilibili.com/video/av36760800/?p=41 16 | 17 | https://www.bilibili.com/video/av36760800/?p=42 18 | 19 | https://www.bilibili.com/video/av36760800/?p=43 20 | 21 | https://www.bilibili.com/video/av36760800/?p=44 22 | 23 | https://www.bilibili.com/video/av36760800/?p=45 24 | 25 | 参考资料: 26 | 27 | https://redstonewill.com/601/ 28 | 29 | https://redstonewill.com/644/ 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /Week10/hw7/Homework7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RedstoneWill/HsuanTienLin-ML-Camp/5d24fe8410d0383db64bfe2663627ab34ad15fb8/Week10/hw7/Homework7.png -------------------------------------------------------------------------------- /Week10/hw7/hw3_train.dat: -------------------------------------------------------------------------------- 1 | 0.757222 0.633831 -1 2 | 0.847382 0.281581 -1 3 | 0.24931 0.618635 +1 4 | 0.538526 0.144259 -1 5 | 0.474435 0.414558 -1 6 | 0.374151 0.0120482 1 7 | 0.847185 0.217572 1 8 | 0.983368 0.250496 1 9 | 0.645141 0.485816 1 10 | 0.172211 0.254331 -1 11 | 0.116866 0.378804 -1 12 | 0.55097 0.760426 -1 13 | 
0.312109 0.442938 -1 14 | 0.304777 0.0529649 1 15 | 0.572727 0.370527 1 16 | 0.171491 0.50076 -1 17 | 0.644567 0.834055 -1 18 | 0.0529041 0.338461 -1 19 | 0.0323543 0.830701 -1 20 | 0.272193 0.587396 -1 21 | 0.123521 0.0516625 1 22 | 0.905544 0.247013 1 23 | 0.854276 0.559648 1 24 | 0.375914 0.505747 -1 25 | 0.160755 0.238718 -1 26 | 0.45893 0.227062 1 27 | 0.395407 0.791184 -1 28 | 0.742325 0.586444 1 29 | 0.43615 0.136922 1 30 | 0.954217 0.680325 1 31 | 0.916386 0.381431 1 32 | 0.953844 0.439266 1 33 | 0.328701 0.721918 -1 34 | 0.275732 0.43115 -1 35 | 0.892366 0.0136661 1 36 | 0.249529 0.0709084 1 37 | 0.124333 0.611515 -1 38 | 0.54449 0.423701 1 39 | 0.86019 0.93029 -1 40 | 0.432404 0.0901487 1 41 | 0.204973 0.406648 -1 42 | 0.0748025 0.568699 -1 43 | 0.936407 0.106094 1 44 | 0.572728 0.90924 -1 45 | 0.358618 0.651613 -1 46 | 0.631685 0.910141 -1 47 | 0.802581 0.599025 1 48 | 0.366818 0.0135169 1 49 | 0.708026 0.300654 1 50 | 0.243625 0.106277 1 51 | 0.960778 0.59799 1 52 | 0.726241 0.057674 1 53 | 0.158561 0.690295 -1 54 | 0.420638 0.503567 -1 55 | 0.651344 0.290269 1 56 | 0.933469 0.490516 1 57 | 0.502864 0.721677 -1 58 | 0.595151 0.82293 -1 59 | 0.696778 0.300018 1 60 | 0.927038 0.295737 1 61 | 0.145192 0.377728 -1 62 | 0.385435 0.68299 -1 63 | 0.296852 0.868018 -1 64 | 0.659204 0.77369 -1 65 | 0.896153 0.832046 1 66 | 0.466137 0.877674 -1 67 | 0.815532 0.164151 1 68 | 0.310117 0.857713 -1 69 | 0.522385 0.961609 -1 70 | 0.369345 0.781697 -1 71 | 0.901988 0.831265 1 72 | 0.692314 0.0640428 1 73 | 0.836977 0.614453 1 74 | 0.104584 0.357892 -1 75 | 0.265266 0.65833 -1 76 | 0.729254 0.885763 -1 77 | 0.205254 0.404956 -1 78 | 0.032359 0.778401 -1 79 | 0.464724 0.159682 1 80 | 0.940021 0.493738 1 81 | 0.248985 0.646083 -1 82 | 0.541258 0.728218 -1 83 | 0.391575 0.291076 1 84 | 0.0254967 0.300503 -1 85 | 0.475398 0.920203 -1 86 | 0.835664 0.584283 1 87 | 0.296033 0.0885163 1 88 | 0.0435908 0.646312 -1 89 | 0.284148 0.182427 1 90 | 0.627696 0.788116 -1 91 | 0.312939 0.871275 -1 92 | 0.676521 0.316903 1 93 | 0.0123539 0.178643 -1 94 | 0.682164 0.777194 -1 95 | 0.421563 0.302683 1 96 | 0.03183 0.289761 -1 97 | 0.435715 0.190071 1 98 | 0.730492 0.0655594 1 99 | 0.92527 0.524315 1 100 | 0.984815 0.383621 1 101 | -------------------------------------------------------------------------------- /Week10/hw7/hw7.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import pandas as pd" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "# 为了实现决策树,需建立树结点的类\n", 24 | "# 定义树结点\n", 25 | "class Node:\n", 26 | " def __init__(self, theta, index, value=None):\n", 27 | " self.theta = theta # 划分的阈值\n", 28 | " self.index = index # 选用的维度\n", 29 | " self.value = value # 根节点的值\n", 30 | " self.leftNode = None\n", 31 | " self.rightNode = None" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 6, 37 | "metadata": { 38 | "collapsed": true 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "# 定义Gini系数---作为每个子集“好坏”的衡量标准\n", 43 | "def gini(Y):\n", 44 | " l = Y.shape[0]\n", 45 | " if l == 0:\n", 46 | " return 1\n", 47 | " return 1-(np.sum(Y==1)/l)**2-(np.sum(Y==-1)/l)**2" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 7, 53 | "metadata": 
{
54 |     "collapsed": true
55 |    },
56 |    "outputs": [],
57 |    "source": [
58 |     "# 为了便于实现,找出每一维度下的最佳划分阈值和对应的branch值 --- 但这样实现代价是运行速度\n",
59 |     "# 单维情况下的最佳树桩---大于等于为1类\n",
60 |     "def one_stump(X, Y, thres):\n",
61 |     "    l = thres.shape[0]\n",
62 |     "    mini = Y.shape[0]\n",
63 |     "    for i in range(l):\n",
64 |     "        Y1 = Y[X<thres[i]]\n",
65 |     "        Y2 = Y[X>=thres[i]]\n",
66 |     "        judge = Y1.shape[0]*gini(Y1)+Y2.shape[0]*gini(Y2)\n",
67 |     "        if mini>judge:\n",
68 |     "            mini = judge; b = thres[i]\n",
69 |     "    return mini, b"
70 |    ]
71 |   },
72 |   {
73 |    "cell_type": "code",
74 |    "execution_count": 8,
75 |    "metadata": {
76 |     "collapsed": true
77 |    },
78 |    "outputs": [],
79 |    "source": [
80 |     "# 找出最佳划分的阈值和对应的维度\n",
81 |     "# 结合全部维数的决策树桩\n",
82 |     "def decision_stump(X, Y):\n",
83 |     "    row, col = X.shape\n",
84 |     "    Xsort = np.sort(X, 0)\n",
85 |     "    thres = (np.r_[Xsort[0:1, :]-0.1, Xsort]+np.r_[Xsort, Xsort[-1:, :]+0.1])/2\n",
86 |     "    mpurity = row; mb = 0; index = 0\n",
87 |     "    for i in range(col):\n",
88 |     "        purity, b = one_stump(X[:, i], Y[:, 0], thres[:, i])\n",
89 |     "        if mpurity > purity:\n",
90 |     "            mpurity = purity; mb = b; index = i\n",
91 |     "    return mb, index"
92 |    ]
93 |   },
94 |   {
95 |    "cell_type": "code",
96 |    "execution_count": 9,
97 |    "metadata": {
98 |     "collapsed": true
99 |    },
100 |    "outputs": [],
101 |    "source": [
102 |     "# 定义划分终止的条件\n",
103 |     "# 终止条件\n",
104 |     "def stop_cond(X, Y):\n",
105 |     "    if np.sum(Y!=Y[0])==0 or X.shape[0]==1 or np.sum(X!=X[0, :])==0:\n",
106 |     "        return True\n",
107 |     "    return False"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "code",
112 |    "execution_count": 10,
113 |    "metadata": {
114 |     "collapsed": true
115 |    },
116 |    "outputs": [],
117 |    "source": [
118 |     "# 定义完全生长的决策树\n",
119 |     "def dTree(X, Y):\n",
120 |     "    if stop_cond(X, Y):\n",
121 |     "        node = Node(None, None, Y[0])\n",
122 |     "        return node\n",
123 |     "    b, index = decision_stump(X, Y)\n",
124 |     "    pos1 = X[:, index] < b; pos2 = X[:, index] >= b\n",
125 |     "    leftX = X[pos1, :]; leftY = Y[pos1, 0:1]\n",
126 |     "    rightX = X[pos2, :]; rightY = Y[pos2, 0:1]\n",
127 |     "    node = Node(b, index)\n",
128 |     "    node.leftNode = dTree(leftX, leftY)\n",
129 |     "    node.rightNode = dTree(rightX, rightY)\n",
130 |     "    return node"
131 |    ]
132 |   },
133 |   {
134 |    "cell_type": "code",
135 |    "execution_count": 11,
136 |    "metadata": {
137 |     "collapsed": true
138 |    },
139 |    "outputs": [],
140 |    "source": [
141 |     "# 定义只进行一次划分的决策树(夸张的剪枝)\n",
142 |     "def dTree_one(X, Y):\n",
143 |     "    b, index = decision_stump(X, Y)\n",
144 |     "    pos1 = X[:, index] < b; pos2 = X[:, index] >= b\n",
145 |     "    node = Node(b, index)\n",
146 |     "    value1 = 1 if np.sign(np.sum(Y[pos1]))>=0 else -1\n",
147 |     "    value2 = 1 if np.sign(np.sum(Y[pos2]))>=0 else -1\n",
148 |     "    node.leftNode = Node(None, None, np.array([value1]))\n",
149 |     "    node.rightNode = Node(None, None, np.array([value2]))\n",
150 |     "    return node"
151 |    ]
152 |   },
153 |   {
154 |    "cell_type": "code",
155 |    "execution_count": 12,
156 |    "metadata": {
157 |     "collapsed": true
158 |    },
159 |    "outputs": [],
160 |    "source": [
161 |     "# 预测函数---基于决策树对单个样本进行的预测\n",
162 |     "def predict_one(node, X):\n",
163 |     "    if node.value is not None:\n",
164 |     "        return node.value[0]\n",
165 |     "    thre = node.theta; index = node.index\n",
166 |     "    if X[index] < thre:\n",
167 |     "        return predict_one(node.leftNode, X)\n",
168 |     "    else:\n",
169 |     "        return predict_one(node.rightNode, X)"
170 |    ]
171 |   },
172 |   {
173 |    "cell_type": "code",
174 |    "execution_count": 13,
175 |    "metadata": {
176 |     "collapsed": true
177 |    },
178 |    "outputs": [],
179 |    "source": [
180 |     "# 基于决策树的预测结果及其错误率衡量函数\n",
181 |     "def 
err_fun(X, Y, node):\n", 182 | " row, col = X.shape\n", 183 | " Yhat = np.zeros(Y.shape)\n", 184 | " for i in range(row):\n", 185 | " Yhat[i] = predict_one(node, X[i, :])\n", 186 | " return Yhat, np.sum(Yhat!=Y)/row" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 26, 192 | "metadata": { 193 | "collapsed": true 194 | }, 195 | "outputs": [], 196 | "source": [ 197 | "# bagging函数\n", 198 | "def bagging(X, Y):\n", 199 | " row, col = X.shape\n", 200 | " pos = np.random.randint(0, row, (row,))\n", 201 | " return X[pos, :], Y[pos, :]" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 15, 207 | "metadata": { 208 | "collapsed": true 209 | }, 210 | "outputs": [], 211 | "source": [ 212 | "# 随机森林算法---没有加入feature的随机选择\n", 213 | "def random_forest(X, Y, T):\n", 214 | " nodeArr = []\n", 215 | " for i in range(T):\n", 216 | " Xtemp, Ytemp = bagging(X, Y)\n", 217 | " node = dTree(Xtemp, Ytemp)\n", 218 | " nodeArr.append(node)\n", 219 | " return nodeArr" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": 16, 225 | "metadata": { 226 | "collapsed": true 227 | }, 228 | "outputs": [], 229 | "source": [ 230 | "# 基于剪枝后的随机森林算法\n", 231 | "def random_forest_pruned(X, Y, T):\n", 232 | " nodeArr = []\n", 233 | " for i in range(T):\n", 234 | " Xtemp, Ytemp = bagging(X, Y)\n", 235 | " node = dTree_one(Xtemp, Ytemp)\n", 236 | " nodeArr.append(node)\n", 237 | " return nodeArr" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 17, 243 | "metadata": { 244 | "collapsed": true 245 | }, 246 | "outputs": [], 247 | "source": [ 248 | "# ----------------具体题目-------------------\n", 249 | "# 加载数据函数\n", 250 | "def loadData(filename):\n", 251 | " data = pd.read_csv(filename, sep='\\s+', header=None)\n", 252 | " data = data.as_matrix()\n", 253 | " col, row = data.shape\n", 254 | " X = data[:, 0: row-1]\n", 255 | " Y = data[:, row-1:row]\n", 256 | " return X, Y" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": 18, 262 | "metadata": { 263 | "collapsed": true 264 | }, 265 | "outputs": [], 266 | "source": [ 267 | "# 导入数据\n", 268 | "X, Y = loadData('hw3_train.dat')\n", 269 | "Xtest, Ytest = loadData('hw3_test.dat')" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 19, 275 | "metadata": { 276 | "collapsed": false 277 | }, 278 | "outputs": [ 279 | { 280 | "name": "stdout", 281 | "output_type": "stream", 282 | "text": [ 283 | "完全生长的决策树内部结点数目: 10\n" 284 | ] 285 | } 286 | ], 287 | "source": [ 288 | "# Q13\n", 289 | "# 定义一个搜索树有多少结点的函数---叶子结点不计入\n", 290 | "def internal_node(node):\n", 291 | " if node == None:\n", 292 | " return 0\n", 293 | " if node.leftNode == None and node.rightNode == None:\n", 294 | " return 0\n", 295 | " l = 0; r = 0\n", 296 | " if node.leftNode != None:\n", 297 | " l = internal_node(node.leftNode)\n", 298 | " if node.rightNode != None:\n", 299 | " r = internal_node(node.rightNode)\n", 300 | " return 1 + l + r\n", 301 | "\n", 302 | "node = dTree(X, Y)\n", 303 | "print('完全生长的决策树内部结点数目:', internal_node(node))" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": 22, 309 | "metadata": { 310 | "collapsed": false 311 | }, 312 | "outputs": [ 313 | { 314 | "name": "stdout", 315 | "output_type": "stream", 316 | "text": [ 317 | "Ein: 0.0 \n", 318 | "Eout: 0.126\n" 319 | ] 320 | } 321 | ], 322 | "source": [ 323 | "# Q14 and Q15\n", 324 | "_, ein = err_fun(X, Y, node)\n", 325 | "_, eout = err_fun(Xtest, Ytest, node)\n", 326 | "print('Ein: ', ein, 
'\\nEout: ', eout)" 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": 27, 332 | "metadata": { 333 | "collapsed": false, 334 | "scrolled": true 335 | }, 336 | "outputs": [ 337 | { 338 | "name": "stdout", 339 | "output_type": "stream", 340 | "text": [ 341 | "Ein(gt)的平均: 0.0518873333333\n", 342 | "Ein(G): 0.0\n", 343 | "Eout(G): 0.07452\n" 344 | ] 345 | } 346 | ], 347 | "source": [ 348 | "# Q16,Q17,Q18\n", 349 | "ein = 0; eout = 0; err = 0\n", 350 | "for j in range(50):\n", 351 | " nodeArr = random_forest(X, Y, 300)\n", 352 | " l = len(nodeArr)\n", 353 | " yhat1 = np.zeros((Y.shape[0], l))\n", 354 | " yhat2 = np.zeros((Ytest.shape[0], l))\n", 355 | " for i in range(l):\n", 356 | " yhat1[:, i:i+1], _ = err_fun(X, Y, nodeArr[i])\n", 357 | " yhat2[:, i:i+1], _ = err_fun(Xtest, Ytest, nodeArr[i])\n", 358 | " errg = np.sum(yhat1!=Y, 0)/Y.shape[0]\n", 359 | " Yhat = np.sign(np.sum(yhat1, 1)).reshape(Y.shape)\n", 360 | " Ytesthat = np.sign(np.sum(yhat2, 1)).reshape(Ytest.shape)\n", 361 | " Yhat[Yhat == 0] = 1; Ytesthat[Ytesthat == 0] = 1\n", 362 | " ein += np.sum(Yhat!=Y)/Y.shape[0]\n", 363 | " eout += np.sum(Ytesthat!=Ytest)/Ytest.shape[0]\n", 364 | " err += np.sum(errg)/l\n", 365 | "print('Ein(gt)的平均:', err/50)\n", 366 | "print('Ein(G): ', ein/50)\n", 367 | "print('Eout(G): ', eout/50)" 368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": 28, 373 | "metadata": { 374 | "collapsed": false 375 | }, 376 | "outputs": [ 377 | { 378 | "name": "stdout", 379 | "output_type": "stream", 380 | "text": [ 381 | "Ein: 0.1106\n", 382 | "Eout: 0.15336\n" 383 | ] 384 | } 385 | ], 386 | "source": [ 387 | "# Q19, Q20\n", 388 | "ein = 0; eout = 0\n", 389 | "for j in range(50):\n", 390 | " nodeArr = random_forest_pruned(X, Y, 300)\n", 391 | " l = len(nodeArr)\n", 392 | " yhat1 = np.zeros((Y.shape[0], l))\n", 393 | " yhat2 = np.zeros((Ytest.shape[0], l))\n", 394 | " for i in range(l):\n", 395 | " yhat1[:, i:i + 1], _ = err_fun(X, Y, nodeArr[i])\n", 396 | " yhat2[:, i:i + 1], _ = err_fun(Xtest, Ytest, nodeArr[i])\n", 397 | " Yhat = np.sign(np.sum(yhat1, 1)).reshape(Y.shape)\n", 398 | " Ytesthat = np.sign(np.sum(yhat2, 1)).reshape(Ytest.shape)\n", 399 | " Yhat[Yhat == 0] = 1;\n", 400 | " Ytesthat[Ytesthat == 0] = 1\n", 401 | " ein += np.sum(Yhat != Y) / Y.shape[0]\n", 402 | " eout += np.sum(Ytesthat != Ytest) / Ytest.shape[0]\n", 403 | "print('Ein: ', ein/50)\n", 404 | "print('Eout: ', eout/50)" 405 | ] 406 | } 407 | ], 408 | "metadata": { 409 | "kernelspec": { 410 | "display_name": "Python 3", 411 | "language": "python", 412 | "name": "python3" 413 | }, 414 | "language_info": { 415 | "codemirror_mode": { 416 | "name": "ipython", 417 | "version": 3 418 | }, 419 | "file_extension": ".py", 420 | "mimetype": "text/x-python", 421 | "name": "python", 422 | "nbconvert_exporter": "python", 423 | "pygments_lexer": "ipython3", 424 | "version": "3.6.0" 425 | } 426 | }, 427 | "nbformat": 4, 428 | "nbformat_minor": 2 429 | } 430 | -------------------------------------------------------------------------------- /Week11/README.md: -------------------------------------------------------------------------------- 1 | ## 包含文件 2 | 3 | - hw8:第十一周作业 4 | 5 | ## 主要内容 6 | 7 | 视频地址: 8 | 9 | https://www.bilibili.com/video/av36760800/?p=46 10 | 11 | https://www.bilibili.com/video/av36760800/?p=47 12 | 13 | https://www.bilibili.com/video/av36760800/?p=48 14 | 15 | https://www.bilibili.com/video/av36760800/?p=49 16 | 17 | https://www.bilibili.com/video/av36760800/?p=50 18 | 19 | 
https://www.bilibili.com/video/av36760800/?p=51 20 | 21 | https://www.bilibili.com/video/av36760800/?p=52 22 | 23 | https://www.bilibili.com/video/av36760800/?p=53 24 | 25 | https://www.bilibili.com/video/av36760800/?p=54 26 | 27 | https://www.bilibili.com/video/av36760800/?p=55 28 | 29 | https://www.bilibili.com/video/av36760800/?p=56 30 | 31 | https://www.bilibili.com/video/av36760800/?p=57 32 | 33 | https://www.bilibili.com/video/av36760800/?p=58 34 | 35 | https://www.bilibili.com/video/av36760800/?p=59 36 | 37 | https://www.bilibili.com/video/av36760800/?p=60 38 | 39 | https://www.bilibili.com/video/av36760800/?p=61 40 | 41 | https://www.bilibili.com/video/av36760800/?p=62 42 | 43 | https://www.bilibili.com/video/av36760800/?p=63 44 | 45 | https://www.bilibili.com/video/av36760800/?p=64 46 | 47 | https://www.bilibili.com/video/av36760800/?p=65 48 | 49 | 参考资料: 50 | 51 | https://redstonewill.com/682/ 52 | 53 | https://redstonewill.com/710/ 54 | 55 | https://redstonewill.com/739/ 56 | 57 | https://redstonewill.com/783/ 58 | 59 | https://redstonewill.com/810/ 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /Week11/hw8/Homework8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RedstoneWill/HsuanTienLin-ML-Camp/5d24fe8410d0383db64bfe2663627ab34ad15fb8/Week11/hw8/Homework8.png -------------------------------------------------------------------------------- /Week11/hw8/hw4_kmeans_train.dat: -------------------------------------------------------------------------------- 1 | 0.8105 -0.35 0.4769 0.4541 -0.9829 0.5252 0.3838 -0.3408 -0.4824 2 | -0.6273 -0.2097 0.9404 0.1143 0.3487 -0.5206 0.0061 0.5024 -0.6687 3 | 0.1624 -0.1173 0.426 -0.3607 -0.6632 0.4431 -0.8355 0.7206 -0.8977 4 | -1 0.7758 -0.267 -0.888 -0.1099 -0.9183 -0.4086 0.8962 0.5841 5 | 0.8464 0.1762 0.2729 0.2724 0.8155 0.6096 -0.2844 0.98 0.3302 6 | -0.0135 0.6193 0.7705 0.7195 0.7313 -0.3395 0.8012 -0.6773 -0.4433 7 | 0.0934 -0.8379 -0.2083 -0.6337 0.4346 -0.3928 0.9759 -0.8499 -0.4128 8 | 0.8923 -0.0018 -0.6837 0.6628 -0.2823 -0.9524 -0.6767 -0.4811 -0.6296 9 | -0.9028 0.701 -0.9063 -0.1111 -0.9328 0.5282 0.496 -0.9569 0.6784 10 | -0.9706 0.1392 0.6562 -0.6543 -0.198 -0.6261 -0.6067 0.1254 -0.1071 11 | -0.6891 -0.4157 0.1057 -0.5954 0.4732 0.1729 0.9328 -0.0308 0.216 12 | -0.0845 -0.5858 -0.0486 -0.4282 -0.2401 0.7534 -0.0543 0.1531 -0.1212 13 | -0.9596 -0.3929 0.9556 0.1461 0.0117 0.4288 -0.681 -0.0555 -0.602 14 | 0.9124 0.7287 -0.7506 -0.1363 -0.6995 0.0093 -0.3828 0.2462 -0.8376 15 | 0.7514 0.7608 -0.0175 0.7071 -0.0931 0.9942 0.1359 0.2259 -0.0613 16 | -0.1805 -0.2265 -0.9636 0.0689 0.6373 -0.6631 -0.9218 -0.7456 0.5831 17 | -0.3048 0.8819 -0.8509 0.6777 0.5889 0.056 0.6719 -0.2752 -0.7181 18 | -0.5873 -0.9376 -0.3226 -0.5509 0.1313 -0.6853 -0.214 0.2095 -0.4309 19 | 0.425 -0.535 -0.6683 0.5741 -0.8574 0.9207 -0.3699 0.8145 -0.3545 20 | 0.8587 -0.0571 -0.7906 -0.4638 0.392 0.3407 -0.1491 -0.822 -0.4498 21 | -0.8107 0.0089 -0.765 -0.479 -0.4171 -0.6223 -0.5583 0.213 -0.8078 22 | -0.8616 0.9386 -0.9095 -0.6522 -0.5252 0.4825 0.6886 0.3256 0.6605 23 | -1 -0.3258 -0.1998 -0.7559 0.1952 0.3832 -0.3782 0.6369 -0.4038 24 | -0.4212 -0.1462 -0.2603 -0.3308 0.2016 0.2144 -0.8483 -0.1099 -0.46 25 | 0.8112 0.377 -0.5551 -0.3885 0.6211 0.6401 0.9946 -0.7571 0.277 26 | -0.8868 0.0669 0.5703 -0.1472 0.7361 -0.2282 -0.9328 0.8879 0.662 27 | 0.6635 0.5312 0.5358 -0.8916 -0.8574 0.1569 0.7485 
-0.8628 0.3998 28 | 0.7432 -0.8466 -0.9884 0.3135 0.0062 0.7477 -0.9147 0.0734 0.6355 29 | -0.3031 0.2371 -0.4132 -0.7674 0.3454 -0.2706 0.3895 0.0939 -0.1334 30 | -1 -0.1108 0.7883 -0.7978 -0.7973 -0.2055 0.9498 -0.712 0.8679 31 | 1 0.2703 -0.6408 -0.4365 0.5029 0.7046 0.2929 -0.1076 -0.2015 32 | 0.3891 0.1182 -0.0468 0.1774 0.3203 0.1559 0.9719 0.2702 0.4439 33 | -0.4895 0.7533 0.3229 -0.1304 -0.6832 -0.1742 -0.4258 0.6097 0.7182 34 | -0.6454 -0.0875 0.4457 0.3077 -0.91 -0.234 -0.5364 -0.9381 -1 35 | 0.4393 0.8004 -0.5783 -0.2378 -0.3299 -0.2615 0.588 0.2443 -0.6518 36 | 0.0337 0.2622 -0.4467 -0.5206 -0.4301 -0.3567 0.2454 0.0335 -0.2949 37 | -0.1583 0.767 0.6972 0.2634 -0.4708 -0.6327 -0.998 -0.8828 0.6116 38 | -0.8917 0.1634 -0.6017 -0.3384 0.6428 -0.0318 0.3049 -0.1118 -1 39 | -0.4864 0.1848 0.0375 -0.7892 -0.5517 0.5667 -0.4218 -0.5498 0.6839 40 | 0.5545 0.3762 -0.5996 0.9528 -0.9622 -0.9568 -0.0789 0.3427 -0.0686 41 | 0.1361 -0.5169 -0.3709 -0.8264 -0.306 0.0774 0.7403 0.2721 0.5276 42 | 0.7686 0.4347 -0.0279 -0.831 0.3875 0.0099 -0.7878 -0.6914 -0.6474 43 | 0.689 -0.767 -0.8421 -0.6819 -0.5934 -0.1481 0.3954 -0.8532 -0.876 44 | -0.153 0.8711 -0.0993 0.8191 -0.9599 -0.7117 -0.171 -0.7477 -0.4031 45 | -0.4384 0.3295 0.1583 -0.2805 0.6476 0.5649 0.5713 0.043 0.7117 46 | -0.2528 -0.9359 0.2564 0.6479 0.8832 0.2966 0.9362 -0.2878 0.5489 47 | 0.2867 0.3421 0.9149 -0.555 -0.9384 0.5625 -0.9901 0.6329 -0.3945 48 | -0.6103 0.3564 0.8529 0.6461 0.0044 0.7361 -0.0573 -0.0595 -0.5517 49 | -1 0.1217 -0.5353 0.9365 0.5667 -0.4737 0.4989 0.5765 -0.8408 50 | -0.5352 -0.3079 0.453 -0.6823 -0.6618 -0.5426 -0.9462 0.2809 0.3979 51 | 0.9667 0.2303 0.8283 -0.5686 0.1668 0.3949 -0.0423 -0.3343 -0.0286 52 | -0.2993 0.911 0.2642 -0.8462 -0.7713 0.6024 -0.3888 -0.7175 -0.1167 53 | 0.5873 0.5954 0.0947 0.4155 -0.9732 -0.7385 -0.1896 -0.0155 -0.0728 54 | -0.3765 0.4062 0.0545 0.8877 0.56 0.2833 0.4901 -0.8289 0.5658 55 | -0.1065 -0.3518 0.5746 0.9882 -0.9363 0.6014 -0.7503 -0.1259 -0.4141 56 | -0.9823 0.3309 -0.2012 0.0723 0.2186 -0.6412 -0.6445 -0.2913 -0.4701 57 | -0.749 0.0047 -0.5807 0.8256 -0.007 -0.517 0.4271 0.2427 0.3572 58 | -0.9071 0.3115 -0.9485 -0.1083 -0.6162 0.2701 0.2505 -0.2607 0.9788 59 | -0.7382 0.1835 -0.8231 -0.3189 0.0091 0.1698 0.1642 -0.5638 -0.5875 60 | 0.2551 0.2422 0.4373 0.3066 -0.8661 0.821 -0.4233 0.3844 -0.4397 61 | -0.2114 0.9172 0.3369 -0.0345 -0.4017 -0.654 -0.8647 0.7625 -0.2178 62 | 0.5056 -0.9265 0.6228 -0.0571 0.3801 0.7567 -0.2361 0.9569 0.1411 63 | -0.3013 -0.0825 0.8785 -0.9643 0.883 -0.5231 -0.6183 -0.9817 -0.7606 64 | -0.2241 0.4515 0.4151 -0.6012 -0.6056 -0.2047 -0.8445 0.1584 -0.2479 65 | 0.5637 0.7266 -0.689 0.4422 0.7623 -0.8061 0.9191 -0.856 -0.7878 66 | -0.9766 -0.5208 -0.8244 0.4386 -0.1221 -0.4299 -0.7662 0.0334 0.7284 67 | 0.644 0.496 -0.0344 0.955 -0.0618 -0.2722 -0.8511 -0.1426 -0.1281 68 | 0.8634 0.7211 -0.6378 -0.9609 0.1597 0.2401 -0.3909 0.3935 -0.7265 69 | 0.7875 -0.7259 -0.9684 -0.2469 -0.771 -0.0301 0.4809 -0.6221 0.8272 70 | -0.5843 0.7417 -0.738 -0.2221 0.7808 0.4217 -0.982 -0.6101 -0.1848 71 | 0.4305 0.0635 -0.9011 0.4622 0.8166 -0.6721 -0.5679 0.2975 -0.2941 72 | 0.6433 -0.4014 0.0649 0.9053 0.3765 -0.1543 0.3269 0.3946 0.2356 73 | 0.1617 -0.9885 -0.6974 0.2606 0.4737 -0.8808 0.5885 0.9057 0.4168 74 | 0.0624 -0.0892 0.8487 -0.8727 -0.184 0.2252 -0.0271 -0.857 -0.3802 75 | 0.4106 -0.2164 -0.1017 0.7132 -0.9558 -0.628 0.8325 0.6327 -0.7223 76 | 0.5663 -0.2714 -0.379 0.415 -0.1441 0.437 -0.3598 0.8288 0.58 77 | -0.5474 0.6195 -0.7293 0.3509 0.3328 
-0.6851 0.7229 0.1652 0.9476 78 | -0.8465 -0.7029 -0.7304 -0.2255 0.712 0.1255 -0.7885 -0.6478 -0.0456 79 | 0.1437 0.6306 -0.1798 0.4145 -0.0185 -0.847 0.7294 -0.2956 0.3182 80 | 0.0927 0.3018 -0.2395 0.3623 -0.9236 -0.5275 -0.5121 -0.7121 -0.1753 81 | 0.6346 -0.1202 0.2456 -0.5452 -0.7057 -0.7729 -0.3923 -0.9763 -0.0685 82 | -0.878 -0.6548 -0.9133 -0.1175 0.7075 -0.837 0.355 -0.8046 -0.5491 83 | -0.7684 0.7061 0.1463 0.4771 -0.8391 0.4406 0.7042 -0.2314 0.4643 84 | 0.0571 -0.5249 -0.2373 0.1438 0.3575 -0.5297 0.3069 -0.2875 -0.3343 85 | -0.4453 0.7404 -0.9191 0.701 0.2175 -0.7582 0.1417 -0.0783 0.0104 86 | -0.8114 -0.1131 -0.4669 -0.0486 -0.9693 0.8906 0.4216 0.3376 -0.3969 87 | -0.2346 0.9384 -0.2555 -0.1536 0.6394 0.962 0.0882 -0.2189 -0.1162 88 | 0.8614 0.3468 0.158 -0.6056 -0.7018 0.1887 -0.715 0.7198 -0.4737 89 | 0.3875 -0.0368 -0.0563 -0.868 0.8095 -0.4169 -0.906 -0.1023 0.3642 90 | 0.6901 -0.339 0.2563 -0.152 0.0554 0.5544 -0.9633 0.3405 0.2742 91 | 0.1901 0.9995 -0.7577 -0.8662 -0.8685 -0.9482 -0.283 -0.7745 -0.0505 92 | -0.258 -0.6876 0.4063 0.9982 0.1604 -0.5383 0.5527 0.1971 0.8022 93 | 0.1874 0.1349 -0.3578 0.4296 0.2687 -0.2263 0.4814 0.9857 -0.0008 94 | 0.1218 0.6413 0.1371 -0.4719 0.6396 -0.7025 -0.0102 0.1922 0.4946 95 | 0.4655 0.1148 -0.6657 -0.8923 -0.4556 0.6031 -0.1186 -0.9741 0.5888 96 | -0.0921 0.9551 -0.8037 -0.9549 -0.5168 0.8359 -0.6574 0.4731 0.0281 97 | -0.7088 -0.4467 -0.9106 -0.3745 -0.339 -0.3662 -0.7714 0.5423 -0.3404 98 | -0.9721 -0.586 0.9048 -0.7758 -0.541 -0.6119 -0.9399 -0.1984 0.8611 99 | 0.1099 -0.9784 0.7673 0.1993 -0.3529 -0.5718 0.8331 -0.1243 0.9706 100 | 0.5588 -0.8062 0.3135 0.4636 -0.5819 0.7725 0.8517 -0.5218 -0.4259 101 | -------------------------------------------------------------------------------- /Week11/hw8/hw4_knn_train.dat: -------------------------------------------------------------------------------- 1 | 0.8105 -0.35 0.4769 0.4541 -0.9829 0.5252 0.3838 -0.3408 -0.4824 -1 2 | -0.6273 -0.2097 0.9404 0.1143 0.3487 -0.5206 0.0061 0.5024 -0.6687 1 3 | 0.1624 -0.1173 0.426 -0.3607 -0.6632 0.4431 -0.8355 0.7206 -0.8977 1 4 | -1 0.7758 -0.267 -0.888 -0.1099 -0.9183 -0.4086 0.8962 0.5841 1 5 | 0.8464 0.1762 0.2729 0.2724 0.8155 0.6096 -0.2844 0.98 0.3302 -1 6 | -0.0135 0.6193 0.7705 0.7195 0.7313 -0.3395 0.8012 -0.6773 -0.4433 1 7 | 0.0934 -0.8379 -0.2083 -0.6337 0.4346 -0.3928 0.9759 -0.8499 -0.4128 1 8 | 0.8923 -0.0018 -0.6837 0.6628 -0.2823 -0.9524 -0.6767 -0.4811 -0.6296 1 9 | -0.9028 0.701 -0.9063 -0.1111 -0.9328 0.5282 0.496 -0.9569 0.6784 -1 10 | -0.9706 0.1392 0.6562 -0.6543 -0.198 -0.6261 -0.6067 0.1254 -0.1071 1 11 | -0.6891 -0.4157 0.1057 -0.5954 0.4732 0.1729 0.9328 -0.0308 0.216 1 12 | -0.0845 -0.5858 -0.0486 -0.4282 -0.2401 0.7534 -0.0543 0.1531 -0.1212 -1 13 | -0.9596 -0.3929 0.9556 0.1461 0.0117 0.4288 -0.681 -0.0555 -0.602 1 14 | 0.9124 0.7287 -0.7506 -0.1363 -0.6995 0.0093 -0.3828 0.2462 -0.8376 1 15 | 0.7514 0.7608 -0.0175 0.7071 -0.0931 0.9942 0.1359 0.2259 -0.0613 -1 16 | -0.1805 -0.2265 -0.9636 0.0689 0.6373 -0.6631 -0.9218 -0.7456 0.5831 -1 17 | -0.3048 0.8819 -0.8509 0.6777 0.5889 0.056 0.6719 -0.2752 -0.7181 -1 18 | -0.5873 -0.9376 -0.3226 -0.5509 0.1313 -0.6853 -0.214 0.2095 -0.4309 -1 19 | 0.425 -0.535 -0.6683 0.5741 -0.8574 0.9207 -0.3699 0.8145 -0.3545 -1 20 | 0.8587 -0.0571 -0.7906 -0.4638 0.392 0.3407 -0.1491 -0.822 -0.4498 1 21 | -0.8107 0.0089 -0.765 -0.479 -0.4171 -0.6223 -0.5583 0.213 -0.8078 1 22 | -0.8616 0.9386 -0.9095 -0.6522 -0.5252 0.4825 0.6886 0.3256 0.6605 -1 23 | -1 -0.3258 -0.1998 -0.7559 0.1952 
0.3832 -0.3782 0.6369 -0.4038 1 24 | -0.4212 -0.1462 -0.2603 -0.3308 0.2016 0.2144 -0.8483 -0.1099 -0.46 1 25 | 0.8112 0.377 -0.5551 -0.3885 0.6211 0.6401 0.9946 -0.7571 0.277 -1 26 | -0.8868 0.0669 0.5703 -0.1472 0.7361 -0.2282 -0.9328 0.8879 0.662 1 27 | 0.6635 0.5312 0.5358 -0.8916 -0.8574 0.1569 0.7485 -0.8628 0.3998 1 28 | 0.7432 -0.8466 -0.9884 0.3135 0.0062 0.7477 -0.9147 0.0734 0.6355 -1 29 | -0.3031 0.2371 -0.4132 -0.7674 0.3454 -0.2706 0.3895 0.0939 -0.1334 1 30 | -1 -0.1108 0.7883 -0.7978 -0.7973 -0.2055 0.9498 -0.712 0.8679 1 31 | 1 0.2703 -0.6408 -0.4365 0.5029 0.7046 0.2929 -0.1076 -0.2015 -1 32 | 0.3891 0.1182 -0.0468 0.1774 0.3203 0.1559 0.9719 0.2702 0.4439 -1 33 | -0.4895 0.7533 0.3229 -0.1304 -0.6832 -0.1742 -0.4258 0.6097 0.7182 1 34 | -0.6454 -0.0875 0.4457 0.3077 -0.91 -0.234 -0.5364 -0.9381 -1 -1 35 | 0.4393 0.8004 -0.5783 -0.2378 -0.3299 -0.2615 0.588 0.2443 -0.6518 1 36 | 0.0337 0.2622 -0.4467 -0.5206 -0.4301 -0.3567 0.2454 0.0335 -0.2949 1 37 | -0.1583 0.767 0.6972 0.2634 -0.4708 -0.6327 -0.998 -0.8828 0.6116 1 38 | -0.8917 0.1634 -0.6017 -0.3384 0.6428 -0.0318 0.3049 -0.1118 -1 1 39 | -0.4864 0.1848 0.0375 -0.7892 -0.5517 0.5667 -0.4218 -0.5498 0.6839 -1 40 | 0.5545 0.3762 -0.5996 0.9528 -0.9622 -0.9568 -0.0789 0.3427 -0.0686 -1 41 | 0.1361 -0.5169 -0.3709 -0.8264 -0.306 0.0774 0.7403 0.2721 0.5276 -1 42 | 0.7686 0.4347 -0.0279 -0.831 0.3875 0.0099 -0.7878 -0.6914 -0.6474 1 43 | 0.689 -0.767 -0.8421 -0.6819 -0.5934 -0.1481 0.3954 -0.8532 -0.876 1 44 | -0.153 0.8711 -0.0993 0.8191 -0.9599 -0.7117 -0.171 -0.7477 -0.4031 1 45 | -0.4384 0.3295 0.1583 -0.2805 0.6476 0.5649 0.5713 0.043 0.7117 -1 46 | -0.2528 -0.9359 0.2564 0.6479 0.8832 0.2966 0.9362 -0.2878 0.5489 1 47 | 0.2867 0.3421 0.9149 -0.555 -0.9384 0.5625 -0.9901 0.6329 -0.3945 1 48 | -0.6103 0.3564 0.8529 0.6461 0.0044 0.7361 -0.0573 -0.0595 -0.5517 -1 49 | -1 0.1217 -0.5353 0.9365 0.5667 -0.4737 0.4989 0.5765 -0.8408 -1 50 | -0.5352 -0.3079 0.453 -0.6823 -0.6618 -0.5426 -0.9462 0.2809 0.3979 1 51 | 0.9667 0.2303 0.8283 -0.5686 0.1668 0.3949 -0.0423 -0.3343 -0.0286 1 52 | -0.2993 0.911 0.2642 -0.8462 -0.7713 0.6024 -0.3888 -0.7175 -0.1167 1 53 | 0.5873 0.5954 0.0947 0.4155 -0.9732 -0.7385 -0.1896 -0.0155 -0.0728 1 54 | -0.3765 0.4062 0.0545 0.8877 0.56 0.2833 0.4901 -0.8289 0.5658 -1 55 | -0.1065 -0.3518 0.5746 0.9882 -0.9363 0.6014 -0.7503 -0.1259 -0.4141 -1 56 | -0.9823 0.3309 -0.2012 0.0723 0.2186 -0.6412 -0.6445 -0.2913 -0.4701 1 57 | -0.749 0.0047 -0.5807 0.8256 -0.007 -0.517 0.4271 0.2427 0.3572 -1 58 | -0.9071 0.3115 -0.9485 -0.1083 -0.6162 0.2701 0.2505 -0.2607 0.9788 1 59 | -0.7382 0.1835 -0.8231 -0.3189 0.0091 0.1698 0.1642 -0.5638 -0.5875 1 60 | 0.2551 0.2422 0.4373 0.3066 -0.8661 0.821 -0.4233 0.3844 -0.4397 -1 61 | -0.2114 0.9172 0.3369 -0.0345 -0.4017 -0.654 -0.8647 0.7625 -0.2178 1 62 | 0.5056 -0.9265 0.6228 -0.0571 0.3801 0.7567 -0.2361 0.9569 0.1411 -1 63 | -0.3013 -0.0825 0.8785 -0.9643 0.883 -0.5231 -0.6183 -0.9817 -0.7606 1 64 | -0.2241 0.4515 0.4151 -0.6012 -0.6056 -0.2047 -0.8445 0.1584 -0.2479 1 65 | 0.5637 0.7266 -0.689 0.4422 0.7623 -0.8061 0.9191 -0.856 -0.7878 -1 66 | -0.9766 -0.5208 -0.8244 0.4386 -0.1221 -0.4299 -0.7662 0.0334 0.7284 -1 67 | 0.644 0.496 -0.0344 0.955 -0.0618 -0.2722 -0.8511 -0.1426 -0.1281 -1 68 | 0.8634 0.7211 -0.6378 -0.9609 0.1597 0.2401 -0.3909 0.3935 -0.7265 1 69 | 0.7875 -0.7259 -0.9684 -0.2469 -0.771 -0.0301 0.4809 -0.6221 0.8272 -1 70 | -0.5843 0.7417 -0.738 -0.2221 0.7808 0.4217 -0.982 -0.6101 -0.1848 1 71 | 0.4305 0.0635 -0.9011 0.4622 0.8166 -0.6721 
-0.5679 0.2975 -0.2941 -1 72 | 0.6433 -0.4014 0.0649 0.9053 0.3765 -0.1543 0.3269 0.3946 0.2356 -1 73 | 0.1617 -0.9885 -0.6974 0.2606 0.4737 -0.8808 0.5885 0.9057 0.4168 -1 74 | 0.0624 -0.0892 0.8487 -0.8727 -0.184 0.2252 -0.0271 -0.857 -0.3802 1 75 | 0.4106 -0.2164 -0.1017 0.7132 -0.9558 -0.628 0.8325 0.6327 -0.7223 1 76 | 0.5663 -0.2714 -0.379 0.415 -0.1441 0.437 -0.3598 0.8288 0.58 -1 77 | -0.5474 0.6195 -0.7293 0.3509 0.3328 -0.6851 0.7229 0.1652 0.9476 -1 78 | -0.8465 -0.7029 -0.7304 -0.2255 0.712 0.1255 -0.7885 -0.6478 -0.0456 1 79 | 0.1437 0.6306 -0.1798 0.4145 -0.0185 -0.847 0.7294 -0.2956 0.3182 1 80 | 0.0927 0.3018 -0.2395 0.3623 -0.9236 -0.5275 -0.5121 -0.7121 -0.1753 1 81 | 0.6346 -0.1202 0.2456 -0.5452 -0.7057 -0.7729 -0.3923 -0.9763 -0.0685 1 82 | -0.878 -0.6548 -0.9133 -0.1175 0.7075 -0.837 0.355 -0.8046 -0.5491 1 83 | -0.7684 0.7061 0.1463 0.4771 -0.8391 0.4406 0.7042 -0.2314 0.4643 -1 84 | 0.0571 -0.5249 -0.2373 0.1438 0.3575 -0.5297 0.3069 -0.2875 -0.3343 1 85 | -0.4453 0.7404 -0.9191 0.701 0.2175 -0.7582 0.1417 -0.0783 0.0104 -1 86 | -0.8114 -0.1131 -0.4669 -0.0486 -0.9693 0.8906 0.4216 0.3376 -0.3969 -1 87 | -0.2346 0.9384 -0.2555 -0.1536 0.6394 0.962 0.0882 -0.2189 -0.1162 -1 88 | 0.8614 0.3468 0.158 -0.6056 -0.7018 0.1887 -0.715 0.7198 -0.4737 -1 89 | 0.3875 -0.0368 -0.0563 -0.868 0.8095 -0.4169 -0.906 -0.1023 0.3642 1 90 | 0.6901 -0.339 0.2563 -0.152 0.0554 0.5544 -0.9633 0.3405 0.2742 -1 91 | 0.1901 0.9995 -0.7577 -0.8662 -0.8685 -0.9482 -0.283 -0.7745 -0.0505 1 92 | -0.258 -0.6876 0.4063 0.9982 0.1604 -0.5383 0.5527 0.1971 0.8022 -1 93 | 0.1874 0.1349 -0.3578 0.4296 0.2687 -0.2263 0.4814 0.9857 -0.0008 -1 94 | 0.1218 0.6413 0.1371 -0.4719 0.6396 -0.7025 -0.0102 0.1922 0.4946 1 95 | 0.4655 0.1148 -0.6657 -0.8923 -0.4556 0.6031 -0.1186 -0.9741 0.5888 1 96 | -0.0921 0.9551 -0.8037 -0.9549 -0.5168 0.8359 -0.6574 0.4731 0.0281 1 97 | -0.7088 -0.4467 -0.9106 -0.3745 -0.339 -0.3662 -0.7714 0.5423 -0.3404 1 98 | -0.9721 -0.586 0.9048 -0.7758 -0.541 -0.6119 -0.9399 -0.1984 0.8611 1 99 | 0.1099 -0.9784 0.7673 0.1993 -0.3529 -0.5718 0.8331 -0.1243 0.9706 -1 100 | 0.5588 -0.8062 0.3135 0.4636 -0.5819 0.7725 0.8517 -0.5218 -0.4259 -1 101 | -------------------------------------------------------------------------------- /Week11/hw8/hw4_nnet_test.dat: -------------------------------------------------------------------------------- 1 | -0.106006 -0.081467 -1 2 | 0.17793 -0.345951 -1 3 | 0.102162 0.718258 1 4 | 0.694078 0.623397 -1 5 | 0.0235411 0.727432 1 6 | -0.319728 -0.834114 -1 7 | -0.186744 0.538878 1 8 | -0.636967 0.152685 1 9 | -0.474463 0.854344 1 10 | -0.0356277 -0.271588 -1 11 | -0.148603 0.161762 -1 12 | -0.180652 -0.128739 -1 13 | -0.602411 0.925507 1 14 | 0.698081 0.794742 -1 15 | 0.881509 -0.201248 1 16 | -0.923849 0.386625 1 17 | -0.765713 -0.0112813 1 18 | 0.135592 0.0317051 -1 19 | -0.155151 -0.33142 -1 20 | 0.485175 0.299031 -1 21 | -0.6029 0.333234 1 22 | -0.572858 0.828352 1 23 | -0.6354 -0.474566 -1 24 | 0.909317 -0.784889 1 25 | 0.252105 -0.893937 1 26 | -0.517634 0.960444 1 27 | -0.385872 -0.31787 -1 28 | 0.823167 -0.127797 1 29 | 0.822486 -0.876843 1 30 | -0.503662 0.980274 1 31 | 0.533874 0.821234 -1 32 | -0.89497 -0.240115 1 33 | 0.342871 0.474977 -1 34 | 0.709289 0.562207 -1 35 | -1.00043 0.0604576 1 36 | 0.524284 0.735195 -1 37 | -0.56033 0.755838 1 38 | 0.697522 -0.67199 1 39 | 0.490423 0.785087 -1 40 | -0.326774 0.343372 1 41 | -0.00293421 -0.415182 -1 42 | -0.631239 0.352634 1 43 | 0.913881 0.593053 -1 44 | 0.218283 0.0396835 -1 45 | -0.616185 
-0.886579 -1 46 | -0.528529 0.0286902 1 47 | -0.406523 1.04515 1 48 | -0.229795 0.0714251 -1 49 | -0.502121 0.833738 1 50 | -0.50808 0.79327 1 51 | -0.790678 0.187803 1 52 | -0.382511 0.824742 1 53 | 0.822328 0.401487 -1 54 | 0.985964 -0.329169 1 55 | -0.014047 -0.152387 -1 56 | -0.0541651 0.914285 1 57 | -1.07247 -0.720286 -1 58 | -0.242985 -1.04265 1 59 | -0.324486 -0.28318 -1 60 | 0.247749 -0.255656 -1 61 | -0.172211 -0.8494 1 62 | -0.417263 -0.393271 -1 63 | -0.347838 -0.573809 -1 64 | -0.851834 -0.722664 -1 65 | -0.725244 -0.373707 -1 66 | 0.345327 -0.0222718 -1 67 | 0.742421 0.740857 -1 68 | -0.137123 -0.347256 -1 69 | 0.105915 0.633788 1 70 | 0.332407 -0.565528 1 71 | -0.417541 0.948563 1 72 | -0.404889 -0.613469 -1 73 | -0.797158 0.90701 1 74 | 0.875921 0.360211 -1 75 | 0.54354 -0.181091 1 76 | 0.0753797 -0.511083 -1 77 | 0.564049 0.77191 -1 78 | 0.816991 0.525587 -1 79 | -0.376611 0.105994 1 80 | 0.436029 0.15046 -1 81 | 0.396919 -0.548933 1 82 | -0.274177 0.602238 1 83 | -0.989181 0.157649 1 84 | -0.516441 -0.824574 -1 85 | 0.980643 0.546138 -1 86 | 0.777557 -0.893465 1 87 | -0.259364 -0.64448 -1 88 | -0.237718 -0.906771 -1 89 | -0.603951 0.0881739 1 90 | -0.280116 -0.015129 -1 91 | -0.203737 0.797986 1 92 | -0.163726 0.436005 1 93 | 0.744445 0.410894 -1 94 | -0.332268 -0.458751 -1 95 | -0.0276649 -0.360801 -1 96 | 0.706967 0.754277 -1 97 | -0.823016 -0.302874 1 98 | -0.985381 -0.384415 1 99 | -0.490997 0.702803 1 100 | -0.522472 0.300006 1 101 | -0.56968 0.104462 1 102 | -0.323705 0.721715 1 103 | 0.919689 -0.325742 1 104 | 0.818177 0.349008 -1 105 | -0.712647 -0.491177 -1 106 | 0.536846 1.0477 -1 107 | 0.0488566 0.125227 -1 108 | 0.400446 -0.13359 -1 109 | 0.729577 -0.365942 1 110 | -0.846403 0.870347 1 111 | 0.830743 0.584719 -1 112 | 0.446426 0.283358 -1 113 | 0.635038 0.852612 -1 114 | 0.134707 0.840304 1 115 | -0.580763 -0.0146776 1 116 | -0.426553 0.491576 1 117 | -0.0309166 1.0816 1 118 | 0.487241 -0.75003 1 119 | -0.506428 -0.910393 -1 120 | 0.24941 0.383094 -1 121 | -0.443343 -0.763903 -1 122 | -0.104542 -0.883496 1 123 | -0.283878 -0.571733 -1 124 | 1.01211 0.371876 1 125 | 0.0618232 -0.607985 1 126 | 0.0871395 0.360893 -1 127 | 0.87155 0.413564 -1 128 | -0.422244 0.521393 1 129 | -0.529957 0.699439 1 130 | 0.590191 0.446972 -1 131 | 0.840474 -0.850343 1 132 | 0.091857 -0.231851 -1 133 | -0.0822101 -0.402158 -1 134 | 0.973161 0.641579 -1 135 | 0.435721 0.276361 -1 136 | 0.524528 -0.543545 1 137 | 0.554733 0.38298 -1 138 | 0.955988 -0.801573 1 139 | -0.770892 0.43342 1 140 | 0.889864 -0.531147 1 141 | -0.261993 0.066587 1 142 | 0.641572 -0.18225 1 143 | -0.415312 0.778326 1 144 | 0.467457 0.818292 -1 145 | 0.477891 -0.248978 1 146 | -0.826238 0.943979 1 147 | -0.945929 -0.428109 1 148 | 0.506624 -0.809274 1 149 | -0.536129 0.02724 1 150 | 0.41018 0.430307 -1 151 | -0.261622 -0.580314 -1 152 | 0.319113 0.215579 -1 153 | 0.147568 -0.814993 1 154 | 0.629898 0.699697 -1 155 | -0.954079 0.926492 1 156 | -0.244927 0.018247 -1 157 | 0.581089 0.316104 -1 158 | 0.0410187 -0.457155 -1 159 | 0.583963 0.682676 -1 160 | -0.487305 0.864632 1 161 | 0.825899 -0.0497919 1 162 | 0.521741 -0.889465 1 163 | 0.838825 -0.826463 1 164 | -0.629605 -0.148894 -1 165 | 0.852444 -1.05747 1 166 | 0.383238 0.678823 -1 167 | -0.0569873 0.60584 1 168 | 0.304129 -1.06668 1 169 | -0.861419 -0.226248 1 170 | -0.863202 0.202902 1 171 | 0.559463 0.153533 -1 172 | -0.417588 -0.326951 -1 173 | 0.122127 -0.248965 -1 174 | 0.74666 0.555462 -1 175 | 0.206152 0.57419 1 176 | -0.890985 0.498582 1 177 | -0.68593 
-0.46911 -1 178 | -0.197807 -0.13616 -1 179 | 0.616974 0.129276 -1 180 | -0.793416 0.361406 1 181 | -1.01017 0.0784968 1 182 | -0.86176 -0.578659 -1 183 | -0.197205 0.275245 -1 184 | 0.575618 0.937045 1 185 | -0.613542 -0.941164 -1 186 | -0.551814 -0.268043 -1 187 | 0.160037 -0.341978 -1 188 | -0.397164 0.656517 1 189 | -0.57887 -0.873403 -1 190 | 0.831792 -0.0853375 1 191 | -0.298931 -0.428922 -1 192 | -0.149263 0.65394 1 193 | -0.744958 -0.719335 -1 194 | 0.155624 0.920676 1 195 | 0.528938 0.916654 -1 196 | 0.0828245 -0.627955 1 197 | -0.939876 -0.662727 -1 198 | 0.714181 -0.259774 1 199 | -0.0111258 -0.842756 1 200 | 0.542759 0.118396 -1 201 | 0.734106 -0.890722 1 202 | 0.378962 -0.116083 -1 203 | -0.166564 -0.410541 -1 204 | -0.782016 0.37673 1 205 | 0.271773 0.810967 1 206 | -0.867801 -0.67611 -1 207 | -0.181209 0.680912 1 208 | -0.0444729 0.000441472 -1 209 | 0.429315 0.828909 -1 210 | -0.837975 -0.0769672 1 211 | 0.699779 -0.208176 1 212 | 0.774033 0.51157 -1 213 | -0.688138 0.793107 1 214 | -0.425318 -0.850405 -1 215 | 0.443572 -0.242149 1 216 | -0.00218607 0.697437 1 217 | 0.325465 -0.185786 -1 218 | 0.271268 -0.852024 1 219 | 0.208343 -0.829311 1 220 | -0.338032 -0.894042 -1 221 | -0.0242602 -0.550912 -1 222 | 0.255416 -0.288361 -1 223 | -0.716619 0.00041986 1 224 | 0.131717 -0.459831 -1 225 | 0.344558 -0.128777 -1 226 | 0.0824384 -0.972626 1 227 | 0.533274 0.295129 -1 228 | -0.338819 0.919956 1 229 | 0.551242 -0.846343 1 230 | -0.410561 0.512252 1 231 | 0.462925 -0.735639 1 232 | 0.575689 -0.589523 1 233 | -0.632094 -0.980422 -1 234 | -0.167642 -0.529201 -1 235 | 0.720368 -1.04008 1 236 | 0.750029 -0.538094 1 237 | 0.25173 -0.960771 1 238 | -0.724598 0.0744037 1 239 | -0.720427 -0.557209 -1 240 | -0.953844 0.477419 1 241 | 0.71149 -0.990116 1 242 | 0.290836 -0.443204 1 243 | 0.319746 -0.40071 1 244 | 0.233538 0.636726 1 245 | -0.195969 -0.990105 1 246 | -0.437767 0.0116531 1 247 | -0.35483 0.81982 1 248 | 0.347045 -0.545084 1 249 | 0.836376 0.343831 -1 250 | -0.713851 -0.640575 -1 251 | -------------------------------------------------------------------------------- /Week11/hw8/hw4_nnet_train.dat: -------------------------------------------------------------------------------- 1 | -0.77947 0.838221 1 2 | 0.155635 0.895377 1 3 | -0.0599077 -0.71778 1 4 | 0.207596 0.758933 1 5 | -0.195983 -0.375487 -1 6 | 0.588489 -0.842554 1 7 | 0.00719859 -0.548316 -1 8 | 0.738839 -0.603394 1 9 | 0.704648 -0.0204201 1 10 | 0.969927 0.641371 -1 11 | 0.435431 0.744773 -1 12 | -0.844258 0.742354 1 13 | 0.591425 -0.546021 1 14 | -0.0690931 0.03766 -1 15 | -0.951549 -0.733055 -1 16 | -0.129881 0.756761 1 17 | -0.495346 -0.566279 -1 18 | -0.903994 0.509221 1 19 | 0.292351 0.16089 -1 20 | 0.647986 -0.779338 1 21 | 0.375956 0.0782031 -1 22 | 0.24589 0.00451467 -1 23 | -0.457192 0.423905 1 24 | -0.441279 0.705719 1 25 | 0.507447 0.758726 -1 26 | -------------------------------------------------------------------------------- /Week11/hw8/hw8.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 9, 6 | "metadata": { 7 | "collapsed": true, 8 | "deletable": true, 9 | "editable": true 10 | }, 11 | "outputs": [], 12 | "source": [ 13 | "# q2-q3\n", 14 | "hidden = []\n", 15 | "\n", 16 | "def subset_sum(numbers, target, partial=[]):\n", 17 | " s = sum(partial)\n", 18 | "\n", 19 | " # check if the partial sum is equals to target\n", 20 | " if s == target:\n", 21 | " # print(\"sum(%s)=%s\" % (partial, 
target))\n", 22 | " hidden.append(partial)\n", 23 | " if s >= target:\n", 24 | " return # if we reach the number why bother to continue\n", 25 | "\n", 26 | " for i in range(len(numbers)):\n", 27 | " n = numbers[i]\n", 28 | " remaining = numbers[i:]\n", 29 | " subset_sum(remaining, target, partial + [n])" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 10, 35 | "metadata": { 36 | "collapsed": false, 37 | "deletable": true, 38 | "editable": true 39 | }, 40 | "outputs": [ 41 | { 42 | "name": "stdout", 43 | "output_type": "stream", 44 | "text": [ 45 | "最多情况: 522 \n", 46 | "最少情况: 46\n" 47 | ] 48 | } 49 | ], 50 | "source": [ 51 | "subset_sum([i+1 for i in range(36)], 36)\n", 52 | "maxi = 0; mini = 1000\n", 53 | "for i in range(len(hidden)):\n", 54 | " wnum = 0\n", 55 | " hidden[i].append(1)\n", 56 | " for j in range(len(hidden[i])-1):\n", 57 | " wnum += hidden[i][j]*hidden[i][j+1]\n", 58 | " wnum += 10*hidden[i][0]\n", 59 | " maxi = wnum if wnum>maxi else maxi\n", 60 | " mini = wnum if wnum=0] = 1\n", 161 | " Yhat[Yhat<0] = -1\n", 162 | " return np.sum(Yhat != Y)/row" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 34, 168 | "metadata": { 169 | "collapsed": true, 170 | "deletable": true, 171 | "editable": true 172 | }, 173 | "outputs": [], 174 | "source": [ 175 | "# 加载数据函数\n", 176 | "def loadData(filename):\n", 177 | " data = pd.read_csv(filename, sep='\\s+', header=None)\n", 178 | " data = data.as_matrix()\n", 179 | " col, row = data.shape\n", 180 | " X = np.c_[np.ones((col, 1)), data[:, 0: row-1]]\n", 181 | " Y = data[:, row-1: row]\n", 182 | " return X, Y" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 35, 188 | "metadata": { 189 | "collapsed": true, 190 | "deletable": true, 191 | "editable": true 192 | }, 193 | "outputs": [], 194 | "source": [ 195 | "X, Y = loadData('hw4_nnet_train.dat')\n", 196 | "Xtest, Ytest = loadData('hw4_nnet_test.dat')" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": 36, 202 | "metadata": { 203 | "collapsed": false, 204 | "deletable": true, 205 | "editable": true 206 | }, 207 | "outputs": [ 208 | { 209 | "name": "stdout", 210 | "output_type": "stream", 211 | "text": [ 212 | "[ 0.30592 0.036192 0.03624 0.036248 0.036408]\n" 213 | ] 214 | } 215 | ], 216 | "source": [ 217 | "# Q11\n", 218 | "M = [1, 6, 11, 16, 21]\n", 219 | "eout = np.zeros((len(M),))\n", 220 | "for i in range(500):\n", 221 | " for j in range(len(M)):\n", 222 | " theta1, theta2 = nnetwork(X, Y, M[j], 0.1, 0.1, 50000)\n", 223 | " theta = [theta1, theta2]\n", 224 | " eout[j] += errfun(Xtest, Ytest, theta)\n", 225 | "print(eout/500)" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": 37, 231 | "metadata": { 232 | "collapsed": false, 233 | "deletable": true, 234 | "editable": true 235 | }, 236 | "outputs": [ 237 | { 238 | "name": "stdout", 239 | "output_type": "stream", 240 | "text": [ 241 | "[ 0.49328 0.036 0.16184 0.3832 0.40072]\n" 242 | ] 243 | } 244 | ], 245 | "source": [ 246 | "# Q12\n", 247 | "r = [0, 0.1, 10, 100, 1000]\n", 248 | "eout = np.zeros((len(r),))\n", 249 | "for i in range(50):\n", 250 | " for j in range(len(r)):\n", 251 | " theta1, theta2 = nnetwork(X, Y, 3, r[j], 0.1, 50000)\n", 252 | " theta = [theta1, theta2]\n", 253 | " eout[j] += errfun(Xtest, Ytest, theta)\n", 254 | "print(eout / 50)" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": 38, 260 | "metadata": { 261 | "collapsed": false, 262 | "deletable": true, 263 | 
"editable": true 264 | }, 265 | "outputs": [ 266 | { 267 | "name": "stdout", 268 | "output_type": "stream", 269 | "text": [ 270 | "[ 0.0808 0.036 0.036 0.4832 0.52 ]\n" 271 | ] 272 | } 273 | ], 274 | "source": [ 275 | "# Q13\n", 276 | "eta = [0.001, 0.01, 0.1, 1, 10]\n", 277 | "eout = np.zeros((len(eta),))\n", 278 | "for i in range(5):\n", 279 | " for j in range(len(eta)):\n", 280 | " theta1, theta2 = nnetwork(X, Y, 3, 0.1, eta[j], 50000)\n", 281 | " theta = [theta1, theta2]\n", 282 | " eout[j] += errfun(Xtest, Ytest, theta)\n", 283 | "print(eout / 5)" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": 1, 289 | "metadata": { 290 | "collapsed": true, 291 | "deletable": true, 292 | "editable": true 293 | }, 294 | "outputs": [], 295 | "source": [ 296 | "# 多层神经网络\n", 297 | "def nnetwork2hidden(X, Y, d1, d2, T):\n", 298 | " row, col = X.shape\n", 299 | " theta1 = np.random.uniform(-0.1, 0.1, (col, d1))\n", 300 | " theta2 = np.random.uniform(-0.1, 0.1, (d1+1, d2))\n", 301 | " theta3 = np.random.uniform(-0.1, 0.1, (d2+1, 1))\n", 302 | " for i in range(T):\n", 303 | " # 前向传播\n", 304 | " randpos = np.random.randint(0, row)\n", 305 | " xone = X[randpos: randpos+1, :]\n", 306 | " yone = Y[randpos]\n", 307 | " s1 = xone.dot(theta1)\n", 308 | " x1 = np.tanh(s1)\n", 309 | " x1 = np.c_[np.ones((1, 1)), x1]\n", 310 | " s2 = x1.dot(theta2)\n", 311 | " x2 = np.tanh(s2)\n", 312 | " x2 = np.c_[np.ones((1, 1)), x2]\n", 313 | " s3 = x2.dot(theta3)\n", 314 | " x3 = np.tanh(s3)[0][0]\n", 315 | " delta3 = -2*(yone-x3)\n", 316 | " delta2 = delta3*theta3[1:, :].T*dertanh(s2)\n", 317 | " delta1 = delta2.dot(theta2[1:, :].T)*dertanh(s1)\n", 318 | " theta3 -= 0.01*x2.T*delta3\n", 319 | " theta2 -= 0.01*x1.T*delta2\n", 320 | " theta1 -= 0.01*xone.T.dot(delta1)\n", 321 | " return theta1, theta2, theta3" 322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "execution_count": 13, 327 | "metadata": { 328 | "collapsed": false 329 | }, 330 | "outputs": [ 331 | { 332 | "name": "stdout", 333 | "output_type": "stream", 334 | "text": [ 335 | "0.036\n" 336 | ] 337 | } 338 | ], 339 | "source": [ 340 | "# Q14\n", 341 | "eout = 0\n", 342 | "for i in range(50):\n", 343 | " theta1, theta2, theta3 = nnetwork2hidden(X, Y, 8, 3, 50000)\n", 344 | " theta = [theta1, theta2, theta3]\n", 345 | " eout += errfun(Xtest, Ytest, theta)\n", 346 | "print(eout/50)" 347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "execution_count": 15, 352 | "metadata": { 353 | "collapsed": true 354 | }, 355 | "outputs": [], 356 | "source": [ 357 | "#---------kNN----------------\n", 358 | "def kNNeighbor(k, xpred, X, Y):\n", 359 | " xmin = np.sum((xpred - X)**2, 1)\n", 360 | " pos = np.argsort(xmin, 0)\n", 361 | " Ypred = Y[pos[0:k]]\n", 362 | " Ypred = np.sum(Ypred)\n", 363 | " Ypred = 1 if Ypred>=0 else -1\n", 364 | " return Ypred" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": 16, 370 | "metadata": { 371 | "collapsed": true 372 | }, 373 | "outputs": [], 374 | "source": [ 375 | "# 预测函数\n", 376 | "def predict(Xtest, X, Y, k):\n", 377 | " row, col = Xtest.shape\n", 378 | " Ypred = np.zeros((row, 1))\n", 379 | " for i in range(row):\n", 380 | " Ypred[i] = kNNeighbor(k, Xtest[i, :], X, Y)\n", 381 | " return Ypred" 382 | ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": 17, 387 | "metadata": { 388 | "collapsed": true 389 | }, 390 | "outputs": [], 391 | "source": [ 392 | "# 加载数据函数\n", 393 | "def loadData(filename):\n", 394 | " data = pd.read_csv(filename, sep='\\s+', 
header=None)\n", 395 | " data = data.as_matrix()\n", 396 | " col, row = data.shape\n", 397 | " X = data[:, 0: row-1]\n", 398 | " Y = data[:, row-1:row]\n", 399 | " return X, Y" 400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": 18, 405 | "metadata": { 406 | "collapsed": true 407 | }, 408 | "outputs": [], 409 | "source": [ 410 | "# 导入数据\n", 411 | "X, Y = loadData('hw4_knn_train.dat')\n", 412 | "Xtest, Ytest = loadData('hw4_knn_test.dat')" 413 | ] 414 | }, 415 | { 416 | "cell_type": "code", 417 | "execution_count": 19, 418 | "metadata": { 419 | "collapsed": false 420 | }, 421 | "outputs": [ 422 | { 423 | "name": "stdout", 424 | "output_type": "stream", 425 | "text": [ 426 | "0.344\n" 427 | ] 428 | } 429 | ], 430 | "source": [ 431 | "# Q15-Q16\n", 432 | "Yhat = predict(Xtest, X, Y, 1)\n", 433 | "eout = np.sum(Yhat!=Ytest)/Ytest.shape[0]\n", 434 | "print(eout)" 435 | ] 436 | }, 437 | { 438 | "cell_type": "code", 439 | "execution_count": 20, 440 | "metadata": { 441 | "collapsed": false 442 | }, 443 | "outputs": [ 444 | { 445 | "name": "stdout", 446 | "output_type": "stream", 447 | "text": [ 448 | "0.16 0.316\n" 449 | ] 450 | } 451 | ], 452 | "source": [ 453 | "# Q17-Q18\n", 454 | "Yhat1 = predict(X, X, Y, 5)\n", 455 | "Yhat2 = predict(Xtest, X, Y, 5)\n", 456 | "ein = np.sum(Yhat1 != Y) / Y.shape[0]\n", 457 | "eout = np.sum(Yhat2 != Ytest) / Ytest.shape[0]\n", 458 | "print(ein, eout)" 459 | ] 460 | }, 461 | { 462 | "cell_type": "code", 463 | "execution_count": 21, 464 | "metadata": { 465 | "collapsed": true 466 | }, 467 | "outputs": [], 468 | "source": [ 469 | "# -----------kMeans------------\n", 470 | "def kMean(k, X):\n", 471 | " row, col = X.shape\n", 472 | " pos = np.random.permutation(row)\n", 473 | " mu = X[pos[0: k], :]\n", 474 | " epsilon = 1e-5; simi = 1\n", 475 | " while simi>epsilon:\n", 476 | " S = np.zeros((row, k))\n", 477 | " for i in range(k):\n", 478 | " S[:, i] = np.sum((X-mu[i, :])**2, 1)\n", 479 | " tempmu = mu.copy()\n", 480 | " pos = np.argmin(S, 1)\n", 481 | " for i in range(k):\n", 482 | " mu[i, :] = np.mean(X[pos == i, :], 0)\n", 483 | " simi = np.sum(tempmu-mu)\n", 484 | " return mu" 485 | ] 486 | }, 487 | { 488 | "cell_type": "code", 489 | "execution_count": 22, 490 | "metadata": { 491 | "collapsed": true 492 | }, 493 | "outputs": [], 494 | "source": [ 495 | "def errfun(X, mu):\n", 496 | " row, col = X.shape\n", 497 | " k = mu.shape[0]\n", 498 | " err = 0\n", 499 | " S = np.zeros((row, k))\n", 500 | " for i in range(k):\n", 501 | " S[:, i] = np.sum((X - mu[i, :]) ** 2, 1)\n", 502 | " pos = np.argmin(S, 1)\n", 503 | " for i in range(k):\n", 504 | " err += np.sum((X[pos == i, :]-mu[i, :])**2)\n", 505 | " return err/row" 506 | ] 507 | }, 508 | { 509 | "cell_type": "code", 510 | "execution_count": 23, 511 | "metadata": { 512 | "collapsed": true 513 | }, 514 | "outputs": [], 515 | "source": [ 516 | "# 加载数据函数\n", 517 | "def loadData(filename):\n", 518 | " data = pd.read_csv(filename, sep='\\s+', header=None)\n", 519 | " data = data.as_matrix()\n", 520 | " return data" 521 | ] 522 | }, 523 | { 524 | "cell_type": "code", 525 | "execution_count": 24, 526 | "metadata": { 527 | "collapsed": true 528 | }, 529 | "outputs": [], 530 | "source": [ 531 | "# 导入数据\n", 532 | "X = loadData('hw4_kmeans_train.dat')" 533 | ] 534 | }, 535 | { 536 | "cell_type": "code", 537 | "execution_count": 26, 538 | "metadata": { 539 | "collapsed": false 540 | }, 541 | "outputs": [ 542 | { 543 | "name": "stdout", 544 | "output_type": "stream", 545 | "text": [ 546 | 
"2.71678714378\n" 547 | ] 548 | } 549 | ], 550 | "source": [ 551 | "# Q19\n", 552 | "err = 0\n", 553 | "for i in range(100):\n", 554 | " mu = kMean(2, X)\n", 555 | " err += errfun(X, mu)\n", 556 | "print(err/100)" 557 | ] 558 | }, 559 | { 560 | "cell_type": "code", 561 | "execution_count": 27, 562 | "metadata": { 563 | "collapsed": false 564 | }, 565 | "outputs": [ 566 | { 567 | "name": "stdout", 568 | "output_type": "stream", 569 | "text": [ 570 | "1.79117604501\n" 571 | ] 572 | } 573 | ], 574 | "source": [ 575 | "# Q20\n", 576 | "err = 0\n", 577 | "for i in range(100):\n", 578 | " mu = kMean(10, X)\n", 579 | " err += errfun(X, mu)\n", 580 | "print(err/100)" 581 | ] 582 | } 583 | ], 584 | "metadata": { 585 | "kernelspec": { 586 | "display_name": "Python 3", 587 | "language": "python", 588 | "name": "python3" 589 | }, 590 | "language_info": { 591 | "codemirror_mode": { 592 | "name": "ipython", 593 | "version": 3 594 | }, 595 | "file_extension": ".py", 596 | "mimetype": "text/x-python", 597 | "name": "python", 598 | "nbconvert_exporter": "python", 599 | "pygments_lexer": "ipython3", 600 | "version": "3.6.0" 601 | } 602 | }, 603 | "nbformat": 4, 604 | "nbformat_minor": 2 605 | } 606 | -------------------------------------------------------------------------------- /Week12/README.md: -------------------------------------------------------------------------------- 1 | ## 天池 O2O 优惠卷使用预测比赛解析与代码分析(进阶) 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /Week2/README.md: -------------------------------------------------------------------------------- 1 | ## 包含文件 2 | 3 | - hw2:第二周作业 4 | 5 | ## 主要内容 6 | 7 | 视频地址: 8 | 9 | https://www.bilibili.com/video/av36731342/?p=18 10 | 11 | https://www.bilibili.com/video/av36731342/?p=19 12 | 13 | https://www.bilibili.com/video/av36731342/?p=20 14 | 15 | https://www.bilibili.com/video/av36731342/?p=21 16 | 17 | https://www.bilibili.com/video/av36731342/?p=22 18 | 19 | https://www.bilibili.com/video/av36731342/?p=23 20 | 21 | https://www.bilibili.com/video/av36731342/?p=24 22 | 23 | https://www.bilibili.com/video/av36731342/?p=25 24 | 25 | https://www.bilibili.com/video/av36731342/?p=26 26 | 27 | https://www.bilibili.com/video/av36731342/?p=27 28 | 29 | https://www.bilibili.com/video/av36731342/?p=28 30 | 31 | https://www.bilibili.com/video/av36731342/?p=29 32 | 33 | https://www.bilibili.com/video/av36731342/?p=30 34 | 35 | https://www.bilibili.com/video/av36731342/?p=31 36 | 37 | https://www.bilibili.com/video/av36731342/?p=32 38 | 39 | https://www.bilibili.com/video/av36731342/?p=33 40 | 41 | 42 | 参考资料: 43 | 44 | https://redstonewill.com/80/ 45 | 46 | https://redstonewill.com/217/ 47 | 48 | https://redstonewill.com/222/ 49 | 50 | https://redstonewill.com/227/ 51 | 52 | 53 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /Week2/hw2/DecisionStump.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from preprocess import * 3 | 4 | class Decision(object): 5 | 6 | def __init__(self): 7 | pass 8 | 9 | # First,Generate a data set of size 20 by the procedure above and run the one-dimensional decisionstump algorithm on the data set. 10 | # Record Ein. Repeat the experiment (including data generation, running the decision stump algorithm, and computing Ein) 5,000 times. 11 | # What is the average Ein ? 
12 | def calculate_Ein(self, X, Y): 13 | # calculate median of interval & negative infinite & positive infinite 14 | thetas = np.array([float("-inf")] + [(X[i] + X[i + 1]) / 2 for i in range(0, X.shape[0] - 1)] + [float("inf")]) 15 | Ein = X.shape[0] 16 | sign = 1 17 | target_theta = 0.0 18 | 19 | ### YOUR CODE HERE 20 | 21 | 22 | 23 | ### END YOUR CODE 24 | 25 | # two corner cases 26 | if target_theta == float("inf"): 27 | target_theta = 1.0 28 | if target_theta == float("-inf"): 29 | target_theta = -1.0 30 | Ein = Ein / X.shape[0] # mean of Ein 31 | return Ein, target_theta, sign 32 | 33 | # Repeat the experiment (including data generation, running the decision stump algorithm, and computing Ein) 5,000 times. 34 | # What is the average Ein 35 | def decision_ray(self): 36 | T = 5000 # iteration 37 | Ein_all = [] # list for all Ein 38 | 39 | ### YOUR CODE HERE 40 | 41 | 42 | 43 | ### END YOUR CODE 44 | 45 | # mean of Ein 46 | Ein_mean = np.mean(Ein_all) 47 | return Ein_mean 48 | 49 | # Run the algorithm on the Dtrain . What is the Ein of the optimal decision stump? 50 | def decision_dtrain(self, path): 51 | X, Y = read_input_data(path) 52 | # record optimal descision stump parameters 53 | Ein = 1.0 54 | theta = 0 55 | sign = 1 56 | index = 0 57 | 58 | ### YOUR CODE HERE 59 | 60 | 61 | 62 | ### END YOUR CODE 63 | 64 | return Ein, theta, sign, index 65 | 66 | # Use the returned decision stump to predict the label of each example within the Dtest . Report an estimate Etest. 67 | def decision_dtest(self, path, theta, sign, index): 68 | 69 | ### YOUR CODE HERE 70 | 71 | 72 | 73 | ### END YOUR CODE 74 | 75 | return Etest -------------------------------------------------------------------------------- /Week2/hw2/data/hw2_train.dat: -------------------------------------------------------------------------------- 1 | 8.105 -3.500 4.769 4.541 -9.829 5.252 3.838 -3.408 -4.824 -1 2 | -6.273 -2.097 9.404 1.143 3.487 -5.206 0.061 5.024 -6.687 1 3 | 1.624 -1.173 4.260 -3.607 -6.632 4.431 -8.355 7.206 -8.977 1 4 | -10.000 7.758 -2.670 -8.880 -1.099 -9.183 -4.086 8.962 5.841 1 5 | 8.464 1.762 2.729 2.724 8.155 6.096 -2.844 9.800 3.302 -1 6 | -0.135 6.193 7.705 7.195 7.313 -3.395 8.012 -6.773 -4.433 1 7 | 0.934 -8.379 -2.083 -6.337 4.346 -3.928 9.759 -8.499 -4.128 1 8 | 8.923 -0.018 -6.837 6.628 -2.823 -9.524 -6.767 -4.811 -6.296 1 9 | -9.028 7.010 -9.063 -1.111 -9.328 5.282 4.960 -9.569 6.784 -1 10 | -9.706 1.392 6.562 -6.543 -1.980 -6.261 -6.067 1.254 -1.071 1 11 | -6.891 -4.157 1.057 -5.954 4.732 1.729 9.328 -0.308 2.160 1 12 | -0.845 -5.858 -0.486 -4.282 -2.401 7.534 -0.543 1.531 -1.212 -1 13 | -9.596 -3.929 9.556 1.461 0.117 4.288 -6.810 -0.555 -6.020 1 14 | 9.124 7.287 -7.506 -1.363 -6.995 0.093 -3.828 2.462 -8.376 1 15 | 7.514 7.608 -0.175 7.071 -0.931 9.942 1.359 2.259 -0.613 -1 16 | -1.805 -2.265 -9.636 0.689 6.373 -6.631 -9.218 -7.456 5.831 -1 17 | -3.048 8.819 -8.509 6.777 5.889 0.560 6.719 -2.752 -7.181 -1 18 | -5.873 -9.376 -3.226 -5.509 1.313 -6.853 -2.140 2.095 -4.309 -1 19 | 4.250 -5.350 -6.683 5.741 -8.574 9.207 -3.699 8.145 -3.545 -1 20 | 8.587 -0.571 -7.906 -4.638 3.920 3.407 -1.491 -8.220 -4.498 1 21 | -8.107 0.089 -7.650 -4.790 -4.171 -6.223 -5.583 2.130 -8.078 1 22 | -8.616 9.386 -9.095 -6.522 -5.252 4.825 6.886 3.256 6.605 -1 23 | -10.000 -3.258 -1.998 -7.559 1.952 3.832 -3.782 6.369 -4.038 1 24 | -4.212 -1.462 -2.603 -3.308 2.016 2.144 -8.483 -1.099 -4.600 1 25 | 8.112 3.770 -5.551 -3.885 6.211 6.401 9.946 -7.571 2.770 -1 26 | -8.868 0.669 5.703 -1.472 7.361 -2.282 -9.328 8.879 6.620 
1 27 | 6.635 5.312 5.358 -8.916 -8.574 1.569 7.485 -8.628 3.998 1 28 | 7.432 -8.466 -9.884 3.135 0.062 7.477 -9.147 0.734 6.355 -1 29 | -3.031 2.371 -4.132 -7.674 3.454 -2.706 3.895 0.939 -1.334 1 30 | -10.000 -1.108 7.883 -7.978 -7.973 -2.055 9.498 -7.120 8.679 1 31 | 10.000 2.703 -6.408 -4.365 5.029 7.046 2.929 -1.076 -2.015 -1 32 | 3.891 1.182 -0.468 1.774 3.203 1.559 9.719 2.702 4.439 -1 33 | -4.895 7.533 3.229 -1.304 -6.832 -1.742 -4.258 6.097 7.182 1 34 | -6.454 -0.875 4.457 3.077 -9.100 -2.340 -5.364 -9.381 -10.000 -1 35 | 4.393 8.004 -5.783 -2.378 -3.299 -2.615 5.880 2.443 -6.518 1 36 | 0.337 2.622 -4.467 -5.206 -4.301 -3.567 2.454 0.335 -2.949 1 37 | -1.583 7.670 6.972 2.634 -4.708 -6.327 -9.980 -8.828 6.116 1 38 | -8.917 1.634 -6.017 -3.384 6.428 -0.318 3.049 -1.118 -10.000 1 39 | -4.864 1.848 0.375 -7.892 -5.517 5.667 -4.218 -5.498 6.839 -1 40 | 5.545 3.762 -5.996 9.528 -9.622 -9.568 -0.789 3.427 -0.686 -1 41 | 1.361 -5.169 -3.709 -8.264 -3.060 0.774 7.403 2.721 5.276 -1 42 | 7.686 4.347 -0.279 -8.310 3.875 0.099 -7.878 -6.914 -6.474 1 43 | 6.890 -7.670 -8.421 -6.819 -5.934 -1.481 3.954 -8.532 -8.760 1 44 | -1.530 8.711 -0.993 8.191 -9.599 -7.117 -1.710 -7.477 -4.031 1 45 | -4.384 3.295 1.583 -2.805 6.476 5.649 5.713 0.430 7.117 -1 46 | -2.528 -9.359 2.564 6.479 8.832 2.966 9.362 -2.878 5.489 1 47 | 2.867 3.421 9.149 -5.550 -9.384 5.625 -9.901 6.329 -3.945 1 48 | -6.103 3.564 8.529 6.461 0.044 7.361 -0.573 -0.595 -5.517 -1 49 | -10.000 1.217 -5.353 9.365 5.667 -4.737 4.989 5.765 -8.408 -1 50 | -5.352 -3.079 4.530 -6.823 -6.618 -5.426 -9.462 2.809 3.979 1 51 | 9.667 2.303 8.283 -5.686 1.668 3.949 -0.423 -3.343 -0.286 1 52 | -2.993 9.110 2.642 -8.462 -7.713 6.024 -3.888 -7.175 -1.167 1 53 | 5.873 5.954 0.947 4.155 -9.732 -7.385 -1.896 -0.155 -0.728 1 54 | -3.765 4.062 0.545 8.877 5.600 2.833 4.901 -8.289 5.658 -1 55 | -1.065 -3.518 5.746 9.882 -9.363 6.014 -7.503 -1.259 -4.141 -1 56 | -9.823 3.309 -2.012 0.723 2.186 -6.412 -6.445 -2.913 -4.701 1 57 | -7.490 0.047 -5.807 8.256 -0.070 -5.170 4.271 2.427 3.572 -1 58 | -9.071 3.115 -9.485 -1.083 -6.162 2.701 2.505 -2.607 9.788 1 59 | -7.382 1.835 -8.231 -3.189 0.091 1.698 1.642 -5.638 -5.875 1 60 | 2.551 2.422 4.373 3.066 -8.661 8.210 -4.233 3.844 -4.397 -1 61 | -2.114 9.172 3.369 -0.345 -4.017 -6.540 -8.647 7.625 -2.178 1 62 | 5.056 -9.265 6.228 -0.571 3.801 7.567 -2.361 9.569 1.411 -1 63 | -3.013 -0.825 8.785 -9.643 8.830 -5.231 -6.183 -9.817 -7.606 1 64 | -2.241 4.515 4.151 -6.012 -6.056 -2.047 -8.445 1.584 -2.479 1 65 | 5.637 7.266 -6.890 4.422 7.623 -8.061 9.191 -8.560 -7.878 -1 66 | -9.766 -5.208 -8.244 4.386 -1.221 -4.299 -7.662 0.334 7.284 -1 67 | 6.440 4.960 -0.344 9.550 -0.618 -2.722 -8.511 -1.426 -1.281 -1 68 | 8.634 7.211 -6.378 -9.609 1.597 2.401 -3.909 3.935 -7.265 1 69 | 7.875 -7.259 -9.684 -2.469 -7.710 -0.301 4.809 -6.221 8.272 -1 70 | -5.843 7.417 -7.380 -2.221 7.808 4.217 -9.820 -6.101 -1.848 1 71 | 4.305 0.635 -9.011 4.622 8.166 -6.721 -5.679 2.975 -2.941 -1 72 | 6.433 -4.014 0.649 9.053 3.765 -1.543 3.269 3.946 2.356 -1 73 | 1.617 -9.885 -6.974 2.606 4.737 -8.808 5.885 9.057 4.168 -1 74 | 0.624 -0.892 8.487 -8.727 -1.840 2.252 -0.271 -8.570 -3.802 1 75 | 4.106 -2.164 -1.017 7.132 -9.558 -6.280 8.325 6.327 -7.223 1 76 | 5.663 -2.714 -3.790 4.150 -1.441 4.370 -3.598 8.288 5.800 -1 77 | -5.474 6.195 -7.293 3.509 3.328 -6.851 7.229 1.652 9.476 -1 78 | -8.465 -7.029 -7.304 -2.255 7.120 1.255 -7.885 -6.478 -0.456 1 79 | 1.437 6.306 -1.798 4.145 -0.185 -8.470 7.294 -2.956 3.182 1 80 | 0.927 3.018 -2.395 3.623 -9.236 
-5.275 -5.121 -7.121 -1.753 1 81 | 6.346 -1.202 2.456 -5.452 -7.057 -7.729 -3.923 -9.763 -0.685 1 82 | -8.780 -6.548 -9.133 -1.175 7.075 -8.370 3.550 -8.046 -5.491 1 83 | -7.684 7.061 1.463 4.771 -8.391 4.406 7.042 -2.314 4.643 -1 84 | 0.571 -5.249 -2.373 1.438 3.575 -5.297 3.069 -2.875 -3.343 1 85 | -4.453 7.404 -9.191 7.010 2.175 -7.582 1.417 -0.783 0.104 -1 86 | -8.114 -1.131 -4.669 -0.486 -9.693 8.906 4.216 3.376 -3.969 -1 87 | -2.346 9.384 -2.555 -1.536 6.394 9.620 0.882 -2.189 -1.162 -1 88 | 8.614 3.468 1.580 -6.056 -7.018 1.887 -7.150 7.198 -4.737 -1 89 | 3.875 -0.368 -0.563 -8.680 8.095 -4.169 -9.060 -1.023 3.642 1 90 | 6.901 -3.390 2.563 -1.520 0.554 5.544 -9.633 3.405 2.742 -1 91 | 1.901 9.995 -7.577 -8.662 -8.685 -9.482 -2.830 -7.745 -0.505 1 92 | -2.580 -6.876 4.063 9.982 1.604 -5.383 5.527 1.971 8.022 -1 93 | 1.874 1.349 -3.578 4.296 2.687 -2.263 4.814 9.857 -0.008 -1 94 | 1.218 6.413 1.371 -4.719 6.396 -7.025 -0.102 1.922 4.946 1 95 | 4.655 1.148 -6.657 -8.923 -4.556 6.031 -1.186 -9.741 5.888 1 96 | -0.921 9.551 -8.037 -9.549 -5.168 8.359 -6.574 4.731 0.281 1 97 | -7.088 -4.467 -9.106 -3.745 -3.390 -3.662 -7.714 5.423 -3.404 1 98 | -9.721 -5.860 9.048 -7.758 -5.410 -6.119 -9.399 -1.984 8.611 1 99 | 1.099 -9.784 7.673 1.993 -3.529 -5.718 8.331 -1.243 9.706 -1 100 | 5.588 -8.062 3.135 4.636 -5.819 7.725 8.517 -5.218 -4.259 -1 101 | -------------------------------------------------------------------------------- /Week2/hw2/hw2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## hw2-1" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "from preprocess import *\n", 18 | "from DecisionStump import *" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "**Repeat the experiment (including data generation, running the decision stump algorithm, and computing Ein) 5,000 times. What is the average Ein?**" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 2, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "decision = Decision()\n", 35 | "Ein_mean = decision.decision_ray()" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "metadata": {}, 42 | "outputs": [ 43 | { 44 | "data": { 45 | "text/plain": [ 46 | "0.17014000000000001" 47 | ] 48 | }, 49 | "execution_count": 3, 50 | "metadata": {}, 51 | "output_type": "execute_result" 52 | } 53 | ], 54 | "source": [ 55 | "Ein_mean" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "## hw2-2" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "**Run the algorithm on the Dtrain . What is the Ein of the optimal decision stump? 
**" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 4, 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "name": "stdout", 79 | "output_type": "stream", 80 | "text": [ 81 | "theta = 1.617500\tsign = -1\tindex = 3\tEin = 0.250000\n" 82 | ] 83 | } 84 | ], 85 | "source": [ 86 | "Ein, theta, sign, index = decision.decision_dtrain('./data/hw2_train.dat')\n", 87 | "\n", 88 | "print(('theta = %f\\tsign = %d\\tindex = %d\\tEin = %f') % (theta, sign, index, Ein))" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 5, 94 | "metadata": {}, 95 | "outputs": [ 96 | { 97 | "name": "stdout", 98 | "output_type": "stream", 99 | "text": [ 100 | "Etest = 0.355\n" 101 | ] 102 | } 103 | ], 104 | "source": [ 105 | "path = './data/hw2_test.dat'\n", 106 | "Etest = decision.decision_dtest(path, theta, sign, index)\n", 107 | "print('Etest = ', Etest)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": { 114 | "collapsed": true 115 | }, 116 | "outputs": [], 117 | "source": [] 118 | } 119 | ], 120 | "metadata": { 121 | "kernelspec": { 122 | "display_name": "Python 3", 123 | "language": "python", 124 | "name": "python3" 125 | }, 126 | "language_info": { 127 | "codemirror_mode": { 128 | "name": "ipython", 129 | "version": 3 130 | }, 131 | "file_extension": ".py", 132 | "mimetype": "text/x-python", 133 | "name": "python", 134 | "nbconvert_exporter": "python", 135 | "pygments_lexer": "ipython3", 136 | "version": "3.6.3" 137 | } 138 | }, 139 | "nbformat": 4, 140 | "nbformat_minor": 2 141 | } 142 | -------------------------------------------------------------------------------- /Week2/hw2/preprocess.py: -------------------------------------------------------------------------------- 1 | # hw2 2 | import numpy as np 3 | 4 | # generate random data 5 | def generate_input_data(time_seed): 6 | np.random.seed(time_seed) 7 | raw_X = np.sort(np.random.uniform(-1, 1, 20)) 8 | # 加20%噪声 9 | noised_y = np.sign(raw_X) * np.where(np.random.random(raw_X.shape[0]) < 0.2, -1, 1) 10 | return raw_X, noised_y 11 | 12 | # load the data 13 | def read_input_data(path): 14 | x = [] 15 | y = [] 16 | for line in open(path).readlines(): 17 | items = line.strip().split(' ') 18 | tmp_x = [] 19 | for i in range(0, len(items) - 1): tmp_x.append(float(items[i])) 20 | x.append(tmp_x) 21 | y.append(float(items[-1])) 22 | return np.array(x), np.array(y) 23 | 24 | -------------------------------------------------------------------------------- /Week2/reference_hw2/DecisionStump.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from preprocess import * 3 | 4 | class Decision(object): 5 | 6 | def __init__(self): 7 | pass 8 | 9 | # First,Generate a data set of size 20 by the procedure above and run the one-dimensional decisionstump algorithm on the data set. 10 | # Record Ein. Repeat the experiment (including data generation, running the decision stump algorithm, and computing Ein) 5,000 times. 11 | # What is the average Ein ? 
12 | def calculate_Ein(self, X, Y): 13 | # calculate median of interval & negative infinite & positive infinite 14 | thetas = np.array([float("-inf")] + [(X[i] + X[i + 1]) / 2 for i in range(0, X.shape[0] - 1)] + [float("inf")]) 15 | Ein = X.shape[0] 16 | sign = 1 17 | target_theta = 0.0 18 | # positive and negative rays 19 | for theta in thetas: 20 | y_positive = np.where(X > theta, 1, -1) 21 | y_negative = np.where(X < theta, 1, -1) 22 | error_positive = sum(y_positive != Y) 23 | error_negative = sum(y_negative != Y) 24 | if error_positive > error_negative: 25 | if Ein > error_negative: 26 | Ein = error_negative 27 | sign = -1 28 | target_theta = theta 29 | else: 30 | if Ein > error_positive: 31 | Ein = error_positive 32 | sign = 1 33 | target_theta = theta 34 | # two corner cases 35 | if target_theta == float("inf"): 36 | target_theta = 1.0 37 | if target_theta == float("-inf"): 38 | target_theta = -1.0 39 | Ein = Ein / X.shape[0] # mean of Ein 40 | return Ein, target_theta, sign 41 | 42 | # Repeat the experiment (including data generation, running the decision stump algorithm, and computing Ein) 5,000 times. 43 | # What is the average Ein 44 | def decision_ray(self): 45 | T = 5000 # iteration 46 | Ein_all = [] # list for all Ein 47 | 48 | for i in range(T): 49 | X, Y = generate_input_data(i) 50 | Ein, theta, sign = self.calculate_Ein(X, Y) 51 | Ein_all.append(Ein) 52 | #print(('Iter = %d\t Ein = %f') % (i+1, Ein)) 53 | 54 | # mean of Ein 55 | Ein_mean = np.mean(Ein_all) 56 | return Ein_mean 57 | 58 | # Run the algorithm on the Dtrain . What is the Ein of the optimal decision stump? 59 | def decision_dtrain(self, path): 60 | X, Y = read_input_data(path) 61 | # record optimal descision stump parameters 62 | Ein = 1.0 63 | theta = 0 64 | sign = 1 65 | index = 0 66 | # multi decision stump optimal process 67 | for i in range(0, X.shape[1]): 68 | input_x = X[:, i] 69 | input_data = np.transpose(np.array([input_x, Y])) 70 | input_data = input_data[np.argsort(input_data[:, 0])] 71 | curr_Ein, curr_theta, curr_sign = self.calculate_Ein(input_data[:, 0], input_data[:, 1]) 72 | if Ein > curr_Ein: 73 | Ein = curr_Ein 74 | theta = curr_theta 75 | sign = curr_sign 76 | index = i 77 | return Ein, theta, sign, index 78 | 79 | # Use the returned decision stump to predict the label of each example within the Dtest . Report an estimate Etest. 80 | def decision_dtest(self, path, theta, sign, index): 81 | # test process 82 | test_x, test_y = read_input_data(path) 83 | test_x = test_x[:, index] 84 | predict_y = np.array([]) 85 | if sign == 1: 86 | predict_y = np.where(test_x > theta, 1.0, -1.0) 87 | else: 88 | predict_y = np.where(test_x < theta, 1.0, -1.0) 89 | Etest = np.mean(predict_y != test_y) 90 | return Etest -------------------------------------------------------------------------------- /Week2/reference_hw2/hw2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## hw2-1" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "from preprocess import *\n", 18 | "from DecisionStump import *" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "**Repeat the experiment (including data generation, running the decision stump algorithm, and computing Ein) 5,000 times. 
What is the average Ein?**" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 2, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "decision = Decision()\n", 35 | "Ein_mean = decision.decision_ray()" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "metadata": {}, 42 | "outputs": [ 43 | { 44 | "data": { 45 | "text/plain": [ 46 | "0.17014000000000001" 47 | ] 48 | }, 49 | "execution_count": 3, 50 | "metadata": {}, 51 | "output_type": "execute_result" 52 | } 53 | ], 54 | "source": [ 55 | "Ein_mean" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "## hw2-2" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "**Run the algorithm on the Dtrain . What is the Ein of the optimal decision stump? **" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 4, 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "name": "stdout", 79 | "output_type": "stream", 80 | "text": [ 81 | "theta = 1.617500\tsign = -1\tindex = 3\tEin = 0.250000\n" 82 | ] 83 | } 84 | ], 85 | "source": [ 86 | "Ein, theta, sign, index = decision.decision_dtrain('./data/hw2_train.dat')\n", 87 | "\n", 88 | "print(('theta = %f\\tsign = %d\\tindex = %d\\tEin = %f') % (theta, sign, index, Ein))" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 5, 94 | "metadata": {}, 95 | "outputs": [ 96 | { 97 | "name": "stdout", 98 | "output_type": "stream", 99 | "text": [ 100 | "Etest = 0.355\n" 101 | ] 102 | } 103 | ], 104 | "source": [ 105 | "path = './data/hw2_test.dat'\n", 106 | "Etest = decision.decision_dtest(path, theta, sign, index)\n", 107 | "print('Etest = ', Etest)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": { 114 | "collapsed": true 115 | }, 116 | "outputs": [], 117 | "source": [] 118 | } 119 | ], 120 | "metadata": { 121 | "kernelspec": { 122 | "display_name": "Python 3", 123 | "language": "python", 124 | "name": "python3" 125 | }, 126 | "language_info": { 127 | "codemirror_mode": { 128 | "name": "ipython", 129 | "version": 3 130 | }, 131 | "file_extension": ".py", 132 | "mimetype": "text/x-python", 133 | "name": "python", 134 | "nbconvert_exporter": "python", 135 | "pygments_lexer": "ipython3", 136 | "version": "3.6.3" 137 | } 138 | }, 139 | "nbformat": 4, 140 | "nbformat_minor": 2 141 | } 142 | -------------------------------------------------------------------------------- /Week2/reference_hw2/preprocess.py: -------------------------------------------------------------------------------- 1 | # hw2 2 | import numpy as np 3 | 4 | # generate random data 5 | def generate_input_data(time_seed): 6 | np.random.seed(time_seed) 7 | raw_X = np.sort(np.random.uniform(-1, 1, 20)) 8 | # 加20%噪声 9 | noised_y = np.sign(raw_X) * np.where(np.random.random(raw_X.shape[0]) < 0.2, -1, 1) 10 | return raw_X, noised_y 11 | 12 | # load the data 13 | def read_input_data(path): 14 | x = [] 15 | y = [] 16 | for line in open(path).readlines(): 17 | items = line.strip().split(' ') 18 | tmp_x = [] 19 | for i in range(0, len(items) - 1): tmp_x.append(float(items[i])) 20 | x.append(tmp_x) 21 | y.append(float(items[-1])) 22 | return np.array(x), np.array(y) 23 | 24 | -------------------------------------------------------------------------------- /Week3/README.md: -------------------------------------------------------------------------------- 1 | ## 包含文件 2 | 3 | - hw3:第三周作业 4 | 5 | ## 主要内容 6 | 7 | 视频地址: 8 | 9 | 
https://www.bilibili.com/video/av36731342/?p=34 10 | 11 | https://www.bilibili.com/video/av36731342/?p=35 12 | 13 | https://www.bilibili.com/video/av36731342/?p=36 14 | 15 | https://www.bilibili.com/video/av36731342/?p=37 16 | 17 | https://www.bilibili.com/video/av36731342/?p=38 18 | 19 | https://www.bilibili.com/video/av36731342/?p=39 20 | 21 | https://www.bilibili.com/video/av36731342/?p=40 22 | 23 | https://www.bilibili.com/video/av36731342/?p=41 24 | 25 | https://www.bilibili.com/video/av36731342/?p=42 26 | 27 | https://www.bilibili.com/video/av36731342/?p=43 28 | 29 | https://www.bilibili.com/video/av36731342/?p=44 30 | 31 | https://www.bilibili.com/video/av36731342/?p=45 32 | 33 | https://www.bilibili.com/video/av36731342/?p=46 34 | 35 | https://www.bilibili.com/video/av36731342/?p=47 36 | 37 | https://www.bilibili.com/video/av36731342/?p=48 38 | 39 | https://www.bilibili.com/video/av36731342/?p=49 40 | 41 | 参考资料: 42 | 43 | https://redstonewill.com/232/ 44 | 45 | https://redstonewill.com/236/ 46 | 47 | https://redstonewill.com/243/ 48 | 49 | https://redstonewill.com/246/ 50 | 51 | 52 | 53 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /Week3/hw3/LR.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class LinearRegression(object): 4 | 5 | def __init__(self): 6 | pass 7 | 8 | # Q1: Carry out Linear Regression without transformation, i.e., with feature vector: (1, x1, x2), 9 | # Run the experiments for 1000 times. What is the average Ein over 1000 experiments? 10 | 11 | # generate target function: f(x1, x2) = sign(x1*x1 + x2*x2 - 0.6) 12 | def target_function(self, X1, X2): 13 | y = np.ones(X1.shape[0]) 14 | y[(X1*X1 + X2*X2 - 0.6) < 0] = -1 15 | 16 | return y 17 | 18 | 19 | # create train data with 10% flipping noise 20 | def generate_data_1d(self, time_seed): 21 | np.random.seed(time_seed) 22 | X = np.zeros((1000, 3)) # 1000 train data 23 | y = np.zeros((1000, 1)) 24 | 25 | # X features 26 | ### YOUR CODE HERE 27 | 28 | 29 | 30 | ### END YOUR CODE 31 | # y label without noise 32 | y = self.target_function(X[:, 1], X[:, 2]) 33 | # y label with 10% noise 34 | y_noise = y * np.where(np.random.random(1000) < 0.1, -1, 1) 35 | 36 | return X, y_noise 37 | 38 | 39 | # calculate error rate 40 | def cal_error(self, X, y, W): 41 | # calculate scores 42 | scores = np.dot(X, W) * y 43 | # calculate all errors 44 | error = np.sum(np.where(scores < 0, 1, 0)) 45 | error /= X.shape[0] 46 | 47 | return error 48 | 49 | 50 | # calculate linear regression closed form solution 51 | def LinearR_closed_form(self, X, y): 52 | 53 | # linear regression closed form solution 54 | ### YOUR CODE HERE 55 | 56 | 57 | 58 | ### END YOUR CODE 59 | 60 | 61 | 62 | # calculate the average Ein by 1000 iteration 63 | def cal_Ein_1d(self): 64 | 65 | ### YOUR CODE HERE 66 | 67 | 68 | 69 | ### END YOUR CODE 70 | 71 | return Ein 72 | 73 | 74 | # Q2: Carry out Linear Regression with transformation, i.e., with feature vector: (1, x1, x2, x1x2, x1*x1, x2*x2), 75 | # Run the experiments for 1000 times. What is the average Ein over 1000 experiments?
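    # Aside -- sketches of the two pieces the YOUR CODE HERE blocks in this file ask for,
    # added for illustration only and not part of the original skeleton (the filled-in
    # version is in Week3/reference_hw3/LR.py). np.linalg.pinv(X) is used here instead of
    # inverting X^T X explicitly; for full-column-rank X the two give the same
    # w = (X^T X)^-1 X^T y, and the pseudo-inverse form is the numerically safer way to
    # write it.
    @staticmethod
    def closed_form_sketch(X, y):
        return np.linalg.pinv(X).dot(y)  # least-squares weights in one call

    @staticmethod
    def quadratic_features_sketch(x1, x2):
        # builds the Q2 feature vector (1, x1, x2, x1*x2, x1^2, x2^2) row by row
        n = x1.shape[0]
        return np.column_stack((np.ones(n), x1, x2, x1 * x2, x1 * x1, x2 * x2))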
76 | 77 | # create train data with 10% flipping noise 78 | def generate_data_2d(self, time_seed): 79 | 80 | np.random.seed(time_seed) 81 | X = np.zeros((1000, 6)) # 1000 train data 82 | y = np.zeros((1000, 1)) 83 | 84 | # X features 85 | ### YOUR CODE HERE 86 | 87 | 88 | 89 | ### END YOUR CODE 90 | # y label without noise 91 | y = self.target_function(X[:, 1], X[:, 2]) 92 | # y label with 10% noise 93 | y_noise = y * np.where(np.random.random(1000) < 0.1, -1, 1) 94 | 95 | return X, y_noise 96 | 97 | 98 | # calculate the average Ein by 1000 iteration 99 | def cal_Ein_2d(self): 100 | 101 | ### YOUR CODE HERE 102 | 103 | 104 | 105 | ### END YOUR CODE 106 | 107 | return Ein 108 | 109 | 110 | class LogisticRegression(object): 111 | 112 | def __init__(self): 113 | pass 114 | 115 | # Implement the fixed learning rate gradient descent algorithm below for logistic regression, 116 | # initialized with 0. Run the algorithm with η = 0.001 and T = 2000. What is the Eout from your algorithm, 117 | # evaluated using the 0/1 error on the test set? 118 | 119 | # define load data function 120 | def read_input_data(self, path): 121 | x = [] 122 | y = [] 123 | for line in open(path).readlines(): 124 | items = line.strip().split(' ') 125 | tmp_x = [] 126 | for i in range(0, len(items) - 1): 127 | tmp_x.append(float(items[i])) 128 | x.append(tmp_x) 129 | y.append(float(items[-1])) 130 | return np.array(x), np.array(y) 131 | 132 | 133 | # define sigmoid function 134 | def sigmoid(self, x): 135 | 136 | ### YOUR CODE HERE 137 | 138 | 139 | 140 | ### END YOUR CODE 141 | 142 | 143 | # Q1: Gradient Descent 144 | # define gradient descent function 145 | def gradient_descent(self, X, y): 146 | y = y.reshape(-1, 1) # reshape (1000,) to (1000,1) 147 | m = X.shape[0] # number of samples 148 | n = X.shape[1] # number of features 149 | T = 2000 # number of iteration 150 | learning_rate = 0.001 # learning rate 151 | W = np.zeros((n, 1)) # initialize weights 152 | 153 | for i in range(T): 154 | ### YOUR CODE HERE 155 | 156 | 157 | 158 | ### END YOUR CODE 159 | 160 | 161 | return W 162 | 163 | 164 | # define predict error function by W 165 | def predict(self, X, y, W): 166 | y_hat = self.sigmoid(X.dot(W)) # predict probability [0,1] 167 | y_hat[y_hat >= 0.5] = 1 # positive 168 | y_hat[y_hat < 0.5] = -1 # negative 169 | 170 | y = y.reshape(-1,1) # reshape 2D 171 | Ein_mean = np.mean(y_hat != y) 172 | 173 | return Ein_mean 174 | 175 | 176 | # Logistic regression by GD 177 | def lr_gd(self, X_train, y_train, X_test, y_test): 178 | # Gradient descent 179 | W = self.gradient_descent(X_train, y_train) # calculate weights 180 | 181 | # calculate test data error 182 | Ein_mean = self.predict(X_test, y_test, W) 183 | 184 | return Ein_mean 185 | 186 | 187 | # Q2: Stochastic Gradient Descent 188 | # define stochastic gradient descent function 189 | def stochastic_gradient_descent(self, X, y): 190 | m = X.shape[0] # number of samples 191 | n = X.shape[1] # number of features 192 | T = 2000 # number of iteration 193 | learning_rate = 0.001 # learning rate 194 | W = np.zeros((n, 1)) # initialize weights 195 | 196 | for t in range(T): 197 | ### YOUR CODE HERE 198 | 199 | 200 | 201 | ### END YOUR CODE 202 | 203 | return W 204 | 205 | # Logistic regression by SGD 206 | def lr_sgd(self, X_train, y_train, X_test, y_test): 207 | # Gradient descent 208 | W = self.stochastic_gradient_descent(X_train, y_train) # calculate weights 209 | 210 | # calculate test data error 211 | Ein_mean = self.predict(X_test, y_test, W) 212 | 213 | return Ein_mean 214 
| 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | -------------------------------------------------------------------------------- /Week3/hw3/hw3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## hw3-1" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy as np\n", 19 | "from LR import *" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "target function $f(x_1, x_2) = sign(x_1^2 + x_2^2 - 0.6)$" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "### Q1: feature vector: $(1, x_1, x_2)$" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "calculate the average $E_{in}$ by 1000 iteration" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 2, 46 | "metadata": {}, 47 | "outputs": [ 48 | { 49 | "name": "stdout", 50 | "output_type": "stream", 51 | "text": [ 52 | "0.503475\n" 53 | ] 54 | } 55 | ], 56 | "source": [ 57 | "linreg = LinearRegression()\n", 58 | "Ein_1d = linreg.cal_Ein_1d()\n", 59 | "print(Ein_1d)" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "### Q2: feature vector: $(1, x_1, x_2, x_1x_2, x_1^2, x_2^2)$" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "calculate the average $E_{in}$ by 1000 iteration" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 3, 79 | "metadata": {}, 80 | "outputs": [ 81 | { 82 | "name": "stdout", 83 | "output_type": "stream", 84 | "text": [ 85 | "0.123886\n" 86 | ] 87 | } 88 | ], 89 | "source": [ 90 | "Ein_2d = linreg.cal_Ein_2d()\n", 91 | "print(Ein_2d)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "## hw3-2" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "### Q1: Gradient Descent " 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 4, 111 | "metadata": { 112 | "collapsed": true 113 | }, 114 | "outputs": [], 115 | "source": [ 116 | "logreg = LogisticRegression()" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 5, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "# Load train data and test data\n", 126 | "X_train, y_train = logreg.read_input_data('./data/hw3_train.dat')\n", 127 | "X_test, y_test = logreg.read_input_data('./data/hw3_test.dat')" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 6, 133 | "metadata": {}, 134 | "outputs": [ 135 | { 136 | "name": "stdout", 137 | "output_type": "stream", 138 | "text": [ 139 | "0.471666666667\n" 140 | ] 141 | } 142 | ], 143 | "source": [ 144 | "Ein_gd = logreg.lr_gd(X_train, y_train, X_test, y_test)\n", 145 | "print(Ein_gd)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "### Q2: Stochastic Gradient Descent" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 7, 158 | "metadata": {}, 159 | "outputs": [ 160 | { 161 | "name": "stdout", 162 | "output_type": "stream", 163 | "text": [ 164 | "0.182333333333\n" 165 | ] 166 | } 167 | ], 168 | "source": [ 169 | "Ein_sgd = 
logreg.lr_sgd(X_train, y_train, X_test, y_test)\n", 170 | "print(Ein_sgd)" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": { 177 | "collapsed": true 178 | }, 179 | "outputs": [], 180 | "source": [] 181 | } 182 | ], 183 | "metadata": { 184 | "kernelspec": { 185 | "display_name": "Python 3", 186 | "language": "python", 187 | "name": "python3" 188 | }, 189 | "language_info": { 190 | "codemirror_mode": { 191 | "name": "ipython", 192 | "version": 3 193 | }, 194 | "file_extension": ".py", 195 | "mimetype": "text/x-python", 196 | "name": "python", 197 | "nbconvert_exporter": "python", 198 | "pygments_lexer": "ipython3", 199 | "version": "3.6.3" 200 | } 201 | }, 202 | "nbformat": 4, 203 | "nbformat_minor": 2 204 | } 205 | -------------------------------------------------------------------------------- /Week3/reference_hw3/LR.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class LinearRegression(object): 4 | 5 | def __init__(self): 6 | pass 7 | 8 | # Q1: Carry out Linear Regression without transformation, i.e., with feature vector: (1, x1, x2). 9 | # Run the experiment 1000 times. What is the average Ein over the 1000 experiments? 10 | 11 | # generate target function: f(x1, x2) = sign(x1*x1 + x2*x2 - 0.6) 12 | def target_function(self, X1, X2): 13 | y = np.ones(X1.shape[0]) 14 | y[(X1*X1 + X2*X2 - 0.6) < 0] = -1 15 | 16 | return y 17 | 18 | 19 | # create train data with 10% flipping noise 20 | def generate_data_1d(self, time_seed): 21 | np.random.seed(time_seed) 22 | X = np.zeros((1000, 3)) # 1000 train data 23 | y = np.zeros((1000, 1)) 24 | 25 | # X features 26 | X[:, 0] = 1 27 | X[:, 1] = np.random.uniform(-1, 1, 1000) 28 | X[:, 2] = np.random.uniform(-1, 1, 1000) 29 | # y label without noise 30 | y = self.target_function(X[:, 1], X[:, 2]) 31 | # y label with 10% noise 32 | y_noise = y * np.where(np.random.random(1000) < 0.1, -1, 1) 33 | 34 | return X, y_noise 35 | 36 | 37 | # calculate error rate 38 | def cal_error(self, X, y, W): 39 | # calculate scores 40 | scores = np.dot(X, W) * y 41 | # calculate all errors 42 | error = np.sum(np.where(scores < 0, 1, 0)) 43 | error /= X.shape[0] 44 | 45 | return error 46 | 47 | 48 | # calculate linear regression closed form solution 49 | def LinearR_closed_form(self, X, y): 50 | 51 | # linear regression closed form solution 52 | return np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y) 53 | 54 | 55 | # calculate the average Ein over 1000 iterations 56 | def cal_Ein_1d(self): 57 | Ein_all = [] # store all Ein 58 | N = 1000 # number of iterations 59 | for i in range(1000): 60 | X, y = self.generate_data_1d(i) # generate data 61 | W = self.LinearR_closed_form(X, y) # calculate weights 62 | Ein = self.cal_error(X, y, W) # calculate Ein 63 | Ein_all.append(Ein) 64 | 65 | # mean of Ein 66 | Ein = np.mean(Ein_all) 67 | return Ein 68 | 69 | 70 | # Q2: Carry out Linear Regression with the quadratic feature transformation, i.e., with feature vector: (1, x1, x2, x1*x2, x1*x1, x2*x2). 71 | # Run the experiment 1000 times. What is the average Ein over the 1000 experiments? 
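# --- Editor's note (hedged): the six features above form the quadratic transform
#     phi(x1, x2) = (1, x1, x2, x1*x2, x1^2, x2^2).
# Because the target f(x1, x2) = sign(x1^2 + x2^2 - 0.6) is a circle, linear
# regression on the transformed features fits it well: the notebooks in this
# folder report an average Ein of about 0.124 here versus about 0.503 with the
# plain (1, x1, x2) features of Q1. ---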
72 | 73 | # create train data with 10% flipping noise 74 | def generate_data_2d(self, time_seed): 75 | 76 | np.random.seed(time_seed) 77 | X = np.zeros((1000, 6)) # 1000 train data 78 | y = np.zeros((1000, 1)) 79 | 80 | # X features 81 | X[:, 0] = 1 82 | X[:, 1] = np.random.uniform(-1, 1, 1000) 83 | X[:, 2] = np.random.uniform(-1, 1, 1000) 84 | X[:, 3] = X[:, 1] * X[:, 2] 85 | X[:, 4] = X[:, 1] * X[:, 1] 86 | X[:, 5] = X[:, 2] * X[:, 2] 87 | # y label without noise 88 | y = self.target_function(X[:, 1], X[:, 2]) 89 | # y label with 10% noise 90 | y_noise = y * np.where(np.random.random(1000) < 0.1, -1, 1) 91 | 92 | return X, y_noise 93 | 94 | 95 | # calculate the average Ein over 1000 iterations 96 | def cal_Ein_2d(self): 97 | Ein_all = [] # store all Ein 98 | N = 1000 # number of iterations 99 | for i in range(1000): 100 | X, y = self.generate_data_2d(i) # generate data 101 | W = self.LinearR_closed_form(X, y) # calculate weights 102 | Ein = self.cal_error(X, y, W) # calculate Ein 103 | Ein_all.append(Ein) 104 | 105 | # mean of Ein 106 | Ein = np.mean(Ein_all) 107 | return Ein 108 | 109 | 110 | class LogisticRegression(object): 111 | 112 | def __init__(self): 113 | pass 114 | 115 | # Implement the fixed learning rate gradient descent algorithm below for logistic regression, 116 | # initialized with w = 0. Run the algorithm with η = 0.001 and T = 2000. What is the Eout from your algorithm, 117 | # evaluated using the 0/1 error on the test set? 118 | 119 | # define data loading function 120 | def read_input_data(self, path): 121 | x = [] 122 | y = [] 123 | for line in open(path).readlines(): 124 | items = line.strip().split(' ') 125 | tmp_x = [] 126 | for i in range(0, len(items) - 1): 127 | tmp_x.append(float(items[i])) 128 | x.append(tmp_x) 129 | y.append(float(items[-1])) 130 | return np.array(x), np.array(y) 131 | 132 | 133 | # define sigmoid function 134 | def sigmoid(self, x): 135 | return 1.0 / (np.exp(-x) + 1) 136 | 137 | 138 | # Q1: Gradient Descent 139 | # define gradient descent function 140 | def gradient_descent(self, X, y): 141 | y = y.reshape(-1, 1) # reshape (1000,) to (1000,1) 142 | m = X.shape[0] # number of samples 143 | n = X.shape[1] # number of features 144 | T = 2000 # number of iterations 145 | learning_rate = 0.001 # learning rate 146 | W = np.zeros((n, 1)) # initialize weights 147 | 148 | for i in range(T): 149 | loss = 1/m * np.sum(np.log(1 + np.exp(-1 * X.dot(W) * y))) # cross-entropy loss (tracked for reference; not used in the update) 150 | dW = 1/m * np.sum(self.sigmoid(-X.dot(W) * y) * -y * X, axis=0).reshape(n, 1) 151 | W = W - learning_rate * dW 152 | 153 | return W 154 | 155 | 156 | # define prediction error (0/1) function given W 157 | def predict(self, X, y, W): 158 | y_hat = self.sigmoid(X.dot(W)) # predict probability [0,1] 159 | y_hat[y_hat >= 0.5] = 1 # positive 160 | y_hat[y_hat < 0.5] = -1 # negative 161 | 162 | y = y.reshape(-1,1) # reshape 2D 163 | Ein_mean = np.mean(y_hat != y) 164 | 165 | return Ein_mean 166 | 167 | 168 | # Logistic regression by GD 169 | def lr_gd(self, X_train, y_train, X_test, y_test): 170 | # Gradient descent 171 | W = self.gradient_descent(X_train, y_train) # calculate weights 172 | 173 | # calculate test data (out-of-sample) error 174 | Ein_mean = self.predict(X_test, y_test, W) 175 | 176 | return Ein_mean 177 | 178 | 179 | # Q2: Stochastic Gradient Descent 180 | # define stochastic gradient descent function 181 | def stochastic_gradient_descent(self, X, y): 182 | m = X.shape[0] # number of samples 183 | n = X.shape[1] # number of features 184 | T = 2000 # number of iterations 185 | learning_rate = 0.001 # learning rate 186 | W = 
np.zeros((n, 1)) # initialize weights 187 | 188 | for t in range(T): # each outer iteration makes one full pass over the m samples 189 | loss = 0.0 190 | for i in range(m): 191 | loss += np.log(1 + np.exp(-1 * X[i,:].dot(W) * y[i])) # cross-entropy loss (tracked for reference; not used in the update) 192 | dW = (self.sigmoid(-X[i,:].dot(W) * y[i]) * -y[i] * X[i,:]).reshape(n, 1) 193 | W = W - learning_rate * dW 194 | 195 | return W 196 | 197 | # Logistic regression by SGD 198 | def lr_sgd(self, X_train, y_train, X_test, y_test): 199 | # Stochastic gradient descent 200 | W = self.stochastic_gradient_descent(X_train, y_train) # calculate weights 201 | 202 | # calculate test data (out-of-sample) error 203 | Ein_mean = self.predict(X_test, y_test, W) 204 | 205 | return Ein_mean 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | -------------------------------------------------------------------------------- /Week3/reference_hw3/hw3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## hw3-1" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 3, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "from LR import *" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "target function $f(x_1, x_2) = sign(x_1^2 + x_2^2 - 0.6)$" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "### Q1: feature vector: $(1, x_1, x_2)$" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "calculate the average $E_{in}$ over 1000 iterations" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 4, 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "name": "stdout", 48 | "output_type": "stream", 49 | "text": [ 50 | "0.503475\n" 51 | ] 52 | } 53 | ], 54 | "source": [ 55 | "linreg = LinearRegression()\n", 56 | "Ein_1d = linreg.cal_Ein_1d()\n", 57 | "print(Ein_1d)" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "### Q2: feature vector: $(1, x_1, x_2, x_1x_2, x_1^2, x_2^2)$" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "calculate the average $E_{in}$ over 1000 iterations" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 5, 77 | "metadata": {}, 78 | "outputs": [ 79 | { 80 | "name": "stdout", 81 | "output_type": "stream", 82 | "text": [ 83 | "0.123886\n" 84 | ] 85 | } 86 | ], 87 | "source": [ 88 | "Ein_2d = linreg.cal_Ein_2d()\n", 89 | "print(Ein_2d)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "## hw3-2" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "### Q1: Gradient Descent " 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 6, 109 | "metadata": { 110 | "collapsed": true 111 | }, 112 | "outputs": [], 113 | "source": [ 114 | "logreg = LogisticRegression()" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 7, 120 | "metadata": { 121 | "collapsed": true 122 | }, 123 | "outputs": [], 124 | "source": [ 125 | "# Load train data and test data\n", 126 | "X_train, y_train = logreg.read_input_data('./data/hw3_train.dat')\n", 127 | "X_test, y_test = logreg.read_input_data('./data/hw3_test.dat')\n", 128 | "# add bias\n", 129 | "X_train = np.hstack((np.ones(X_train.shape[0]).reshape(-1,1), X_train))\n", 130 | "X_test = 
np.hstack((np.ones(X_test.shape[0]).reshape(-1,1), X_test))" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 8, 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "name": "stdout", 140 | "output_type": "stream", 141 | "text": [ 142 | "0.475\n" 143 | ] 144 | } 145 | ], 146 | "source": [ 147 | "Ein_gd = logreg.lr_gd(X_train, y_train, X_test, y_test)\n", 148 | "print(Ein_gd)" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "### Q2: Stochastic Gradient Descent" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 9, 161 | "metadata": {}, 162 | "outputs": [ 163 | { 164 | "name": "stdout", 165 | "output_type": "stream", 166 | "text": [ 167 | "0.181666666667\n" 168 | ] 169 | } 170 | ], 171 | "source": [ 172 | "Ein_sgd = logreg.lr_sgd(X_train, y_train, X_test, y_test)\n", 173 | "print(Ein_sgd)" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "metadata": { 180 | "collapsed": true 181 | }, 182 | "outputs": [], 183 | "source": [] 184 | } 185 | ], 186 | "metadata": { 187 | "kernelspec": { 188 | "display_name": "Python 3", 189 | "language": "python", 190 | "name": "python3" 191 | }, 192 | "language_info": { 193 | "codemirror_mode": { 194 | "name": "ipython", 195 | "version": 3 196 | }, 197 | "file_extension": ".py", 198 | "mimetype": "text/x-python", 199 | "name": "python", 200 | "nbconvert_exporter": "python", 201 | "pygments_lexer": "ipython3", 202 | "version": "3.6.3" 203 | } 204 | }, 205 | "nbformat": 4, 206 | "nbformat_minor": 2 207 | } 208 | -------------------------------------------------------------------------------- /Week4/README.md: -------------------------------------------------------------------------------- 1 | ## Included Files 2 | 3 | - hw4: Week 4 homework 4 | 5 | ## Main Content 6 | 7 | Video links: 8 | 9 | https://www.bilibili.com/video/av36731342/?p=50 10 | 11 | https://www.bilibili.com/video/av36731342/?p=51 12 | 13 | https://www.bilibili.com/video/av36731342/?p=52 14 | 15 | https://www.bilibili.com/video/av36731342/?p=53 16 | 17 | https://www.bilibili.com/video/av36731342/?p=54 18 | 19 | https://www.bilibili.com/video/av36731342/?p=55 20 | 21 | https://www.bilibili.com/video/av36731342/?p=56 22 | 23 | https://www.bilibili.com/video/av36731342/?p=57 24 | 25 | https://www.bilibili.com/video/av36731342/?p=58 26 | 27 | https://www.bilibili.com/video/av36731342/?p=59 28 | 29 | https://www.bilibili.com/video/av36731342/?p=60 30 | 31 | https://www.bilibili.com/video/av36731342/?p=61 32 | 33 | https://www.bilibili.com/video/av36731342/?p=62 34 | 35 | https://www.bilibili.com/video/av36731342/?p=63 36 | 37 | https://www.bilibili.com/video/av36731342/?p=64 38 | 39 | https://www.bilibili.com/video/av36731342/?p=65 40 | 41 | 42 | References: 43 | 44 | https://redstonewill.com/249/ 45 | 46 | https://redstonewill.com/252/ 47 | 48 | https://redstonewill.com/255/ 49 | 50 | https://redstonewill.com/311/ 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /Week4/hw4/RidgeRegression.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class RidReg(object): 4 | 5 | def __init__(self): 6 | pass 7 | 8 | 9 | # Q1: if lambda = 11.26, what is Ein and Eout? 
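# --- Editor's note (hedged): for the ridge_regression blank below, the standard
# closed-form ridge solution, consistent with the reference implementation in
# reference_hw4, is
#     W = (X^T X + lambda * I)^(-1) X^T y
# which in NumPy could be written, e.g., as
#     W = np.linalg.inv(X.T.dot(X) + lamda * np.eye(X.shape[1])).dot(X.T).dot(y)
# With lambda = 0 this reduces to the ordinary least-squares solution. ---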
10 | # define data loading function 11 | def read_input_data(self, path): 12 | x = [] 13 | y = [] 14 | for line in open(path).readlines(): 15 | items = line.strip().split(' ') 16 | tmp_x = [] 17 | for i in range(0, len(items) - 1): 18 | tmp_x.append(float(items[i])) 19 | x.append(tmp_x) 20 | y.append(float(items[-1])) 21 | return np.array(x), np.array(y) 22 | 23 | # define ridge regression for classification by closed form solution 24 | def ridge_regression(self, X, y, lamda = 0): 25 | # ridge regression closed form solution 26 | ### YOUR CODE HERE 27 | 28 | 29 | 30 | ### END YOUR CODE 31 | 32 | return W 33 | 34 | # define calculate error function 35 | def cal_error(self, X, y, W): 36 | 37 | ### YOUR CODE HERE 38 | 39 | 40 | 41 | ### END YOUR CODE 42 | 43 | return error 44 | 45 | # define calculate Ein and Eout 46 | def cal_Ein_Eout(self, X_train, y_train, X_test, y_test, lamda=11.26): 47 | 48 | ### YOUR CODE HERE 49 | 50 | 51 | 52 | ### END YOUR CODE 53 | 54 | return Ein, Eout 55 | 56 | 57 | # Q2: split into train and validation data; calculate Eval and Eout with respect to different lambda values 58 | def cal_val(self, X_train, y_train, X_test, y_test): 59 | 60 | # split train data to train and val 61 | X_Dtrain = X_train[:120] # first 120 samples 62 | y_Dtrain = y_train[:120] 63 | X_Dval = X_train[-80:] # last 80 samples 64 | y_Dval = y_train[-80:] 65 | 66 | lamda_log = [i for i in range(2, -11, -1)] # log10(lamda) 67 | Eval_best = 1.0 # min Eval 68 | W_best = np.zeros(X_train.shape[1]).reshape(-1,1) # W of min Eval 69 | lamda_best = 0 # initialize lambda 70 | Eval_all = [] # store all Eval 71 | 72 | 73 | ### YOUR CODE HERE 74 | 75 | 76 | 77 | ### END YOUR CODE 78 | 79 | return lamda_best, Eval_best, Eout 80 | 81 | # Q3: 5-fold cross-validation 82 | # define 5-fold cross-validation 83 | def cross_val(self, X, y): 84 | lamda_log = [i for i in range(2, -11, -1)] # log10(lamda) 85 | Eval_best = 1.0 # min Eval 86 | W_best = np.zeros(X.shape[1]).reshape(-1,1) # W of min Eval 87 | lamda_best = 0 # initialize lambda 88 | Eval_all = [] # store all Eval for different lambda values 89 | 90 | k = 5 # k-fold cross-validation 91 | num_fold = int(X.shape[0] / k) # samples in one fold 92 | 93 | ### YOUR CODE HERE 94 | 95 | 96 | 97 | ### END YOUR CODE 98 | 99 | return lamda_best, Eval_best, Eval_all 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | -------------------------------------------------------------------------------- /Week4/hw4/data/hw4_train.dat: -------------------------------------------------------------------------------- 1 | 0.568304 0.568283 1 2 | 0.310968 0.310956 -1 3 | 0.103376 0.103373 -1 4 | 0.0531882 0.053218 -1 5 | 0.97006 0.970064 1 6 | 0.0941873 0.0941707 -1 7 | 0.655902 0.655892 1 8 | 0.370821 0.370839 -1 9 | 0.558482 0.558476 1 10 | 0.849389 0.849383 1 11 | 0.796038 0.796051 1 12 | 0.723246 0.723252 1 13 | 0.571236 0.571254 1 14 | 0.385144 0.38512 -1 15 | 0.877176 0.877168 1 16 | 0.74655 0.746552 1 17 | 0.0676164 0.0676087 -1 18 | 0.0412524 0.0412649 -1 19 | 0.851637 0.851661 1 20 | 0.586989 0.58698 1 21 | 0.661014 0.660994 1 22 | 0.587988 0.587968 1 23 | 0.257615 0.257628 -1 24 | 0.680505 0.680485 1 25 | 0.895242 0.895257 1 26 | 0.381124 0.381139 -1 27 | 0.314332 0.31433 -1 28 | 0.157744 0.157747 -1 29 | 0.670923 0.670925 1 30 | 0.531716 0.531736 1 31 | 0.810956 0.810938 1 32 | 0.514937 0.51493 1 33 | 0.188567 0.188587 -1 34 | 0.778528 0.778527 1 35 | 0.904966 0.904955 1 36 | 0.563699 0.563708 1 37 | 0.599768 0.59978 1 38 | 0.619909 0.619928 1 39 | 0.650556 0.650556 1 40 | 0.131949 0.131967 -1 41 | 
0.251546 0.251546 -1 42 | 0.690874 0.690863 1 43 | 0.381249 0.381284 -1 44 | 0.559231 0.559232 1 45 | 0.197361 0.197367 -1 46 | 0.784776 0.784781 1 47 | 0.620494 0.620499 1 48 | 0.229646 0.229647 -1 49 | 0.0891466 0.0891438 -1 50 | 0.981857 0.981861 1 51 | 0.64711 0.647102 1 52 | 0.725596 0.725592 1 53 | 0.614771 0.614764 1 54 | 0.976315 0.976321 1 55 | 0.250716 0.250708 -1 56 | 0.281071 0.281096 -1 57 | 0.550196 0.550187 1 58 | 0.955756 0.955751 1 59 | 0.251821 0.251838 -1 60 | 0.538196 0.538183 1 61 | 0.58285 0.582836 1 62 | 0.48367 0.48368 -1 63 | 0.481451 0.481471 -1 64 | 0.291576 0.291561 -1 65 | 0.181592 0.181596 -1 66 | 0.232746 0.232759 -1 67 | 0.488322 0.488349 -1 68 | 0.664499 0.664487 1 69 | 0.0420094 0.0420475 -1 70 | 0.950521 0.950524 1 71 | 0.445707 0.445706 -1 72 | 0.430385 0.430396 -1 73 | 0.747574 0.747583 1 74 | 0.245047 0.245078 -1 75 | 0.742838 0.742833 1 76 | 0.284625 0.284627 -1 77 | 0.0613909 0.061374 -1 78 | 0.612767 0.612754 1 79 | 0.378545 0.378555 -1 80 | 0.818764 0.818763 1 81 | 0.0507026 0.0507136 -1 82 | 0.882725 0.882731 1 83 | 0.0810847 0.0810796 -1 84 | 0.836278 0.836279 1 85 | 0.696709 0.696695 1 86 | 0.603346 0.603334 1 87 | 0.513718 0.513712 1 88 | 0.247789 0.247802 -1 89 | 0.704221 0.704213 1 90 | 0.546723 0.546724 1 91 | 0.881583 0.881592 1 92 | 0.13456 0.134545 -1 93 | 0.86883 0.868815 1 94 | 0.980909 0.980887 1 95 | 0.369986 0.369986 -1 96 | 0.194455 0.194457 -1 97 | 0.483858 0.483875 -1 98 | 0.43807 0.43808 -1 99 | 0.159602 0.159592 -1 100 | 0.923499 0.923504 1 101 | 0.419902 0.419906 -1 102 | 0.659252 0.659271 1 103 | 0.419546 0.419546 -1 104 | 0.935494 0.935512 1 105 | 0.712397 0.71239 1 106 | 0.952567 0.952549 1 107 | 0.915359 0.915379 1 108 | 0.182693 0.182675 -1 109 | 0.668527 0.668522 1 110 | 0.0965221 0.0965266 -1 111 | 0.984174 0.984197 1 112 | 0.7437 0.743702 1 113 | 0.213357 0.213341 -1 114 | 0.617402 0.617386 1 115 | 0.335604 0.335604 -1 116 | 0.632581 0.632597 1 117 | 0.515744 0.515757 1 118 | 0.786921 0.786912 1 119 | 0.502608 0.502599 1 120 | 0.164538 0.164537 -1 121 | 0.507454 0.507469 1 122 | 0.822809 0.822806 1 123 | 0.42883 0.428821 -1 124 | 0.157678 0.157693 -1 125 | 0.674884 0.674896 1 126 | 0.276618 0.276622 -1 127 | 0.374795 0.374795 -1 128 | 0.396781 0.396815 -1 129 | 0.132116 0.132101 -1 130 | 0.966203 0.966249 1 131 | 0.961164 0.961159 1 132 | 0.0140044 0.014014 -1 133 | 0.509361 0.509379 1 134 | 0.195082 0.195097 -1 135 | 0.853012 0.853012 1 136 | 0.852883 0.852896 1 137 | 0.574279 0.574282 1 138 | 0.316965 0.316939 -1 139 | 0.386753 0.386761 -1 140 | 0.764792 0.764815 1 141 | 0.680442 0.680428 1 142 | 0.125299 0.125304 -1 143 | 0.619824 0.619818 1 144 | 0.687672 0.687662 1 145 | 0.760271 0.760289 1 146 | 0.227148 0.22713 -1 147 | 0.224288 0.224295 -1 148 | 0.0150326 0.0150352 -1 149 | 0.585322 0.585314 1 150 | 0.732755 0.732777 1 151 | 0.864553 0.864569 1 152 | 0.0788415 0.0788569 -1 153 | 0.4326 0.432602 -1 154 | 0.804816 0.804801 1 155 | 0.50957 0.509589 1 156 | 0.405003 0.404988 -1 157 | 0.465702 0.465691 -1 158 | 0.368576 0.368574 -1 159 | 0.56202 0.562033 1 160 | 0.552361 0.552356 1 161 | 0.18263 0.182606 -1 162 | 0.672912 0.672906 1 163 | 0.642397 0.642413 1 164 | 0.816308 0.816316 1 165 | 0.264986 0.264978 -1 166 | 0.799168 0.799179 1 167 | 0.311442 0.311432 -1 168 | 0.715291 0.715278 1 169 | 0.913262 0.913265 1 170 | 0.703566 0.70358 1 171 | 0.0868818 0.0868856 -1 172 | 0.507828 0.507835 1 173 | 0.77619 0.776196 1 174 | 0.503254 0.503257 1 175 | 0.0585257 0.0585251 -1 176 | 0.668003 0.667995 1 177 | 0.409675 
0.409686 -1 178 | 0.00104673 0.00105247 -1 179 | 0.6743 0.674268 1 180 | 0.461383 0.461378 -1 181 | 0.957667 0.957677 1 182 | 0.386593 0.386566 -1 183 | 0.260177 0.260171 -1 184 | 0.208071 0.208076 -1 185 | 0.634661 0.634646 1 186 | 0.354351 0.354351 -1 187 | 0.135384 0.135381 -1 188 | 0.216718 0.216748 -1 189 | 0.606084 0.606096 1 190 | 0.443809 0.443801 -1 191 | 0.480428 0.480418 -1 192 | 0.886987 0.886995 1 193 | 0.0126171 0.012603 -1 194 | 0.578502 0.578495 1 195 | 0.0664441 0.0664438 -1 196 | 0.292442 0.292432 -1 197 | 0.487013 0.487008 -1 198 | 0.176237 0.176234 -1 199 | 0.496052 0.496044 -1 200 | 0.62186 0.621853 1 201 | -------------------------------------------------------------------------------- /Week4/reference_hw4/RidgeRegression.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class RidReg(object): 4 | 5 | def __init__(self): 6 | pass 7 | 8 | 9 | # Q1: if lambda = 11.26, what is Ein and Eout? 10 | # define data loading function 11 | def read_input_data(self, path): 12 | x = [] 13 | y = [] 14 | for line in open(path).readlines(): 15 | items = line.strip().split(' ') 16 | tmp_x = [] 17 | for i in range(0, len(items) - 1): 18 | tmp_x.append(float(items[i])) 19 | x.append(tmp_x) 20 | y.append(float(items[-1])) 21 | return np.array(x), np.array(y) 22 | 23 | # define ridge regression for classification by closed form solution 24 | def ridge_regression(self, X, y, lamda = 0): 25 | # ridge regression closed form solution 26 | W = np.linalg.inv(X.T.dot(X) + lamda * np.eye(X.shape[1])).dot(X.T).dot(y) 27 | W = W.reshape(-1,1) 28 | 29 | return W 30 | 31 | # define calculate error function 32 | def cal_error(self, X, y, W): 33 | y = y.reshape(-1, 1) 34 | scores = X.dot(W) 35 | y_pred = np.where(scores >= 0, 1, -1) 36 | error = np.mean(y_pred != y) 37 | 38 | return error 39 | 40 | # define calculate Ein and Eout 41 | def cal_Ein_Eout(self, X_train, y_train, X_test, y_test, lamda=11.26): 42 | W = self.ridge_regression(X_train, y_train, lamda) 43 | Ein = self.cal_error(X_train, y_train, W) 44 | Eout = self.cal_error(X_test, y_test, W) 45 | 46 | return Ein, Eout 47 | 48 | 49 | # Q2: split into train and validation data; calculate Eval and Eout with respect to different lambda values 50 | def cal_val(self, X_train, y_train, X_test, y_test): 51 | 52 | # split train data to train and val 53 | X_Dtrain = X_train[:120] # first 120 samples 54 | y_Dtrain = y_train[:120] 55 | X_Dval = X_train[-80:] # last 80 samples 56 | y_Dval = y_train[-80:] 57 | 58 | lamda_log = [i for i in range(2, -11, -1)] # log10(lamda) 59 | Eval_best = 1.0 # min Eval 60 | W_best = np.zeros(X_train.shape[1]).reshape(-1,1) # W of min Eval 61 | lamda_best = 0 # initialize lambda 62 | Eval_all = [] # store all Eval 63 | 64 | for i in range(len(lamda_log)): 65 | W = self.ridge_regression(X_Dtrain, y_Dtrain, lamda=pow(10, lamda_log[i])) # calculate W by closed form solution 66 | Eval = self.cal_error(X_Dval, y_Dval, W) # calculate Eval 67 | Eval_all.append(Eval) 68 | if Eval < Eval_best: 69 | Eval_best = Eval # choose min Eval 70 | W_best = W 71 | lamda_best = pow(10, lamda_log[i]) 72 | 73 | Eout = self.cal_error(X_test, y_test, W_best) 74 | 75 | return lamda_best, Eval_best, Eout 76 | 77 | # Q3: 5-fold cross-validation 78 | # define 5-fold cross-validation 79 | def cross_val(self, X, y): 80 | lamda_log = [i for i in range(2, -11, -1)] # log10(lamda) 81 | Eval_best = 1.0 # min Eval 82 | W_best = np.zeros(X.shape[1]).reshape(-1,1) # W of min Eval 83 | lamda_best = 0 # initialize lambda 84
| Eval_all = [] # store all Eval for different lambda values 85 | 86 | k = 5 # k-fold cross-validation 87 | num_fold = int(X.shape[0] / k) # samples in one fold 88 | for l in range(len(lamda_log)): 89 | Eval = 0.0 90 | for i in range(k): 91 | X_train = np.concatenate((X[:i*num_fold], X[(i+1)*num_fold:]), axis=0) 92 | y_train = np.concatenate((y[:i*num_fold], y[(i+1)*num_fold:]), axis=0) 93 | X_val = X[i*num_fold:(i+1)*num_fold] 94 | y_val = y[i*num_fold:(i+1)*num_fold] 95 | W = self.ridge_regression(X_train, y_train, lamda=pow(10, lamda_log[l])) 96 | Eval += self.cal_error(X_val, y_val, W) 97 | Eval /= k # average Eval 98 | Eval_all.append(Eval) 99 | if Eval < Eval_best: 100 | Eval_best = Eval 101 | W_best = W 102 | lamda_best = pow(10, lamda_log[l]) 103 | 104 | return lamda_best, Eval_best, Eval_all 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | -------------------------------------------------------------------------------- /Week5/README.md: -------------------------------------------------------------------------------- 1 | ## Tianchi O2O Coupon Usage Prediction Competition: Walkthrough and Code Analysis (Beginner) 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /Week6/README.md: -------------------------------------------------------------------------------- 1 | ## Main Content 2 | 3 | Video links: 4 | 5 | https://www.bilibili.com/video/av36760800 6 | 7 | https://www.bilibili.com/video/av36760800/?p=2 8 | 9 | https://www.bilibili.com/video/av36760800/?p=3 10 | 11 | https://www.bilibili.com/video/av36760800/?p=4 12 | 13 | https://www.bilibili.com/video/av36760800/?p=5 14 | 15 | https://www.bilibili.com/video/av36760800/?p=6 16 | 17 | https://www.bilibili.com/video/av36760800/?p=7 18 | 19 | https://www.bilibili.com/video/av36760800/?p=8 20 | 21 | https://www.bilibili.com/video/av36760800/?p=9 22 | 23 | https://www.bilibili.com/video/av36760800/?p=10 24 | 25 | https://www.bilibili.com/video/av36760800/?p=11 26 | 27 | https://www.bilibili.com/video/av36760800/?p=12 28 | 29 | https://www.bilibili.com/video/av36760800/?p=13 30 | 31 | https://www.bilibili.com/video/av36760800/?p=14 32 | 33 | https://www.bilibili.com/video/av36760800/?p=15 34 | 35 | https://www.bilibili.com/video/av36760800/?p=16 36 | 37 | https://www.bilibili.com/video/av36760800/?p=17 38 | 39 | 40 | 41 | References: 42 | 43 | https://redstonewill.com/345/ 44 | 45 | https://redstonewill.com/369/ 46 | 47 | https://redstonewill.com/393/ 48 | 49 | https://redstonewill.com/417/ 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /Week7/README.md: -------------------------------------------------------------------------------- 1 | ## Included Files 2 | 3 | - hw5: Week 7 homework 4 | 5 | ## Main Content 6 | 7 | Video links: 8 | 9 | https://www.bilibili.com/video/av36760800/?p=18 10 | 11 | https://www.bilibili.com/video/av36760800/?p=19 12 | 13 | https://www.bilibili.com/video/av36760800/?p=20 14 | 15 | https://www.bilibili.com/video/av36760800/?p=21 16 | 17 | https://www.bilibili.com/video/av36760800/?p=22 18 | 19 | https://www.bilibili.com/video/av36760800/?p=23 20 | 21 | https://www.bilibili.com/video/av36760800/?p=24 22 | 23 | https://www.bilibili.com/video/av36760800/?p=25 24 | 25 | 26 | References: 27 | 28 | https://redstonewill.com/456/ 29 | 30 | https://redstonewill.com/477/ 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /Week8/README.md: -------------------------------------------------------------------------------- 1 | ## Included Files 2 | 3 | - 
SMO: Week 8 homework 4 | 5 | ## Main Content 6 | 7 | ### Task 1: Book Reading 8 | 9 | Task details: read "Machine Learning in Action", Chapter 6, Sections 6.1/6.2/6.3/6.4/6.5/6.6 10 | 11 | ## References: 12 | 13 | Li Hang, "Statistical Learning Methods", Chapter 7 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /Week8/SMO/digits.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RedstoneWill/HsuanTienLin-ML-Camp/5d24fe8410d0383db64bfe2663627ab34ad15fb8/Week8/SMO/digits.zip -------------------------------------------------------------------------------- /Week9/README.md: -------------------------------------------------------------------------------- 1 | ## Included Files 2 | 3 | - hw6: Week 9 homework 4 | 5 | ## Main Content 6 | 7 | Video links: 8 | 9 | https://www.bilibili.com/video/av36760800/?p=26 10 | 11 | https://www.bilibili.com/video/av36760800/?p=27 12 | 13 | https://www.bilibili.com/video/av36760800/?p=28 14 | 15 | https://www.bilibili.com/video/av36760800/?p=29 16 | 17 | https://www.bilibili.com/video/av36760800/?p=30 18 | 19 | https://www.bilibili.com/video/av36760800/?p=31 20 | 21 | https://www.bilibili.com/video/av36760800/?p=32 22 | 23 | https://www.bilibili.com/video/av36760800/?p=33 24 | 25 | https://www.bilibili.com/video/av36760800/?p=34 26 | 27 | https://www.bilibili.com/video/av36760800/?p=35 28 | 29 | https://www.bilibili.com/video/av36760800/?p=36 30 | 31 | https://www.bilibili.com/video/av36760800/?p=37 32 | 33 | References: 34 | 35 | https://redstonewill.com/509/ 36 | 37 | https://redstonewill.com/535/ 38 | 39 | https://redstonewill.com/569/ 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /Week9/hw6/Homework6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RedstoneWill/HsuanTienLin-ML-Camp/5d24fe8410d0383db64bfe2663627ab34ad15fb8/Week9/hw6/Homework6.png -------------------------------------------------------------------------------- /Week9/hw6/hw2_adaboost_train.dat: -------------------------------------------------------------------------------- 1 | 0.757222 0.633831 -1 2 | 0.847382 0.281581 -1 3 | 0.24931 0.618635 +1 4 | 0.538526 0.144259 -1 5 | 0.474435 0.414558 -1 6 | 0.374151 0.0120482 1 7 | 0.847185 0.217572 1 8 | 0.983368 0.250496 1 9 | 0.645141 0.485816 1 10 | 0.172211 0.254331 -1 11 | 0.116866 0.378804 -1 12 | 0.55097 0.760426 -1 13 | 0.312109 0.442938 -1 14 | 0.304777 0.0529649 1 15 | 0.572727 0.370527 1 16 | 0.171491 0.50076 -1 17 | 0.644567 0.834055 -1 18 | 0.0529041 0.338461 -1 19 | 0.0323543 0.830701 -1 20 | 0.272193 0.587396 -1 21 | 0.123521 0.0516625 1 22 | 0.905544 0.247013 1 23 | 0.854276 0.559648 1 24 | 0.375914 0.505747 -1 25 | 0.160755 0.238718 -1 26 | 0.45893 0.227062 1 27 | 0.395407 0.791184 -1 28 | 0.742325 0.586444 1 29 | 0.43615 0.136922 1 30 | 0.954217 0.680325 1 31 | 0.916386 0.381431 1 32 | 0.953844 0.439266 1 33 | 0.328701 0.721918 -1 34 | 0.275732 0.43115 -1 35 | 0.892366 0.0136661 1 36 | 0.249529 0.0709084 1 37 | 0.124333 0.611515 -1 38 | 0.54449 0.423701 1 39 | 0.86019 0.93029 -1 40 | 0.432404 0.0901487 1 41 | 0.204973 0.406648 -1 42 | 0.0748025 0.568699 -1 43 | 0.936407 0.106094 1 44 | 0.572728 0.90924 -1 45 | 0.358618 0.651613 -1 46 | 0.631685 0.910141 -1 47 | 0.802581 0.599025 1 48 | 0.366818 0.0135169 1 49 | 0.708026 0.300654 1 50 | 0.243625 0.106277 1 51 | 0.960778 0.59799 1 52 | 0.726241 0.057674 1 53 | 0.158561 0.690295 -1 54 | 0.420638 0.503567 -1 55 | 0.651344 0.290269 1 56 | 
0.933469 0.490516 1 57 | 0.502864 0.721677 -1 58 | 0.595151 0.82293 -1 59 | 0.696778 0.300018 1 60 | 0.927038 0.295737 1 61 | 0.145192 0.377728 -1 62 | 0.385435 0.68299 -1 63 | 0.296852 0.868018 -1 64 | 0.659204 0.77369 -1 65 | 0.896153 0.832046 1 66 | 0.466137 0.877674 -1 67 | 0.815532 0.164151 1 68 | 0.310117 0.857713 -1 69 | 0.522385 0.961609 -1 70 | 0.369345 0.781697 -1 71 | 0.901988 0.831265 1 72 | 0.692314 0.0640428 1 73 | 0.836977 0.614453 1 74 | 0.104584 0.357892 -1 75 | 0.265266 0.65833 -1 76 | 0.729254 0.885763 -1 77 | 0.205254 0.404956 -1 78 | 0.032359 0.778401 -1 79 | 0.464724 0.159682 1 80 | 0.940021 0.493738 1 81 | 0.248985 0.646083 -1 82 | 0.541258 0.728218 -1 83 | 0.391575 0.291076 1 84 | 0.0254967 0.300503 -1 85 | 0.475398 0.920203 -1 86 | 0.835664 0.584283 1 87 | 0.296033 0.0885163 1 88 | 0.0435908 0.646312 -1 89 | 0.284148 0.182427 1 90 | 0.627696 0.788116 -1 91 | 0.312939 0.871275 -1 92 | 0.676521 0.316903 1 93 | 0.0123539 0.178643 -1 94 | 0.682164 0.777194 -1 95 | 0.421563 0.302683 1 96 | 0.03183 0.289761 -1 97 | 0.435715 0.190071 1 98 | 0.730492 0.0655594 1 99 | 0.92527 0.524315 1 100 | 0.984815 0.383621 1 101 | -------------------------------------------------------------------------------- /Week9/hw6/hw6.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | "import scipy.linalg as lin" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "# data loading function\n", 21 | "def loadData(filename):\n", 22 | " data = pd.read_csv(filename, sep='\\s+', header=None)\n", 23 | " data = data.as_matrix() # deprecated in newer pandas; use data.values instead\n", 24 | " col, row = data.shape # note: col holds the number of samples, row the number of columns\n", 25 | " X = data[:, 0: row-1]\n", 26 | " Y = data[:, row-1:row]\n", 27 | " return X, Y" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 3, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "# decision stump\n", 37 | "def decision_stump(X, Y, thres, U):\n", 38 | " row, col = X.shape\n", 39 | " r, c = thres.shape; besterr = 1\n", 40 | " btheta = 0; bs = 0; index = 0\n", 41 | " for i in range(col):\n", 42 | " Yhat1 = np.sign(np.tile(X[:, i:i+1], (1, r)).T-thres[:, i:i+1]).T\n", 43 | " err1 = (Yhat1!=Y).T.dot(U)\n", 44 | " err2 = (-1*Yhat1!=Y).T.dot(U)\n", 45 | " s = 1 if np.min(err1) < np.min(err2) else -1\n", 46 | " if s == 1 and np.min(err1) < besterr:\n", 47 | " besterr = np.min(err1); bs = 1\n", 48 | " index = i; btheta = thres[np.argmin(err1), i]\n", 49 | " if s == -1 and np.min(err2) < besterr:\n", 50 | " besterr = np.min(err2); bs = -1\n", 51 | " index = i; btheta = thres[np.argmin(err2), i]\n", 52 | " return besterr, btheta, bs, index" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 4, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "# AdaBoost-Stump algorithm\n", 62 | "# note: slightly different from the lecture slides -- U is kept normalized so that sum(U) = 1\n", 63 | "def ada_boost(X, Y, T):\n", 64 | " row, col = X.shape\n", 65 | " U = np.ones((row, 1))/row\n", 66 | " Xsort = np.sort(X, 0)\n", 67 | " thres = (np.r_[Xsort[0:1, :] - 0.1, Xsort[0:row - 1, :]] + Xsort) / 2\n", 68 | " theta = np.zeros((T,)); s = np.zeros((T,));\n", 69 | " index = np.zeros((T,)).astype(int); alpha = np.zeros((T,))\n", 70 | " err = np.zeros((T,))\n", 71 | " for i in range(T):\n", 72 | " err[i], theta[i], s[i], index[i] = decision_stump(X, Y, thres, U)\n", 73 | " 
yhat = s[i]*np.sign(X[:, index[i]:index[i]+1]-theta[i])\n", 74 | " delta = np.sqrt((1-err[i])/err[i])\n", 75 | " U[yhat==Y] /= delta\n", 76 | " U[yhat!=Y] *= delta\n", 77 | "# uncomment when running Q14\n", 78 | "# if i == T-1:\n", 79 | "# print('sum(U): ', np.sum(U))\n", 80 | " alpha[i] = np.log(delta)\n", 81 | " U /= np.sum(U)\n", 82 | "# uncomment when running Q15\n", 83 | "# print('min err: ', np.min(err))\n", 84 | " return theta, index, s, alpha" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 5, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "# prediction function\n", 94 | "def predict(X, theta, index, s, alpha):\n", 95 | " row, col = X.shape\n", 96 | " num = len(theta)\n", 97 | " ytemp = np.tile(s.reshape((1, num)), (row, 1))*np.sign(X[:, index]-theta.reshape((1, num)))\n", 98 | " yhat = np.sign(ytemp.dot(alpha.reshape(num, 1)))\n", 99 | " return yhat" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 6, 105 | "metadata": {}, 106 | "outputs": [ 107 | { 108 | "name": "stderr", 109 | "output_type": "stream", 110 | "text": [ 111 | "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:4: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.\n", 112 | " after removing the cwd from sys.path.\n" 113 | ] 114 | } 115 | ], 116 | "source": [ 117 | "# load the data\n", 118 | "X, Y = loadData('hw2_adaboost_train.dat')\n", 119 | "Xtest, Ytest = loadData('hw2_adaboost_test.dat')\n", 120 | "row, col = X.shape\n", 121 | "r, c = Xtest.shape" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 7, 127 | "metadata": {}, 128 | "outputs": [ 129 | { 130 | "name": "stdout", 131 | "output_type": "stream", 132 | "text": [ 133 | "Ein(g1): 0.24\n" 134 | ] 135 | } 136 | ], 137 | "source": [ 138 | "# Q12\n", 139 | "theta, index, s, alpha = ada_boost(X, Y, 1)\n", 140 | "Ypred = predict(X, theta, index, s, alpha)\n", 141 | "print('Ein(g1):', np.sum(Ypred!=Y)/row)" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 8, 147 | "metadata": {}, 148 | "outputs": [ 149 | { 150 | "name": "stdout", 151 | "output_type": "stream", 152 | "text": [ 153 | "Ein(G): 0.0\n" 154 | ] 155 | } 156 | ], 157 | "source": [ 158 | "# Q13\n", 159 | "theta, index, s, alpha = ada_boost(X, Y, 300)\n", 160 | "Ypred = predict(X, theta, index, s, alpha)\n", 161 | "print('Ein(G):', np.sum(Ypred!=Y)/row)" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 9, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "# Q14 --- enable the commented lines above and run again\n", 171 | "theta, index, s, alpha = ada_boost(X, Y, 1)" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 10, 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [ 180 | "# Q16 \n", 181 | "theta, index, s, alpha = ada_boost(X, Y, 300)" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 11, 187 | "metadata": {}, 188 | "outputs": [ 189 | { 190 | "name": "stdout", 191 | "output_type": "stream", 192 | "text": [ 193 | "Eout(g1): 0.29\n" 194 | ] 195 | } 196 | ], 197 | "source": [ 198 | "# Q17\n", 199 | "theta, index, s, alpha = ada_boost(X, Y, 1)\n", 200 | "Ypred = predict(Xtest, theta, index, s, alpha)\n", 201 | "print('Eout(g1):', np.sum(Ypred!=Ytest)/r)" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 12, 207 | "metadata": {}, 208 | "outputs": [ 209 | { 210 | "name": "stdout", 211 | "output_type": "stream", 212 | "text": [ 213 | "Eout(G): 0.132\n" 214 | ] 215 | } 216 | ], 
"source": [ 218 | "# Q18\n", 219 | "theta, index, s, alpha = ada_boost(X, Y, 300)\n", 220 | "Ypred = predict(Xtest, theta, index, s, alpha)\n", 221 | "print('Eout(G):', np.sum(Ypred!=Ytest)/r)" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 13, 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [ 230 | "# ----------- Q19-20 --------------\n", 231 | "# 获得对偶矩阵K\n", 232 | "def matK(X, X1, gamma):\n", 233 | " row, col =X.shape\n", 234 | " r, c = X1.shape\n", 235 | " K = np.zeros((row, r))\n", 236 | " for i in range(r):\n", 237 | " K[:, i] = np.sum((X-X1[i:i+1, :])**2, 1)\n", 238 | " K = np.exp(-gamma*K)\n", 239 | " return K" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 14, 245 | "metadata": {}, 246 | "outputs": [ 247 | { 248 | "name": "stderr", 249 | "output_type": "stream", 250 | "text": [ 251 | "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:4: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.\n", 252 | " after removing the cwd from sys.path.\n" 253 | ] 254 | } 255 | ], 256 | "source": [ 257 | "# 加载数据\n", 258 | "X, Y = loadData('hw2_lssvm_all.dat')\n", 259 | "Xtrain = X[0:400, :]; Ytrain = Y[0:400, :]\n", 260 | "Xtest = X[400:, :]; Ytest = Y[400:, :]\n", 261 | "row, col = Xtest.shape" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": 15, 267 | "metadata": {}, 268 | "outputs": [ 269 | { 270 | "name": "stdout", 271 | "output_type": "stream", 272 | "text": [ 273 | "最小的Ein: 0.0\n", 274 | "最小的Eout: 0.39\n" 275 | ] 276 | } 277 | ], 278 | "source": [ 279 | "# 测试\n", 280 | "gamma = [32, 2, 0.125]\n", 281 | "lamb = [0.001, 1, 1000]\n", 282 | "Ein = np.zeros((len(gamma), len(lamb)))\n", 283 | "Eout = np.zeros((len(gamma), len(lamb)))\n", 284 | "for i in range(len(gamma)):\n", 285 | " K = matK(Xtrain, Xtrain, gamma[i])\n", 286 | " K2 = matK(Xtrain, Xtest, gamma[i])\n", 287 | " for j in range(len(lamb)):\n", 288 | " beta = lin.pinv(lamb[j]*np.eye(400)+K).dot(Ytrain)\n", 289 | " yhat = np.sign(K.dot(beta))\n", 290 | " Ein[i, j] = np.sum(yhat != Ytrain)/400\n", 291 | " yhat2 = np.sign(K2.T.dot(beta))\n", 292 | " Eout[i, j] = np.sum(yhat2 != Ytest)/row\n", 293 | "print('最小的Ein: ', np.min(Ein))\n", 294 | "print('最小的Eout: ', np.min(Eout))" 295 | ] 296 | } 297 | ], 298 | "metadata": { 299 | "kernelspec": { 300 | "display_name": "Python 3", 301 | "language": "python", 302 | "name": "python3" 303 | }, 304 | "language_info": { 305 | "codemirror_mode": { 306 | "name": "ipython", 307 | "version": 3 308 | }, 309 | "file_extension": ".py", 310 | "mimetype": "text/x-python", 311 | "name": "python", 312 | "nbconvert_exporter": "python", 313 | "pygments_lexer": "ipython3", 314 | "version": "3.7.1" 315 | } 316 | }, 317 | "nbformat": 4, 318 | "nbformat_minor": 2 319 | } 320 | --------------------------------------------------------------------------------