├── Part1
│   ├── Least_squares.ipynb
│   ├── Least_squares.py
│   ├── Numpy_P_R_F1_etc.ipynb
│   ├── Numpy_P_R_F1_etc.py
│   ├── Sklearn_P_R_F1_ect.ipynb
│   └── Sklearn_P_R_F1_ect.py
├── Part2
│   ├── LinearDiscriminantAnalysis_LDA.ipynb
│   ├── LinearDiscriminantAnalysis_LDA.py
│   ├── LinearRegression.ipynb
│   ├── LinearRegression.py
│   ├── LogisticRegression.ipynb
│   └── LogisticRegression.py
├── Part3
│   ├── Naive_bayes.ipynb
│   └── Naive_bayes.py
├── Part4
│   ├── DecionTree.ipynb
│   └── DecionTree.py
├── Part5
│   ├── BoostTree.ipynb
│   └── BoostTree.py
├── Part6
│   ├── GBDT.ipynb
│   ├── GBDT.py
│   ├── xgboost.ipynb
│   └── xgboost.py
├── Part7
│   ├── SVM.ipynb
│   └── SVM.py
└── README.md

--------------------------------------------------------------------------------
/Part1/Least_squares.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding: utf-8

# In[1]:


import numpy as np
from scipy.optimize import leastsq
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')


# In[2]:


# Target function
def real_func(x):
    return np.cos(2 * np.pi * x)

# Polynomial with coefficients p (highest degree first)
def fit_func(p, x):
    f = np.poly1d(p)
    return f(x)

# Residuals between the polynomial fit and the observations
def residuals_func(p, x, y):
    ret = fit_func(p, x) - y
    return ret


# In[3]:


# Ten sample points
x = np.linspace(0, 1, 10)
x_points = np.linspace(0, 1, 1000)
# Values of the target function with Gaussian noise added
y_ = real_func(x)
y = [np.random.normal(0, 0.1) + y1 for y1 in y_]


def fitting(M=0):
    """
    M is the degree of the fitted polynomial.
    """
    # Randomly initialize the polynomial coefficients
    p_init = np.random.rand(M + 1)
    # Least-squares fit
    p_lsq = leastsq(residuals_func, p_init, args=(x, y))
    print('Fitting Parameters:', p_lsq[0])

    # Visualization
    plt.plot(x_points, real_func(x_points), label='real')
    plt.plot(x_points, fit_func(p_lsq[0], x_points), label='fitted curve')
    plt.plot(x, y, 'bo', label='noise')
    plt.legend()
    return p_lsq


# In[4]:


# M=0
p_lsq_0 = fitting(M=0)


# In[5]:


# M=1
p_lsq_1 = fitting(M=1)


# In[6]:


# M=3
p_lsq_3 = fitting(M=3)


# In[7]:


# M=9
p_lsq_9 = fitting(M=9)


# In[8]:


regularization = 0.0001

def residuals_func_regularization(p, x, y):
    ret = fit_func(p, x) - y
    # Append the L2 norm of the coefficients as extra residuals (regularization term)
    ret = np.append(ret,
                    np.sqrt(0.5 * regularization * np.square(p)))
    return ret

# Least squares with the regularization term (degree 9)
p_init = np.random.rand(9 + 1)
p_lsq_regularization = leastsq(
    residuals_func_regularization, p_init, args=(x, y))


plt.plot(x_points, real_func(x_points), label='real')
plt.plot(x_points, fit_func(p_lsq_9[0], x_points), label='fitted curve')
plt.plot(
    x_points,
    fit_func(p_lsq_regularization[0], x_points),
    label='regularization')
plt.plot(x, y, 'bo', label='noise')
plt.legend()


# In[ ]:
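
# A minimal cross-check sketch: for a polynomial model the least-squares
# problem also has a closed-form solution, so the iterative leastsq fit above
# can be verified against the (ridge) normal equations. The helper name
# polyfit_normal_eq is illustrative and not part of the original script.

def polyfit_normal_eq(x, y, M, lam=0.0):
    """Fit a degree-M polynomial by solving (A^T A + lam*I) w = A^T y."""
    A = np.vander(x, M + 1)  # columns x**M, ..., x**0, matching np.poly1d's order
    # lam > 0 adds the same kind of L2 penalty used in cell In[8]
    return np.linalg.solve(A.T @ A + lam * np.eye(M + 1), A.T @ y)

# With lam=0 this should agree closely with np.polyfit and with fitting(M=3):
print(polyfit_normal_eq(x, np.array(y), M=3))
print(np.polyfit(x, np.array(y), deg=3))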
--------------------------------------------------------------------------------
/Part1/Numpy_P_R_F1_etc.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['TN', 'TN', 'TN', 'TN', 'FP', 'FP', 'TN', 'FP', 'TN', 'TN', 'FP', 'TN', 'TP', 'FN', 'FN', 'FN', 'TP', 'FN', 'TP', 'FN']\n",
      "0.39999999999999997\n",
      "0.765625\n",
      "0.765625\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "class Score:\n",
    "    def __init__(self, pre_score, rel_label, threshold, beta):\n",
    "        self.tn = 0\n",
    "        self.fn = 0\n",
    "        self.fp = 0\n",
    "        self.tp = 0\n",
    "        self.pre_score = pre_score\n",
    "        self.rel_label = rel_label\n",
    "        self.threshold = threshold\n",
    "        self.beta = beta\n",
    "        list(map(self.__getCM_count,\n",
    "                 self.pre_score,\n",
    "                 self.rel_label))\n",
    "\n",
    "    def __getCM(self, pre, rel):\n",
    "        if pre < self.threshold:\n",
    "            if rel == 0: return 'TN'\n",
    "            if rel == 1: return 'FN'\n",
    "        if pre >= self.threshold:\n",
    "            if rel == 0: return 'FP'\n",
    "            if rel == 1: return 'TP'\n",
    "\n",
    "    def get_cm(self):\n",
    "        return list(map(self.__getCM,\n",
    "                        self.pre_score,\n",
    "                        self.rel_label))\n",
    "\n",
    "    def __getCM_count(self, pre, rel):\n",
    "        if pre < self.threshold:\n",
    "            if rel == 0: self.tn += 1\n",
    "            if rel == 1: self.fn += 1\n",
    "        if pre >= self.threshold:\n",
    "            if rel == 0: self.fp += 1\n",
    "            if rel == 1: self.tp += 1\n",
    "\n",
    "    def get_f1(self):\n",
    "        P = self.tp / (self.tp + self.fp)\n",
    "        R = self.tp / (self.tp + self.fn)\n",
    "        if P == 0.0:\n",
    "            return 0.0\n",
    "        else:\n",
    "            return (self.beta * self.beta + 1) * P * R / (self.beta * self.beta * P + R)\n",
    "\n",
    "    # Method 2: histogram counting; precision is the score resolution (number of bins)\n",
    "    def get_auc_by_count(self, precision=100):\n",
    "        # Number of positive samples\n",
    "        positive_len = sum(self.rel_label)\n",
    "        # Number of negative samples\n",
    "        negative_len = len(self.rel_label) - positive_len\n",
    "        # Total number of positive/negative pairs\n",
    "        total_case = positive_len * negative_len\n",
    "        # Histogram of positive-sample scores\n",
    "        pos_histogram = [0 for _ in range(precision + 1)]\n",
    "        # Histogram of negative-sample scores\n",
    "        neg_histogram = [0 for _ in range(precision + 1)]\n",
    "        # Bin width used to map a score onto a bin index\n",
    "        bin_width = 1.0 / precision\n",
    "\n",
    "        for i in range(len(self.rel_label)):\n",
    "            nth_bin = int(self.pre_score[i] / bin_width)\n",
    "            if self.rel_label[i] == 1:\n",
    "                pos_histogram[nth_bin] += 1\n",
    "            else:\n",
    "                neg_histogram[nth_bin] += 1\n",
    "\n",
    "        accumulated_neg = 0\n",
    "        satisfied_pair = 0\n",
    "        for i in range(precision + 1):\n",
    "            # Each positive outranks every negative in a lower bin; ties in the same bin count 0.5\n",
    "            satisfied_pair += (pos_histogram[i] * accumulated_neg + pos_histogram[i] * neg_histogram[i] * 0.5)\n",
    "            accumulated_neg += neg_histogram[i]\n",
    "        return satisfied_pair / float(total_case)\n",
    "\n",
    "    # Method 3: rank-based formula\n",
    "    def get_auc_by_rank(self):\n",
    "        # Combine scores and labels, then sort by score in descending order\n",
    "        df = pd.DataFrame({'pre_score': self.pre_score, 'rel_label': self.rel_label})\n",
    "        df = df.sort_values(by='pre_score', ascending=False).reset_index(drop=True)\n",
    "        # Get n, the number of positives M and the number of negatives N\n",
    "        n = len(df)\n",
    "        M = len(df[df['rel_label'] == 1])\n",
    "        N = n - M\n",
    "        # Initialize rank and the tie-group trackers rank_tmp, count_all, count_p\n",
    "        rank = 0.0\n",
    "        rank_tmp, count_all, count_p = 0.0, 0, 0\n",
    "        # Append a sentinel row so the i+1 lookahead cannot go out of bounds;\n",
    "        # it does not affect the result\n",
    "        df.loc[n] = [0, 0]\n",
    "        # Single pass over the sorted samples\n",
    "        for i in range(n):\n",
    "            # If row i+1 has a different score than row i, a tie group may have just ended\n",
    "            if df['pre_score'][i + 1] != df['pre_score'][i]:\n",
    "                # Positive sample\n",
    "                if df['rel_label'][i] == 1:\n",
    "                    # A nonzero counter means a tie group has just ended\n",
    "                    if count_all != 0:\n",
    "                        # Add the tie group's averaged rank contribution; the current row,\n",
    "                        # the last member of the tie group, is added back in here\n",
    "                        rank += (rank_tmp + n - i) * (count_p + 1) / (count_all + 1)\n",
    "                        rank_tmp, count_all, count_p = 0.0, 0, 0\n",
    "                        continue\n",
    "                    rank += (n - i)\n",
    "                else:\n",
    "                    if count_all != 0:\n",
    "                        rank += (rank_tmp + n - i) * count_p / (count_all + 1)\n",
    "                        rank_tmp, count_all, count_p = 0.0, 0, 0\n",
    "                        continue\n",
    "            else:\n",
    "                rank_tmp += (n - i)\n",
    "                count_all += 1\n",
    "                if df['rel_label'][i] == 1:\n",
    "                    count_p += 1\n",
    "        return (rank - M * (1 + M) / 2) / (M * N)\n",
    "\n",
    "\n",
    "if __name__ == '__main__':\n",
    "    learn_data_L2 = [0.2, 0.3, 0.4, 0.35, 0.6, 0.55, 0.2, 0.57, 0.3, 0.15, 0.77, 0.33, 0.9, 0.49, 0.45, 0.41, 0.66, 0.43, 0.7, 0.4]\n",
    "    learn_data_R2 = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]\n",
    "    learn_data2 = pd.DataFrame({'Learn': learn_data_L2, 'Real': learn_data_R2})\n",
    "\n",
    "    score2 = Score(learn_data2['Learn'], learn_data2['Real'], 0.5, 1)\n",
    "\n",
    "    print(score2.get_cm())\n",
    "    print(score2.get_f1())\n",
    "    print(score2.get_auc_by_count())\n",
    "    print(score2.get_auc_by_rank())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
--------------------------------------------------------------------------------
/Part1/Numpy_P_R_F1_etc.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding: utf-8

# In[1]:


import numpy as np
import pandas as pd

class Score:
    def __init__(self, pre_score, rel_label, threshold, beta):
        self.tn = 0
        self.fn = 0
        self.fp = 0
        self.tp = 0
        self.pre_score = pre_score
        self.rel_label = rel_label
        self.threshold = threshold
        self.beta = beta
        list(map(self.__getCM_count,
                 self.pre_score,
                 self.rel_label))

    def __getCM(self, pre, rel):
        if pre < self.threshold:
            if rel == 0: return 'TN'
            if rel == 1: return 'FN'
        if pre >= self.threshold:
            if rel == 0: return 'FP'
            if rel == 1: return 'TP'

    def get_cm(self):
        return list(map(self.__getCM,
                        self.pre_score,
                        self.rel_label))

    def __getCM_count(self, pre, rel):
        if pre < self.threshold:
            if rel == 0: self.tn += 1
            if rel == 1: self.fn += 1
        if pre >= self.threshold:
            if rel == 0: self.fp += 1
            if rel == 1: self.tp += 1

    def get_f1(self):
        P = self.tp / (self.tp + self.fp)
        R = self.tp / (self.tp + self.fn)
        if P == 0.0:
            return 0.0
        else:
            return (self.beta * self.beta + 1) * P * R / (self.beta * self.beta * P + R)

    # Method 2: histogram counting; precision is the score resolution (number of bins)
    def get_auc_by_count(self, precision=100):
        # Number of positive samples
        positive_len = sum(self.rel_label)
        # Number of negative samples
        negative_len = len(self.rel_label) - positive_len
        # Total number of positive/negative pairs
        total_case = positive_len * negative_len
        # Histogram of positive-sample scores
        pos_histogram = [0 for _ in range(precision + 1)]
        # Histogram of negative-sample scores
        neg_histogram = [0 for _ in range(precision + 1)]
        # Bin width used to map a score onto a bin index
        bin_width = 1.0 / precision

        for i in range(len(self.rel_label)):
            nth_bin = int(self.pre_score[i] / bin_width)
            if self.rel_label[i] == 1:
                pos_histogram[nth_bin] += 1
            else:
                neg_histogram[nth_bin] += 1

        accumulated_neg = 0
        satisfied_pair = 0
        for i in range(precision + 1):
            # Each positive outranks every negative in a lower bin; ties in the same bin count 0.5
            satisfied_pair += (pos_histogram[i] * accumulated_neg + pos_histogram[i] * neg_histogram[i] * 0.5)
            accumulated_neg += neg_histogram[i]
        return satisfied_pair / float(total_case)

    # Method 3: rank-based formula
    def get_auc_by_rank(self):
        # Combine scores and labels, then sort by score in descending order
        df = pd.DataFrame({'pre_score': self.pre_score, 'rel_label': self.rel_label})
        df = df.sort_values(by='pre_score', ascending=False).reset_index(drop=True)
        # Get n, the number of positives M and the number of negatives N
        n = len(df)
        M = len(df[df['rel_label'] == 1])
        N = n - M
        # Initialize rank and the tie-group trackers rank_tmp, count_all, count_p
        rank = 0.0
        rank_tmp, count_all, count_p = 0.0, 0, 0
        # Append a sentinel row so the i+1 lookahead cannot go out of bounds;
        # it does not affect the result
        df.loc[n] = [0, 0]
        # Single pass over the sorted samples
        for i in range(n):
            # If row i+1 has a different score than row i, a tie group may have just ended
            if df['pre_score'][i + 1] != df['pre_score'][i]:
                # Positive sample
                if df['rel_label'][i] == 1:
                    # A nonzero counter means a tie group has just ended
                    if count_all != 0:
                        # Add the tie group's averaged rank contribution; the current row,
                        # the last member of the tie group, is added back in here
                        rank += (rank_tmp + n - i) * (count_p + 1) / (count_all + 1)
                        rank_tmp, count_all, count_p = 0.0, 0, 0
                        continue
                    rank += (n - i)
                else:
                    if count_all != 0:
                        rank += (rank_tmp + n - i) * count_p / (count_all + 1)
                        rank_tmp, count_all, count_p = 0.0, 0, 0
                        continue
            else:
                rank_tmp += (n - i)
                count_all += 1
                if df['rel_label'][i] == 1:
                    count_p += 1
        return (rank - M * (1 + M) / 2) / (M * N)


if __name__ == '__main__':
    learn_data_L2 = [0.2, 0.3, 0.4, 0.35, 0.6, 0.55, 0.2, 0.57, 0.3, 0.15, 0.77, 0.33, 0.9, 0.49, 0.45, 0.41, 0.66, 0.43, 0.7, 0.4]
    learn_data_R2 = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
    learn_data2 = pd.DataFrame({'Learn': learn_data_L2, 'Real': learn_data_R2})

    score2 = Score(learn_data2['Learn'], learn_data2['Real'], 0.5, 1)

    print(score2.get_cm())
    print(score2.get_f1())
    print(score2.get_auc_by_count())
    print(score2.get_auc_by_rank())


# In[ ]:
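
# A minimal sanity-check sketch, assuming scikit-learn and scipy are installed
# (not part of the original file): the hand-rolled metrics above can be
# verified against library implementations, and scipy.stats.rankdata, which
# assigns average ranks to ties, collapses the whole tie-handling loop of
# get_auc_by_rank into a few lines. auc_by_rankdata is our illustrative helper.

from scipy.stats import rankdata
from sklearn.metrics import f1_score, roc_auc_score

def auc_by_rankdata(scores, labels):
    """AUC = (sum of positive ranks - M(M+1)/2) / (M*N); ties get average ranks."""
    scores = np.asarray(scores, dtype=float)
    labels = np.asarray(labels)
    ranks = rankdata(scores)   # 1-based ranks, ties averaged
    M = labels.sum()           # number of positives
    N = len(labels) - M        # number of negatives
    return (ranks[labels == 1].sum() - M * (M + 1) / 2) / (M * N)

# Same twenty samples as in the __main__ block above
scores = np.array([0.2, 0.3, 0.4, 0.35, 0.6, 0.55, 0.2, 0.57, 0.3, 0.15,
                   0.77, 0.33, 0.9, 0.49, 0.45, 0.41, 0.66, 0.43, 0.7, 0.4])
labels = np.array([0] * 12 + [1] * 8)

print(f1_score(labels, (scores >= 0.5).astype(int)))  # should match get_f1()
print(roc_auc_score(labels, scores))                  # should match get_auc_by_rank()
print(auc_by_rankdata(scores, labels))                # same value again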
--------------------------------------------------------------------------------
/Part1/Sklearn_P_R_F1_ect.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding: utf-8

# In[1]:


# Accuracy
import numpy as np
from sklearn.metrics import accuracy_score
y_pred = [0, 2, 1, 3, 9, 9, 8, 5, 8]
y_true = [0, 1, 2, 3, 2, 6, 3, 5, 9]

accuracy_score(y_true, y_pred)


# In[2]:


# normalize=False returns the number of correctly classified samples
# instead of the fraction
accuracy_score(y_true, y_pred, normalize=False)


# In[4]:


import warnings
warnings.filterwarnings('ignore')
from sklearn import metrics
print(metrics.precision_score(y_true, y_pred, average='micro'))  # micro-averaged precision
print(metrics.precision_score(y_true, y_pred, average='macro'))  # macro-averaged precision
print(metrics.precision_score(y_true, y_pred, labels=[0, 1, 2, 3], average='macro'))  # precision restricted to the given labels
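
# A hand-rolled check of what the two averages compute (a sketch, not part of
# the original script): micro-averaging pools all decisions into one global
# count, so for single-label multiclass it equals accuracy, while
# macro-averaging takes the unweighted mean of per-class precisions over the
# union of observed labels, scoring 0 for classes that are never predicted.

y_true_arr = np.array(y_true)
y_pred_arr = np.array(y_pred)
per_class = []
for c in np.union1d(y_true_arr, y_pred_arr):
    predicted_c = (y_pred_arr == c)
    # A class that is never predicted contributes precision 0
    # (sklearn warns about this and does the same)
    per_class.append((y_true_arr[predicted_c] == c).mean() if predicted_c.any() else 0.0)

print(np.mean(per_class))                 # should match average='macro' above
print((y_pred_arr == y_true_arr).mean())  # should match average='micro' (== accuracy)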
# In[5]:


# Recall
print(metrics.recall_score(y_true, y_pred, average='micro'))
print(metrics.recall_score(y_true, y_pred, average='macro'))
# F1
print(metrics.f1_score(y_true, y_pred, average='weighted'))


# In[9]:


# Confusion matrix
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_true, y_pred))

# Classification report: precision / recall / f1-score per class, plus averages and support
from sklearn.metrics import classification_report
y_true = [0, 1, 2, 2, 0]
y_pred = [0, 0, 2, 2, 0]
target_names = ['class 0', 'class 1', 'class 2']
print(classification_report(y_true, y_pred, target_names=target_names))


# In[10]:


# ROC curve and AUC
import numpy as np
from sklearn.metrics import roc_auc_score
y_true = np.array([0, 0, 1, 1])
y_scores = np.array([0.1, 0.4, 0.35, 0.8])
roc_auc_score(y_true, y_scores)


# In[13]:


# ----- Example on the iris dataset (SVM with a linear kernel) -----
import numpy as np
import matplotlib.pyplot as plt
from itertools import cycle

from sklearn import svm, datasets
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier
from numpy import interp  # scipy.interp is a deprecated alias of numpy.interp

# Import some data to play with
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Binarize the output
y = label_binarize(y, classes=[0, 1, 2])
n_classes = y.shape[1]

# Add noisy features to make the problem harder
random_state = np.random.RandomState(0)
n_samples, n_features = X.shape
X = np.c_[X, random_state.randn(n_samples, 200 * n_features)]

# Shuffle and split training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.5,
                                                    random_state=0)

# Learn to predict each class against the others
classifier = OneVsRestClassifier(svm.SVC(kernel='linear', probability=True,
                                         random_state=random_state))
y_score = classifier.fit(X_train, y_train).decision_function(X_test)

# Compute ROC curve and ROC area for each class
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Compute micro-average ROC curve and ROC area
fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_score.ravel())
roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

plt.figure()
lw = 2
plt.plot(fpr[2], tpr[2], color='darkorange',
         lw=lw, label='ROC curve (area = %0.2f)' % roc_auc[2])
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic example')
plt.legend(loc="lower right")
plt.show()
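
# A one-call counterpart of the manual curve averaging in the next cell
# (a sketch reusing y_test and y_score from above, and the roc_auc_score
# imported in cell In[10]): with binarized labels, roc_auc_score accepts
# average='micro'/'macro' directly. Note that average='macro' averages the
# per-class areas, so it can differ slightly from the area under the
# interpolated mean curve computed below.

print(roc_auc_score(y_test, y_score, average='micro'))
print(roc_auc_score(y_test, y_score, average='macro'))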
# In[14]:


# Compute macro-average ROC curve and ROC area

# First aggregate all false positive rates
all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))

# Then interpolate all ROC curves at these points
mean_tpr = np.zeros_like(all_fpr)
for i in range(n_classes):
    mean_tpr += interp(all_fpr, fpr[i], tpr[i])

# Finally average it and compute AUC
mean_tpr /= n_classes

fpr["macro"] = all_fpr
tpr["macro"] = mean_tpr
roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

# Plot all ROC curves
plt.figure()
plt.plot(fpr["micro"], tpr["micro"],
         label='micro-average ROC curve (area = {0:0.2f})'
               ''.format(roc_auc["micro"]),
         color='deeppink', linestyle=':', linewidth=4)

plt.plot(fpr["macro"], tpr["macro"],
         label='macro-average ROC curve (area = {0:0.2f})'
               ''.format(roc_auc["macro"]),
         color='navy', linestyle=':', linewidth=4)

colors = cycle(['aqua', 'darkorange', 'cornflowerblue'])
for i, color in zip(range(n_classes), colors):
    plt.plot(fpr[i], tpr[i], color=color, lw=lw,
             label='ROC curve of class {0} (area = {1:0.2f})'
                   ''.format(i, roc_auc[i]))

plt.plot([0, 1], [0, 1], 'k--', lw=lw)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Some extension of Receiver operating characteristic to multi-class')
plt.legend(loc="lower right")
plt.show()


# In[ ]: