├── .gitattributes ├── Boosting--LightGBM ├── lgb-python │ ├── 1.lgb_model应用案例.py │ └── 2.lightgbm调参案例.py ├── lgb-sklearn │ ├── 1-lightgbm基础版.py │ └── 2-lightgbm进阶版(网格搜索).py └── lightgbm参数.xls ├── Boosting--XGBoost ├── XGBoost调参-1.py ├── XGBoost调参.py ├── sklearn的GridSearchCV和StratifiedKFold对xgboost模型参数调优.py ├── xgboost基本用法.py ├── xgboost模块的XGBClassifier函数.py ├── xgboost模块的fit函数.py ├── xgboost模块的plot_importance函数.py └── xgboost调参-2.py ├── GridSearchCV_example └── GridSearchCV_example.py ├── KNN ├── 1.KNN实现 │ └── KNN.py └── 2.KNN应用 │ ├── datingTestSet.txt │ └── kNN_example1_machine_learning_in_action.py ├── README.md └── metrics └── auc_example.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /Boosting--LightGBM/lgb-python/1.lgb_model应用案例.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat Mar 31 21:19:09 2018 5 | 6 | @author: hello4720 7 | """ 8 | import numpy as np 9 | import pandas as pd 10 | import lightgbm as lgb 11 | from sklearn import metrics 12 | from sklearn.model_selection import train_test_split 13 | 14 | ### 读取数据 15 | print("载入数据") 16 | dataset1 = pd.read_csv('G:/ML/ML_match/IJCAI/data3.22/3.22ICJAI/data/7_train_data1.csv') 17 | dataset2 = pd.read_csv('G:/ML/ML_match/IJCAI/data3.22/3.22ICJAI/data/7_train_data2.csv') 18 | dataset3 = pd.read_csv('G:/ML/ML_match/IJCAI/data3.22/3.22ICJAI/data/7_train_data3.csv') 19 | dataset4 = pd.read_csv('G:/ML/ML_match/IJCAI/data3.22/3.22ICJAI/data/7_train_data4.csv') 20 | dataset5 = pd.read_csv('G:/ML/ML_match/IJCAI/data3.22/3.22ICJAI/data/7_train_data5.csv') 21 | 22 | dataset1.drop_duplicates(inplace=True) 23 | dataset2.drop_duplicates(inplace=True) 24 | dataset3.drop_duplicates(inplace=True) 25 | 
dataset4.drop_duplicates(inplace=True) 26 | dataset5.drop_duplicates(inplace=True) 27 | 28 | ### 数据合并 29 | print("数据合并") 30 | trains = pd.concat([dataset1,dataset2],axis=0) 31 | trains = pd.concat([trains,dataset3],axis=0) 32 | trains = pd.concat([trains,dataset4],axis=0) 33 | 34 | online_test = dataset5 35 | 36 | ### 数据拆分 37 | print("数据拆分") 38 | train_xy,offline_test = train_test_split(trains, test_size = 0.2,random_state=21) 39 | train,val = train_test_split(train_xy, test_size = 0.2,random_state=21) 40 | 41 | print("训练集") 42 | y = train.is_trade # 训练集标签 43 | X = train.drop(['instance_id','is_trade'],axis=1) # 训练集特征矩阵 44 | 45 | print("验证集") 46 | val_y = val.is_trade # 验证集标签 47 | val_X = val.drop(['instance_id','is_trade'],axis=1) # 验证集特征矩阵 48 | 49 | print("测试集") 50 | offline_test_X=offline_test.drop(['instance_id','is_trade'],axis=1) # 线下测试特征矩阵 51 | online_test_X=online_test.drop(['instance_id'],axis=1) # 线上测试特征矩阵 52 | 53 | ### 数据转换 54 | lgb_train = lgb.Dataset(X, y, free_raw_data=False) 55 | lgb_eval = lgb.Dataset(val_X, val_y, reference=lgb_train,free_raw_data=False) 56 | 57 | ### 开始训练 58 | print('设置参数') 59 | params = { 60 | 'boosting_type': 'gbdt', 61 | 'boosting': 'dart', 62 | 'objective': 'binary', 63 | 'metric': 'binary_logloss', 64 | 65 | 'learning_rate': 0.01, 66 | 'num_leaves':25, 67 | 'max_depth':3, 68 | 69 | 'max_bin':10, 70 | 'min_data_in_leaf':8, 71 | 72 | 'feature_fraction': 0.6, 73 | 'bagging_fraction': 1, 74 | 'bagging_freq':0, 75 | 76 | 'lambda_l1': 0, 77 | 'lambda_l2': 0, 78 | 'min_split_gain': 0 79 | } 80 | 81 | print("开始训练") 82 | gbm = lgb.train(params, # 参数字典 83 | lgb_train, # 训练集 84 | num_boost_round=2000, # 迭代次数 85 | valid_sets=lgb_eval, # 验证集 86 | early_stopping_rounds=30) # 早停系数 87 | ### 线下预测 88 | print ("线下预测") 89 | preds_offline = gbm.predict(offline_test_X, num_iteration=gbm.best_iteration) # 输出概率 90 | offline=offline_test[['instance_id','is_trade']] 91 | offline['preds']=preds_offline 92 | offline.is_trade = 
offline['is_trade'].astype(np.float64) 93 | print('log_loss', metrics.log_loss(offline.is_trade, offline.preds)) 94 | 95 | ### 线上预测 96 | print("线上预测") 97 | preds_online = gbm.predict(online_test_X, num_iteration=gbm.best_iteration) # 输出概率 98 | online=online_test[['instance_id']] 99 | online['preds']=preds_online 100 | online.rename(columns={'preds':'predicted_score'},inplace=True) 101 | online.to_csv("./data/20180405.txt",index=None,sep=' ') 102 | 103 | ### 保存模型 104 | from sklearn.externals import joblib 105 | joblib.dump(gbm,'gbm.pkl') 106 | 107 | ### 特征选择 108 | df = pd.DataFrame(X.columns.tolist(), columns=['feature']) 109 | df['importance']=list(gbm.feature_importance()) 110 | df = df.sort_values(by='importance',ascending=False) 111 | df.to_csv("./data/feature_score_20180405.csv",index=None,encoding='gbk') -------------------------------------------------------------------------------- /Boosting--LightGBM/lgb-python/2.lightgbm调参案例.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | # 作者:wanglei5205 4 | # 邮箱:wanglei5205@126.com 5 | # 博客:http://cnblogs.com/wanglei5205 6 | # github:http://github.com/wanglei5205 7 | """ 8 | ### 导入模块 9 | import numpy as np 10 | import pandas as pd 11 | import lightgbm as lgb 12 | from sklearn import metrics 13 | 14 | ### 载入数据 15 | print('载入数据') 16 | dataset1 = pd.read_csv('G:/ML/ML_match/IJCAI/data3.22/3.22ICJAI/data/7_train_data1.csv') 17 | dataset2 = pd.read_csv('G:/ML/ML_match/IJCAI/data3.22/3.22ICJAI/data/7_train_data2.csv') 18 | dataset3 = pd.read_csv('G:/ML/ML_match/IJCAI/data3.22/3.22ICJAI/data/7_train_data3.csv') 19 | dataset4 = pd.read_csv('G:/ML/ML_match/IJCAI/data3.22/3.22ICJAI/data/7_train_data4.csv') 20 | dataset5 = pd.read_csv('G:/ML/ML_match/IJCAI/data3.22/3.22ICJAI/data/7_train_data5.csv') 21 | 22 | print('数据去重') 23 | dataset1.drop_duplicates(inplace=True) 24 | dataset2.drop_duplicates(inplace=True) 25 | dataset3.drop_duplicates(inplace=True) 26 
| dataset4.drop_duplicates(inplace=True) 27 | dataset5.drop_duplicates(inplace=True) 28 | 29 | print('数据合并') 30 | trains = pd.concat([dataset1,dataset2],axis=0) 31 | trains = pd.concat([trains,dataset3],axis=0) 32 | trains = pd.concat([trains,dataset4],axis=0) 33 | 34 | online_test = dataset5 35 | 36 | ### 数据拆分(训练集+验证集+测试集) 37 | print('数据拆分') 38 | from sklearn.model_selection import train_test_split 39 | train_xy,offline_test = train_test_split(trains,test_size = 0.2,random_state=21) 40 | train,val = train_test_split(train_xy,test_size = 0.2,random_state=21) 41 | 42 | # 训练集 43 | y_train = train.is_trade # 训练集标签 44 | X_train = train.drop(['instance_id','is_trade'],axis=1) # 训练集特征矩阵 45 | 46 | # 验证集 47 | y_val = val.is_trade # 验证集标签 48 | X_val = val.drop(['instance_id','is_trade'],axis=1) # 验证集特征矩阵 49 | 50 | # 测试集 51 | offline_test_X = offline_test.drop(['instance_id','is_trade'],axis=1) # 线下测试特征矩阵 52 | online_test_X = online_test.drop(['instance_id'],axis=1) # 线上测试特征矩阵 53 | 54 | ### 数据转换 55 | print('数据转换') 56 | lgb_train = lgb.Dataset(X_train, y_train, free_raw_data=False) 57 | lgb_eval = lgb.Dataset(X_val, y_val, reference=lgb_train,free_raw_data=False) 58 | 59 | ### 设置初始参数--不含交叉验证参数 60 | print('设置参数') 61 | params = { 62 | 'boosting_type': 'gbdt', 63 | 'objective': 'binary', 64 | 'metric': 'binary_logloss', 65 | } 66 | 67 | ### 交叉验证(调参) 68 | print('交叉验证') 69 | min_merror = float('Inf') 70 | best_params = {} 71 | 72 | # 准确率 73 | print("调参1:提高准确率") 74 | for num_leaves in range(20,200,5): 75 | for max_depth in range(3,8,1): 76 | params['num_leaves'] = num_leaves 77 | params['max_depth'] = max_depth 78 | 79 | cv_results = lgb.cv( 80 | params, 81 | lgb_train, 82 | seed=2018, 83 | nfold=3, 84 | metrics=['binary_error'], 85 | early_stopping_rounds=10, 86 | verbose_eval=True 87 | ) 88 | 89 | mean_merror = pd.Series(cv_results['binary_error-mean']).min() 90 | boost_rounds = pd.Series(cv_results['binary_error-mean']).argmin() 91 | 92 | if mean_merror < min_merror: 93 | 
min_merror = mean_merror 94 | best_params['num_leaves'] = num_leaves 95 | best_params['max_depth'] = max_depth 96 | 97 | params['num_leaves'] = best_params['num_leaves'] 98 | params['max_depth'] = best_params['max_depth'] 99 | 100 | # 过拟合 101 | print("调参2:降低过拟合") 102 | for max_bin in range(1,255,5): 103 | for min_data_in_leaf in range(10,200,5): 104 | params['max_bin'] = max_bin 105 | params['min_data_in_leaf'] = min_data_in_leaf 106 | 107 | cv_results = lgb.cv( 108 | params, 109 | lgb_train, 110 | seed=42, 111 | nfold=3, 112 | metrics=['binary_error'], 113 | early_stopping_rounds=3, 114 | verbose_eval=True 115 | ) 116 | 117 | mean_merror = pd.Series(cv_results['binary_error-mean']).min() 118 | boost_rounds = pd.Series(cv_results['binary_error-mean']).argmin() 119 | 120 | if mean_merror < min_merror: 121 | min_merror = mean_merror 122 | best_params['max_bin']= max_bin 123 | best_params['min_data_in_leaf'] = min_data_in_leaf 124 | 125 | params['min_data_in_leaf'] = best_params['min_data_in_leaf'] 126 | params['max_bin'] = best_params['max_bin'] 127 | 128 | print("调参3:降低过拟合") 129 | for feature_fraction in [0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0]: 130 | for bagging_fraction in [0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0]: 131 | for bagging_freq in range(0,50,5): 132 | params['feature_fraction'] = feature_fraction 133 | params['bagging_fraction'] = bagging_fraction 134 | params['bagging_freq'] = bagging_freq 135 | 136 | cv_results = lgb.cv( 137 | params, 138 | lgb_train, 139 | seed=42, 140 | nfold=3, 141 | metrics=['binary_error'], 142 | early_stopping_rounds=3, 143 | verbose_eval=True 144 | ) 145 | 146 | mean_merror = pd.Series(cv_results['binary_error-mean']).min() 147 | boost_rounds = pd.Series(cv_results['binary_error-mean']).argmin() 148 | 149 | if mean_merror < min_merror: 150 | min_merror = mean_merror 151 | best_params['feature_fraction'] = feature_fraction 152 | best_params['bagging_fraction'] = bagging_fraction 153 | best_params['bagging_freq'] = 
bagging_freq 154 | 155 | params['feature_fraction'] = best_params['feature_fraction'] 156 | params['bagging_fraction'] = best_params['bagging_fraction'] 157 | params['bagging_freq'] = best_params['bagging_freq'] 158 | 159 | print("调参4:降低过拟合") 160 | for lambda_l1 in [0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0]: 161 | for lambda_l2 in [0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0]: 162 | for min_split_gain in [0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0]: 163 | params['lambda_l1'] = lambda_l1 164 | params['lambda_l2'] = lambda_l2 165 | params['min_split_gain'] = min_split_gain 166 | 167 | cv_results = lgb.cv( 168 | params, 169 | lgb_train, 170 | seed=42, 171 | nfold=3, 172 | metrics=['binary_error'], 173 | early_stopping_rounds=3, 174 | verbose_eval=True 175 | ) 176 | 177 | mean_merror = pd.Series(cv_results['binary_error-mean']).min() 178 | boost_rounds = pd.Series(cv_results['binary_error-mean']).argmin() 179 | 180 | if mean_merror < min_merror: 181 | min_merror = mean_merror 182 | best_params['lambda_l1'] = lambda_l1 183 | best_params['lambda_l2'] = lambda_l2 184 | best_params['min_split_gain'] = min_split_gain 185 | 186 | params['lambda_l1'] = best_params['lambda_l1'] 187 | params['lambda_l2'] = best_params['lambda_l2'] 188 | params['min_split_gain'] = best_params['min_split_gain'] 189 | 190 | 191 | print(best_params) 192 | 193 | ### 训练 194 | params['learning_rate']=0.01 195 | lgb.train( 196 | params, # 参数字典 197 | lgb_train, # 训练集 198 | valid_sets=lgb_eval, # 验证集 199 | num_boost_round=2000, # 迭代次数 200 | early_stopping_rounds=50 # 早停次数 201 | ) 202 | 203 | ### 线下预测 204 | print ("线下预测") 205 | preds_offline = lgb.predict(offline_test_X, num_iteration=lgb.best_iteration) # 输出概率 206 | offline=offline_test[['instance_id','is_trade']] 207 | offline['preds']=preds_offline 208 | offline.is_trade = offline['is_trade'].astype(np.float64) 209 | print('log_loss', metrics.log_loss(offline.is_trade, offline.preds)) 210 | 211 | ### 线上预测 212 | print("线上预测") 213 | preds_online = 
lgb.predict(online_test_X, num_iteration=lgb.best_iteration) # 输出概率 214 | online=online_test[['instance_id']] 215 | online['preds']=preds_online 216 | online.rename(columns={'preds':'predicted_score'},inplace=True) # 更改列名 217 | online.to_csv("./data/20180405.txt",index=None,sep=' ') # 保存结果 218 | 219 | ### 保存模型 220 | from sklearn.externals import joblib 221 | joblib.dump(lgb,'lgb.pkl') 222 | 223 | ### 特征选择 224 | df = pd.DataFrame(X_train.columns.tolist(), columns=['feature']) 225 | df['importance']=list(lgb.feature_importance()) # 特征分数 226 | df = df.sort_values(by='importance',ascending=False) # 特征排序 227 | df.to_csv("./data/feature_score_20180331.csv",index=None,encoding='gbk') # 保存分数 228 | -------------------------------------------------------------------------------- /Boosting--LightGBM/lgb-sklearn/1-lightgbm基础版.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | # 作者:wanglei5205 4 | # 邮箱:wanglei5205@126.com 5 | # 博客:http://cnblogs.com/wanglei5205 6 | # github:http://github.com/wanglei5205 7 | """ 8 | import numpy as np 9 | import pandas as pd 10 | from lightgbm import LGBMClassifier 11 | from sklearn.metrics import log_loss 12 | 13 | # 载入数据 14 | print('载入数据') 15 | dataset1 = pd.read_csv('G:/ML/ML_match/IJCAI/data3.22/3.22ICJAI/data/7_train_data1.csv') 16 | dataset2 = pd.read_csv('G:/ML/ML_match/IJCAI/data3.22/3.22ICJAI/data/7_train_data2.csv') 17 | dataset3 = pd.read_csv('G:/ML/ML_match/IJCAI/data3.22/3.22ICJAI/data/7_train_data3.csv') 18 | dataset4 = pd.read_csv('G:/ML/ML_match/IJCAI/data3.22/3.22ICJAI/data/7_train_data4.csv') 19 | dataset5 = pd.read_csv('G:/ML/ML_match/IJCAI/data3.22/3.22ICJAI/data/7_train_data5.csv') 20 | 21 | print('删除重复数据') 22 | dataset1.drop_duplicates(inplace=True) 23 | dataset2.drop_duplicates(inplace=True) 24 | dataset3.drop_duplicates(inplace=True) 25 | dataset4.drop_duplicates(inplace=True) 26 | dataset5.drop_duplicates(inplace=True) 27 | 28 | print('数据合并') 29 | 
trains = pd.concat([dataset1, dataset2], axis=0)
trains = pd.concat([trains, dataset3], axis=0)
trains = pd.concat([trains, dataset4], axis=0)

online_test = dataset5

# Split: train / validation / offline test
print('数据拆分')
from sklearn.model_selection import train_test_split
train_xy, offline_test = train_test_split(trains, test_size=0.2, random_state=21)  # train vs offline test
train, val = train_test_split(train_xy, test_size=0.2, random_state=21)            # train vs validation

y = train.is_trade                                          # training labels
X = train.drop(['instance_id', 'is_trade'], axis=1)         # training feature matrix

val_y = val.is_trade                                        # validation labels
val_X = val.drop(['instance_id', 'is_trade'], axis=1)       # validation feature matrix

offline_test_X = offline_test.drop(['instance_id', 'is_trade'], axis=1)  # offline-test features
online_test_X = online_test.drop(['instance_id'], axis=1)                # online-test features

# Build model
print('建立模型')
gbm = LGBMClassifier(
    objective='binary',
    n_estimators=1000,
    subsample=0.8,
    subsample_freq=10,
    colsample_bytree=0.8,
    learning_rate=0.01,
    reg_alpha=0.8

)

# Train model
print('训练模型')
gbm.fit(X,                            # training features
        y,                            # training labels
        eval_set=[(val_X, val_y)],    # validation set
        eval_metric='logloss',        # evaluation metric
        early_stopping_rounds=50,     # early-stopping patience
        verbose=2)                    # verbosity (0/1/2)

print('best_score:', gbm.best_score)
print('best_iteration:', gbm.best_iteration)

# Offline evaluation.
# FIX: the original fed hard class labels from gbm.predict() into log_loss,
# which is meaningless for a probabilistic metric (and the comments say the
# output should be probabilities). Use the positive-class probability instead.
print('线下预测')
preds_offline = gbm.predict_proba(offline_test_X, num_iteration=gbm.best_iteration)[:, 1]
offline = offline_test[['instance_id', 'is_trade']]
offline['preds'] = preds_offline

offline.is_trade = offline['is_trade'].astype(np.float64)
print('线下分数log_loss', log_loss(offline.is_trade, offline.preds))

# Online prediction — same fix: the submitted predicted_score must be a
# probability, not a 0/1 label.
print('线上预测')
preds_online = gbm.predict_proba(online_test_X, num_iteration=gbm.best_iteration)[:, 1]
online = online_test[['instance_id']]
online['preds'] = preds_online

print('保存结果')
online.rename(columns={'preds':'predicted_score'},inplace=True) 92 | online.to_csv("G:/ML/ML_match/IJCAI/data3.22/3.22ICJAI/data/20180331.txt",index=None,sep=' ') 93 | 94 | # 特征选择 95 | df = pd.DataFrame(X.columns.tolist(), columns=['feature']) 96 | df['importance']=list(gbm.feature_importances_) 97 | df = df.sort_values(by='importance',ascending=False) 98 | df.to_csv("G:/ML/ML_match/IJCAI/data3.22/3.22ICJAI/data/feature_score_20180331.csv",index=None,encoding='gbk') -------------------------------------------------------------------------------- /Boosting--LightGBM/lgb-sklearn/2-lightgbm进阶版(网格搜索).py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | # 作者:wanglei5205 4 | # 邮箱:wanglei5205@126.com 5 | # 博客:http://cnblogs.com/wanglei5205 6 | # github:http://github.com/wanglei5205 7 | """ 8 | import pandas as pd 9 | from lightgbm import LGBMClassifier 10 | from sklearn.metrics import log_loss 11 | from sklearn.model_selection import GridSearchCV 12 | 13 | ### 载入数据 14 | print('载入数据') 15 | df_train = pd.read_csv('G:/ML/ML_match/IJCAI/data3.22/3.22ICJAI/data/7_train_data1.csv') 16 | df_test = pd.read_csv('G:/ML/ML_match/IJCAI/data3.22/3.22ICJAI/data/7_train_data2.csv') 17 | 18 | y_train = df_train['is_trade'] 19 | y_test = df_test['is_trade'] 20 | X_train=df_train.drop(['is_trade','instance_id],axis=1) 21 | X_test=df_test.drop(['is_trade','instance_id'],axis=1) 22 | 23 | ### 建立模型 24 | print('# 建立模型') 25 | estimator = LGBMClassifier( 26 | objective='binary', # 二分类问题 27 | num_leaves=31, # 默认31, 叶子个数 28 | learning_rate=0.3, # 默认0.1,学习率 29 | n_estimators=67, # 默认10,决策树个数 30 | subsample_for_bin=1 , # 31 | subsample=1, # 默认1, 32 | metric = 'logloss', # 评估指标 33 | silent =True, # 输出中间过程 34 | reg_alpha=0.0, # L1正则化系数 35 | min_split_gain=0.0, # 默认0,分裂最小权重 36 | early_stopping_rounds=50 # 提前终止训练 37 | ) 38 | 39 | ### 网格搜素 40 | print('# 网格搜索') 41 | param_grid = { 42 | 'num_leaves':list(range(25,80,5)), 43 | 
'min_child_weight':list(range(2,10,1)) 44 | 45 | } 46 | 47 | gs = GridSearchCV(estimator, # 分类器 48 | param_grid, # 参数字典 49 | scoring='neg_log_loss', # 评价标准 50 | cv=3, # 三折交叉验证 51 | verbose = 2, # 打印全部中间过程(0/1/2) 52 | n_jobs=1) # 并行计算CPU个数 53 | 54 | gs.fit(X_train,y_train) 55 | print('最佳参数:',gs.best_params_) 56 | print('最优分数:',gs.best_score_) 57 | 58 | ### 训练模型 59 | print('# 训练模型') 60 | lgbm = gs.best_estimator_ # 最优分类器 61 | lgbm.fit(X_train,y_train) # 模型训练 62 | 63 | # 模型属性 64 | print('best_score:',lgbm.best_score) # 最优分数 65 | print('best_iteration:',lgbm.best_iteration) # 最佳迭代器个数(早停系数) 66 | 67 | ### 模型预测 68 | print('# 模型预测') 69 | y_pred = lgbm.predict(X_test, num_iteration=lgbm.best_iteration) 70 | 71 | ### 性能评估 72 | print('log_loss',log_loss(y_test, y_pred)) 73 | 74 | ### 特征选择 75 | #print('特征选择') 76 | #print('打印Feature importances:', list(lgbm.feature_importances_)) -------------------------------------------------------------------------------- /Boosting--LightGBM/lightgbm参数.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wanglei5205/Machine_learning/a5a29faf63355addeb76fd426c8df8c47caa2e52/Boosting--LightGBM/lightgbm参数.xls -------------------------------------------------------------------------------- /Boosting--XGBoost/XGBoost调参-1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | # 作者:wanglei5205 4 | # 邮箱:wanglei5205@126.com 5 | # 博客:http://cnblogs.com/wanglei5205 6 | # github:http://github.com/wanglei5205 7 | """ 8 | ### 载入库 9 | import numpy as np 10 | import pandas as pd 11 | import matplotlib.pyplot as plt 12 | 13 | ### 载入数据 14 | from sklearn.datasets import load_digits 15 | digits = load_digits() 16 | 17 | ### 数据分割 18 | from sklearn.model_selection import train_test_split 19 | x_train,x_test,y_train,y_test = train_test_split(digits.data, 20 | digits.target, 21 | test_size = 0.3, 22 | random_state = 33) 23 | 24 | 
### 载入模型 25 | from xgboost import XGBClassifier 26 | xgb1 = XGBClassifier( 27 | learning_rate =0.1, 28 | n_estimators=1000, 29 | max_depth=5, 30 | min_child_weight=1, 31 | gamma=0, 32 | subsample=0.8, 33 | colsample_bytree=0.8, 34 | objective= 'binary:logistic', 35 | nthread=4, 36 | scale_pos_weight=1, 37 | seed=27) 38 | 39 | ### 网格搜素 40 | from sklearn.model_selection import GridSearchCV 41 | param_grid = {'n_estimators':[100,110,120,130], 42 | 'max_depth':[2,3,4,5,6,]} 43 | gs = GridSearchCV(xgb1, 44 | param_grid, 45 | scoring='accuracy', 46 | cv=3, 47 | verbose=0, 48 | n_jobs=1) 49 | 50 | gs.fit(x_train,y_train) 51 | print(gs.best_params_,gs.best_score_) 52 | 53 | """ 54 | xgb1.fit(x_train,y_train) 55 | 56 | ### 模型预测 57 | y_pred = xgb1.predict(x_test) 58 | 59 | ### 性能评估 60 | from sklearn.metrics import accuracy_score 61 | from sklearn.metrics import accuracy_score 62 | acc = accuracy_score(y_test,y_pred) 63 | print(acc) 64 | """ 65 | -------------------------------------------------------------------------------- /Boosting--XGBoost/XGBoost调参.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ############################################################################### 4 | # 作者:wanglei5205 5 | # 邮箱:wanglei5205@126.com 6 | # 代码:http://github.com/wanglei5205 7 | # 博客:http://cnblogs.com/wanglei5205 8 | # 目的:学习xgboost的XGBClassifier函数 9 | # 官方API文档:http://xgboost.readthedocs.io/en/latest/python/python_api.html#module-xgboost.training 10 | ############################################################################### 11 | """ 12 | ### load module 13 | import matplotlib.pyplot as plt 14 | from sklearn import datasets 15 | from sklearn.model_selection import train_test_split 16 | from sklearn.metrics import accuracy_score 17 | from xgboost import XGBClassifier 18 | from xgboost import plot_importance 19 | 20 | ### load datasets 21 | digits = datasets.load_digits() # mnist 手写数字识别(多分类任务) 22 | 23 | 
### data analysis 24 | print(digits.data.shape) # 特征空间维度 25 | print(digits.target.shape) # 输出空间维度 26 | 27 | ### data split 28 | x_train,x_test,y_train,y_test = train_test_split(digits.data, 29 | digits.target, 30 | test_size = 0.3, 31 | random_state = 33) 32 | ### fit model for train data 33 | model = XGBClassifier(learning_rate=0.1, 34 | n_estimators=1000, # 树的个数--1000棵树建立xgboost 35 | max_depth=6, # 树的深度 36 | min_child_weight = 1, # 叶子节点最小权重 37 | gamma=0., # 惩罚项中叶子结点个数前的参数 38 | subsample=0.8, # 随机选择80%样本建立树 39 | colsample_btree=0.8, # 随机算哦80%样本选择特征 40 | objective='multi:softmax', # 指定损失函数 41 | scale_pos_weight=1, # 解决样本个数不平衡的问题 42 | random_state=27 # 随机数 43 | ) 44 | model.fit(x_train,y_train) 45 | """ 46 | eval_set = [(x_test,y_test)], 47 | eval_metric = "mlogloss", 48 | early_stopping_rounds = 10, 49 | verbose = True 50 | 51 | """ 52 | 53 | ### plot feature importance 54 | fig,ax = plt.subplots(figsize=(15,15)) 55 | plot_importance(model, 56 | height=0.5, 57 | ax=ax, 58 | max_num_features=64) 59 | plt.show() 60 | 61 | ### make prediction for test data 62 | y_pred = model.predict(x_test) 63 | 64 | ### model evaluate 65 | accuracy = accuracy_score(y_test,y_pred) 66 | print("accuarcy: %.2f%%" % (accuracy*100.0)) 67 | """ 68 | 95.74% 69 | """ -------------------------------------------------------------------------------- /Boosting--XGBoost/sklearn的GridSearchCV和StratifiedKFold对xgboost模型参数调优.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ############################################################################### 4 | # 作者:wanglei5205 5 | # 邮箱:wanglei5205@126.com 6 | # 代码:http://github.com/wanglei5205 7 | # 博客:http://cnblogs.com/wanglei5205 8 | # 目的:学习使用GridSearchCV和StratifiedKFold对xgboost调参 9 | # 官方API文档:http://xgboost.readthedocs.io/en/latest/python/python_api.html#module-xgboost.training 10 | ############################################################################### 11 | """ 12 | 
### load module 13 | import matplotlib.pyplot as plt 14 | from sklearn.datasets import load_digits 15 | from sklearn.model_selection import train_test_split 16 | from sklearn.model_selection import GridSearchCV 17 | from sklearn.model_selection import StratifiedKFold 18 | from sklearn.metrics import accuracy_score 19 | 20 | from xgboost import XGBClassifier 21 | from xgboost import plot_importance 22 | 23 | ### load datasets 24 | digits = load_digits() 25 | 26 | ### data analysis 27 | print(digits.data.shape) 28 | print(digits.target.shape) 29 | 30 | ### data split 31 | x_train,x_test,y_train,y_test = train_test_split(digits.data, 32 | digits.target, 33 | test_size = 0.3, 34 | random_state = 33) 35 | ### fit model for train data 36 | model = XGBClassifier(learning_rate=0.1, 37 | n_estimators=1000, # 树的个数--1000棵树建立xgboost 38 | max_depth=6, # 树的深度 39 | min_child_weight = 1, # 叶子节点最小权重 40 | gamma=0., # 惩罚项中叶子结点个数前的参数 41 | subsample=0.8, # 随机选择80%样本建立树 42 | colsample_btree=0.8, # 随机算哦80%样本选择特征 43 | objective='multi:softmax', # 指定损失函数 44 | scale_pos_weight=1, # 解决样本个数不平衡的问题 45 | random_state=27 # 随机数 46 | ) 47 | 48 | model.fit(x_train, 49 | y_train, 50 | eval_set = [(x_test,y_test)], # 评估数据集 51 | eval_metric = "mlogloss", # 评估标准 52 | early_stopping_rounds = 10, # 当loss有10次未变,提前结束评估 53 | verbose = False) # 显示提前结束 54 | 55 | 56 | # 参数字典 57 | param_grid = {'learning_rate':[0.05,0.1,0.25,0.3], 58 | 'max_depth':range(2,10), 59 | 'n_estimators':range(100,110,120)} 60 | 61 | kfold = StratifiedKFold(n_splits=10,shuffle=True,random_state=7) 62 | grid_search = GridSearchCV(model, # 模型 63 | param_grid, # 待调参数(字典) 64 | scoring="neg_log_loss", # 模型评估准则 65 | n_jobs=1, # -1表示使用全部的cpu运算 66 | cv=kfold) 67 | grid_result = grid_search.fit(digits.data,digits.target) 68 | 69 | print(grid_search.grid_scores_) 70 | print(grid_search.best_score_) 71 | print(grid_search.best_params_) 72 | 73 | ### summarize results 74 | print("best:%f using %s" % 
(grid_result.best_score_,grid_result.best_params)) 75 | means = grid_result.cv_results_['mean_test_score'] 76 | params = grid_result.cv_results_['params'] 77 | 78 | for mean, param in zip(means,params): 79 | print("%f with: %r" % (mean,param)) 80 | 81 | ### plot feature importance 82 | fig,ax = plt.subplots(figsize=(15,15)) 83 | plot_importance(model, 84 | height=0.5, 85 | ax=ax, 86 | max_num_features=64) 87 | plt.show() 88 | 89 | ### make prediction for test data 90 | y_pred = model.predict(x_test) 91 | 92 | ### model evaluate 93 | accuracy = accuracy_score(y_test,y_pred) 94 | print("accuarcy: %.2f%%" % (accuracy*100.0)) 95 | -------------------------------------------------------------------------------- /Boosting--XGBoost/xgboost基本用法.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ############################################################################### 4 | # 作者:wanglei5205 5 | # 邮箱:wanglei5205@126.com 6 | # 代码:http://github.com/wanglei5205 7 | # 博客:http://cnblogs.com/wanglei5205 8 | # 目的:xgboost基本用法 9 | ############################################################################### 10 | """ 11 | ### 载入数据 12 | from sklearn import datasets # 载入数据集 13 | digits = datasets.load_digits() # 载入mnist数据集 14 | print(digits.data.shape) # 打印输入空间维度 15 | print(digits.target.shape) # 打印输出空间维度 16 | 17 | ### 数据分割 18 | from sklearn.model_selection import train_test_split # 载入数据分割函数train_test_split 19 | x_train,x_test,y_train,y_test = train_test_split(digits.data, # 特征空间 20 | digits.target, # 输出空间 21 | test_size = 0.3, # 测试集占30% 22 | random_state = 33) # 为了复现实验,设置一个随机数 23 | 24 | ### 建立模型 25 | from xgboost import XGBClassifier 26 | model = XGBClassifier() # 载入模型 27 | model.fit(x_train,y_train) # 训练模型(训练集) 28 | y_pred = model.predict(x_test) # 模型预测(测试集) 29 | 30 | ### 性能度量 31 | from sklearn.metrics import accuracy_score # 准确率 32 | accuracy = accuracy_score(y_test,y_pred) 33 | print("accuarcy: %.2f%%" % 
(accuracy*100.0)) 34 | 35 | from sklearn import metrics 36 | # 混淆矩阵 37 | confusion = metrics.confusion_matrix(y_test,y_pred) 38 | print(confusion) 39 | print(confusion[1,1]) 40 | 41 | ### 特征重要性 42 | import matplotlib.pyplot as plt 43 | from xgboost import plot_importance 44 | fig,ax = plt.subplots(figsize=(10,15)) 45 | plot_importance(model,height=0.5,max_num_features=64,ax=ax) 46 | plt.show() 47 | -------------------------------------------------------------------------------- /Boosting--XGBoost/xgboost模块的XGBClassifier函数.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ############################################################################### 4 | # 作者:wanglei5205 5 | # 邮箱:wanglei5205@126.com 6 | # 代码:http://github.com/wanglei5205 7 | # 博客:http://cnblogs.com/wanglei5205 8 | # 目的:学习xgboost的XGBClassifier函数 9 | # 官方API文档:http://xgboost.readthedocs.io/en/latest/python/python_api.html#module-xgboost.training 10 | ############################################################################### 11 | """ 12 | ### load module 13 | import matplotlib.pyplot as plt 14 | from sklearn import datasets 15 | from sklearn.model_selection import train_test_split 16 | from sklearn.metrics import accuracy_score 17 | from xgboost import XGBClassifier 18 | from xgboost import plot_importance 19 | 20 | ### load datasets 21 | digits = datasets.load_digits() 22 | 23 | ### data analysis 24 | print(digits.data.shape) 25 | print(digits.target.shape) 26 | 27 | print(digits.target.value_counts()) 28 | ### data split 29 | x_train,x_test,y_train,y_test = train_test_split(digits.data, 30 | digits.target, 31 | test_size = 0.3, 32 | random_state = 33) 33 | 34 | ### fit model for train data 35 | model = XGBClassifier(learning_rate=0.1, 36 | n_estimators=1000, # 树的个数--1000棵树建立xgboost 37 | max_depth=6, # 树的深度 38 | min_child_weight = 1, # 叶子节点最小权重 39 | gamma=0., # 惩罚项中叶子结点个数前的参数 40 | subsample=0.8, # 随机选择80%样本建立决策树 41 | 
colsample_btree=0.8, # 随机选择80%特征建立决策树 42 | objective='multi:softmax', # 损失函数 43 | scale_pos_weight=1, # 解决样本个数不平衡的问题(二分类) 44 | random_state=27 # 随机数 45 | ) 46 | model.fit(x_train,y_train) 47 | 48 | ### plot feature importance 49 | fig,ax = plt.subplots(figsize=(15,15)) 50 | plot_importance(model, 51 | height=0.5, 52 | ax=ax, 53 | max_num_features=64) 54 | plt.show() 55 | 56 | ### make prediction for test data 57 | y_pred = model.predict(x_test) 58 | 59 | ### model evaluate 60 | accuracy = accuracy_score(y_test,y_pred) 61 | print("accuarcy: %.2f%%" % (accuracy*100.0)) 62 | """ 63 | 95.74% 64 | """ -------------------------------------------------------------------------------- /Boosting--XGBoost/xgboost模块的fit函数.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ############################################################################### 4 | # 作者:wanglei5205 5 | # 邮箱:wanglei5205@126.com 6 | # 代码:http://github.com/wanglei5205 7 | # 博客:http://cnblogs.com/wanglei5205 8 | # 目的:学习xgboost的XGBClassifier函数 9 | # 官方API文档:http://xgboost.readthedocs.io/en/latest/python/python_api.html#module-xgboost.training 10 | ############################################################################### 11 | """ 12 | ### load module 13 | from sklearn import datasets 14 | from sklearn.model_selection import train_test_split 15 | from xgboost import XGBClassifier 16 | 17 | ### load datasets 18 | digits = datasets.load_digits() 19 | 20 | ### data analysis 21 | print(digits.data.shape) 22 | print(digits.target.shape) 23 | 24 | ### data split 25 | x_train,x_test,y_train,y_test = train_test_split(digits.data, 26 | digits.target, 27 | test_size = 0.3, 28 | random_state = 33) 29 | 30 | ### fit model for train data 31 | # fit函数参数:eval_set=[(x_test,y_test)] 评估数据集,list类型 32 | # fit函数参数:eval_metric="mlogloss" 评估标准(多分类问题,使用mlogloss作为损失函数) 33 | # fit函数参数:early_stopping_rounds= 10 如果模型的loss十次内没有减小,则提前结束模型训练 34 | # 
fit函数参数:verbose = True True显示,False不显示 35 | model = XGBClassifier() 36 | model.fit(x_train, 37 | y_train, 38 | eval_set = [(x_test,y_test)], # 评估数据集 39 | 40 | eval_metric = "mlogloss", 41 | early_stopping_rounds = 10, 42 | verbose = True) 43 | 44 | ### make prediction for test data 45 | y_pred = model.predict(x_test) 46 | 47 | ### model evaluate 48 | from sklearn.metrics import accuracy_score 49 | accuracy = accuracy_score(y_test,y_pred) 50 | print("accuarcy: %.2f%%" % (accuracy*100.0)) 51 | """ 52 | 95.0% 53 | """ -------------------------------------------------------------------------------- /Boosting--XGBoost/xgboost模块的plot_importance函数.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ############################################################################### 4 | # 作者:wanglei5205 5 | # 邮箱:wanglei5205@126.com 6 | # 代码:http://github.com/wanglei5205 7 | # 博客:http://cnblogs.com/wanglei5205 8 | # 目的:学习xgboost的plot_importance函数 9 | ############################################################################### 10 | """ 11 | ### load module 12 | import matplotlib.pyplot as plt 13 | from sklearn.datasets import load_digits # 载入数据 14 | from sklearn.model_selection import train_test_split # 数据分割 15 | from xgboost import XGBClassifier # 载入模型 16 | from xgboost import plot_importance # 特征权重 17 | from sklearn.metrics import accuracy_score # 模型评估 18 | 19 | ### load datasets 20 | digits = load_digits() # 载入mnist数据集 21 | 22 | ### data analysis 23 | print(digits.data.shape) # 打印输入空间维度 24 | print(digits.target.shape) # 打印输出空间维度 25 | 26 | ### data split 27 | x_train,x_test,y_train,y_test = train_test_split(digits.data, 28 | digits.target, 29 | test_size = 0.3, # 测试集占30% 30 | random_state = 33)# 随机种子 31 | ### fit model for train set 32 | model = XGBClassifier() 33 | model.fit(x_train,y_train) 34 | 35 | ### make prediction for test data 36 | y_pred = model.predict(x_test) 37 | 38 | ### model evaluate 39 
| accuracy = accuracy_score(y_test,y_pred) 40 | print("accuarcy: %.2f%%" % (accuracy*100.0)) 41 | 42 | ### plot feature importance 43 | fig,ax = plt.subplots(figsize=(15,15)) 44 | plot_importance(model, 45 | height=0.5, 46 | ax=ax, 47 | max_num_features=64) 48 | plt.show() 49 | 50 | """ 51 | 95.0% 52 | """ -------------------------------------------------------------------------------- /Boosting--XGBoost/xgboost调参-2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | # 时间:2018.3.25 4 | # 作者:wanglei5205 5 | # 邮箱:wanglei5205@126.com 6 | # 博客:http://cnblogs.com/wanglei5205 7 | # github:http://github.com/wanglei5205 8 | """ 9 | ### 导入模块 10 | import pandas as pd # 数据分析库 11 | import numpy as np # 科学计算库 12 | import matplotlib.pylab as plt # 数据可视化库 13 | import xgboost as xgb # xgboost模型 14 | from xgboost.sklearn import XGBClassifier # xgboost模型(sklearn) 15 | from sklearn import cross_validation # 交叉验证 16 | from sklearn import metrics # 性能度量 17 | from sklearn.model_selection import GridSearchCV # 网格搜索 18 | 19 | from matplotlib.pylab import rcParams 20 | rcParams['figure.figsize'] = 12, 4 21 | 22 | ### 载入数据 23 | train = pd.read_csv("G:/ML/ML_match/IJCAI/data3.22/3.22ICJAI/data/7_train_data1.csv") # 有标签的训练集 24 | test = pd.read_csv("G:/ML/ML_match/IJCAI/data3.22/3.22ICJAI/data/7_train_data5.csv") # 无标签的测试集 25 | #test_results = pd.read_csv('test_results.csv') # 测试集标签 26 | 27 | # 统一标签 28 | target = 'is_trade' 29 | IDcol = 'instance_id' 30 | predictors = [x for x in train.columns if x not in [target, IDcol]] # 特征列表 31 | 32 | ### 训练模型(交叉验证) 33 | """ 34 | # alg 算法 35 | # dtrain 训练集 36 | # dtest 测试集 37 | # predictors 38 | # useTrainCV 39 | # cv_flods 40 | # early_stopping_rounds 41 | 42 | # XGBClassifier是封装在sklearn中的xgboost模型 43 | """ 44 | def modelfit(alg, dtrain, dtest, predictors,useTrainCV=True, cv_folds=5, early_stopping_rounds=50): 45 | 46 | # xgboost.cv()交叉验证 47 | if useTrainCV: 48 | # 获取XGBClassifier的参数 
49 | xgb_param = alg.get_xgb_params() 50 | 51 | # 训练集和测试集转换为xgboost 52 | xgtrain = xgb.DMatrix(dtrain[predictors].values, label=dtrain[target].values) 53 | # xgtest = xgb.DMatrix(dtest[predictors].values) 54 | 55 | # xgboost.cv()交叉验证 56 | cvresult = xgb.cv(xgb_param, 57 | xgtrain, 58 | num_boost_round=alg.get_params()['n_estimators'], 59 | nfold=cv_folds, 60 | metrics='logloss', 61 | early_stopping_rounds=early_stopping_rounds, 62 | show_stdv=False) 63 | 64 | # 设置XGBClassifier参数 65 | alg.set_params(n_estimators=cvresult.shape[0]) 66 | 67 | # 训练XGBClassifier 68 | alg.fit(dtrain[predictors],dtrain[target],eval_metric='logloss') 69 | 70 | # 模型预测 71 | dtrain_predictions = alg.predict(dtrain[predictors]) # 返回类别 72 | dtrain_predprob = alg.predict_proba(dtrain[predictors])[:,1] # 返回概率 73 | 74 | # 性能评估 75 | print("\nModel Report") 76 | print("logloss : %.6g" % metrics.log_loss(dtrain[target].values, dtrain_predictions)) # 负对数似然损失 77 | print("AUC Score (Train): %f" % metrics.roc_auc_score(dtrain[target], dtrain_predprob)) # AUC值 78 | 79 | # 知道测试集target时使用 80 | #dtest['predprob'] = alg.predict_proba(dtest[predictors])[:,1] 81 | #results = test_results.merge(dtest[[IDcol,'predprob']], on='ID') 82 | #print('AUC Score (Test): %f' % metrics.roc_auc_score(results[target], results['predprob'])) 83 | 84 | # 85 | #feat_imp = pd.Series(alg.booster().get_fscore()).sort_values(ascending=False) 86 | #feat_imp.plot(kind='bar', title='Feature Importances') 87 | #plt.ylabel('Feature Importance Score') 88 | 89 | 90 | ### 建立模型 91 | print('### 建立XGBClassifier') 92 | xgbc = XGBClassifier(learning_rate =0.1, # 学习率 93 | silent = 1, # 输出中间过程 94 | n_estimators=150, # 决策树个数 95 | max_depth=5, # 决策树深度 96 | min_child_weight=1, # 最小叶子节点权重和? 
97 | gamma=0, # 惩罚项系数 98 | subsample=0.8, # 训练一棵树所用的数据占全部数据集比例 99 | colsample_bytree=0.8, # 训练一颗树所用的特征占全部数据集比例 100 | objective= 'binary:logistic', # 损失函数 101 | nthread=4, # 线程数 102 | scale_pos_weight=1, # 样本不平衡 103 | eval_metric='logloss', # 评估指标 104 | reg_alpha=0.03, # 正则化系数 105 | seed=27) # 随机种子 106 | 107 | ### 网格搜索 108 | ## step1:决策树个数 n_estimators 109 | print("### 调参:决策树个数") 110 | #modelfit(xgbc, train, test, predictors) 111 | 112 | ## step2:决策树参数 max_depth/min_child_weight/gamma/subsample/colsample_bytree 113 | print("### 调参:决策树参数") 114 | param_test1 = {'max_depth':list(range(3,10,2)),'min_child_weight':list(range(1,6,2))} 115 | param_test2 = {'max_depth':[4,5,6],'min_child_weight':[4,5,6]} 116 | param_test2b ={'min_child_weight':[6,8,10,12]} 117 | param_test3 = {'gamma':[i/10.0 for i in range(0,5)]} 118 | param_test4 = {'subsample':[i/10.0 for i in range(6,10)],'colsample_bytree':[i/10.0 for i in range(6,10)]} 119 | param_test5 = {'subsample':[i/100.0 for i in range(75,90,5)],'colsample_bytree':[i/100.0 for i in range(75,90,5)]} 120 | 121 | ## step3:正则化参数 reg_alpha 122 | print("### 调参:正则化参数") 123 | param_test6 = {'reg_alpha':[1e-5, 1e-2, 0.1, 1, 100]} 124 | param_test7 = {'reg_alpha':[0, 0.001, 0.005, 0.01, 0.05]} 125 | 126 | gsearch = GridSearchCV(estimator = xgbc, # 待调参模型 127 | param_grid = param_test1, # 参数字典 128 | scoring='neg_log_loss', # 评价函数 129 | n_jobs=1, # 并行CPU个数 130 | iid=False, # 独立同分布 131 | verbose=2, # 显示中间过程 132 | cv=5) # 5折交叉验证 133 | 134 | gsearch.fit(train[predictors],train[target]) # 训练GridSearchCV 135 | 136 | print(gsearch.best_params_) # GridSearchCV最佳参数 137 | print(gsearch.best_score_) # GridSearchCV最佳分数 138 | 139 | xgbc = gsearch.best_estimator_ # GridSEarchCV最佳分类器 140 | 141 | ### 训练XGBClassifier模型 142 | print("### 训练XGBClassifier") 143 | modelfit(xgbc, train, test, predictors) -------------------------------------------------------------------------------- /GridSearchCV_example/GridSearchCV_example.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | # 数据:20类新闻文本 4 | # 模型:svc 5 | # 调参:gridsearch 6 | """ 7 | ### 加载模块 8 | import numpy as np 9 | import pandas as pd 10 | 11 | ### 载入数据 12 | from sklearn.datasets import fetch_20newsgroups # 20类新闻数据 13 | news = fetch_20newsgroups(subset='all') # 生成20类新闻数据 14 | 15 | ### 数据分割 16 | from sklearn.cross_validation import train_test_split 17 | X_train, X_test, y_train, y_test = train_test_split(news.data[:300], 18 | news.target[:300], 19 | test_size=0.25, # 测试集占比25% 20 | random_state=33) # 随机数 21 | ### pipe-line 22 | from sklearn.feature_extraction.text import TfidfVectorizer # 特征提取 23 | from sklearn.svm import SVC # 载入模型 24 | from sklearn.pipeline import Pipeline # pipe_line模式 25 | clf = Pipeline([('vect', TfidfVectorizer(stop_words='english', analyzer='word')), 26 | ('svc', SVC())]) 27 | 28 | ### 网格搜索 29 | from sklearn.model_selection import GridSearchCV 30 | parameters = {'svc__gamma': np.logspace(-1, 1)} # 参数范围(字典类型) 31 | 32 | gs = GridSearchCV(clf, # 模型 33 | parameters, # 参数字典 34 | n_jobs=1, # 使用1个cpu 35 | verbose=0, # 不打印中间过程 36 | cv=5) # 5折交叉验证 37 | 38 | gs.fit(X_train, y_train) # 在训练集上进行网格搜索 39 | 40 | ### 最佳参数在测试集上模型分数 41 | print("best:%f using %s" % (gs.best_score_,gs.best_params_)) 42 | 43 | ### 测试集下的分数 44 | print("test datasets score" % gs.score(X_test, y_test)) 45 | 46 | ### 模型不同参数下的分数 47 | # 方式一(0.20版本将删除) 48 | print(gs.grid_scores_) 49 | 50 | # 方式二(0.20推荐的方式) 51 | means = gs.cv_results_['mean_test_score'] 52 | params = gs.cv_results_['params'] 53 | 54 | for mean, param in zip(means,params): 55 | print("%f with: %r" % (mean,param)) -------------------------------------------------------------------------------- /KNN/1.KNN实现/KNN.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import numpy as np # 科学计算库 3 | import collections # 集合模块 4 | 5 | """ 6 | # 实现KNN算法,并使用算法分类 7 | """ 8 | 9 | """ 
10 | ### kNN分类器 11 | # 参数: 12 | dataSet 特征(训练集) 13 | labels 标签(训练集) 14 | k - kNN算法参数,选择距离最小的k个点 15 | inX - 特征(测试集) 16 | 17 | # 返回值: 18 | sortedClassCount[0][0] - 分类结果(标签) 19 | """ 20 | def classify0(dataset,labels,k,inx): 21 | 22 | # 计算欧氏距离 23 | dist = np.sum((inx - dataset)**2, axis=1)**0.5 24 | 25 | # 距离递增排序--k个最近的标签 26 | k_labels = [labels[index] for index in dist.argsort()[0:k]] 27 | 28 | # 统计标签个数--出现次数最多的标签即为最终类别 29 | label = collections.Counter(k_labels).most_common(1)[0][0] 30 | 31 | return label 32 | 33 | """ 34 | ### 测试程序 35 | # 创建训练集和测试集,使用KNN分类,打印分类结果 36 | """ 37 | 38 | if __name__ == '__main__': 39 | 40 | # 创建数据集 41 | # 训练集 42 | group = np.array([[1,101],[5,89],[108,5],[115,8]]) 43 | #四组特征的标签 44 | labels = ['爱情片','爱情片','动作片','动作片'] 45 | 46 | print("# 特征\n",group) 47 | print("# 标签\n",labels) 48 | 49 | # 测试集 50 | test = [101,20] 51 | 52 | # kNN分类 53 | test_class = classify0(group,labels,3,test) 54 | 55 | # 打印分类结果 56 | print(test_class) -------------------------------------------------------------------------------- /KNN/2.KNN应用/datingTestSet.txt: -------------------------------------------------------------------------------- 1 | 40920 8.326976 0.953952 largeDoses 2 | 14488 7.153469 1.673904 smallDoses 3 | 26052 1.441871 0.805124 didntLike 4 | 75136 13.147394 0.428964 didntLike 5 | 38344 1.669788 0.134296 didntLike 6 | 72993 10.141740 1.032955 didntLike 7 | 35948 6.830792 1.213192 largeDoses 8 | 42666 13.276369 0.543880 largeDoses 9 | 67497 8.631577 0.749278 didntLike 10 | 35483 12.273169 1.508053 largeDoses 11 | 50242 3.723498 0.831917 didntLike 12 | 63275 8.385879 1.669485 didntLike 13 | 5569 4.875435 0.728658 smallDoses 14 | 51052 4.680098 0.625224 didntLike 15 | 77372 15.299570 0.331351 didntLike 16 | 43673 1.889461 0.191283 didntLike 17 | 61364 7.516754 1.269164 didntLike 18 | 69673 14.239195 0.261333 didntLike 19 | 15669 0.000000 1.250185 smallDoses 20 | 28488 10.528555 1.304844 largeDoses 21 | 6487 3.540265 0.822483 smallDoses 22 | 37708 2.991551 
0.833920 didntLike 23 | 22620 5.297865 0.638306 smallDoses 24 | 28782 6.593803 0.187108 largeDoses 25 | 19739 2.816760 1.686209 smallDoses 26 | 36788 12.458258 0.649617 largeDoses 27 | 5741 0.000000 1.656418 smallDoses 28 | 28567 9.968648 0.731232 largeDoses 29 | 6808 1.364838 0.640103 smallDoses 30 | 41611 0.230453 1.151996 didntLike 31 | 36661 11.865402 0.882810 largeDoses 32 | 43605 0.120460 1.352013 didntLike 33 | 15360 8.545204 1.340429 largeDoses 34 | 63796 5.856649 0.160006 didntLike 35 | 10743 9.665618 0.778626 smallDoses 36 | 70808 9.778763 1.084103 didntLike 37 | 72011 4.932976 0.632026 didntLike 38 | 5914 2.216246 0.587095 smallDoses 39 | 14851 14.305636 0.632317 largeDoses 40 | 33553 12.591889 0.686581 largeDoses 41 | 44952 3.424649 1.004504 didntLike 42 | 17934 0.000000 0.147573 smallDoses 43 | 27738 8.533823 0.205324 largeDoses 44 | 29290 9.829528 0.238620 largeDoses 45 | 42330 11.492186 0.263499 largeDoses 46 | 36429 3.570968 0.832254 didntLike 47 | 39623 1.771228 0.207612 didntLike 48 | 32404 3.513921 0.991854 didntLike 49 | 27268 4.398172 0.975024 didntLike 50 | 5477 4.276823 1.174874 smallDoses 51 | 14254 5.946014 1.614244 smallDoses 52 | 68613 13.798970 0.724375 didntLike 53 | 41539 10.393591 1.663724 largeDoses 54 | 7917 3.007577 0.297302 smallDoses 55 | 21331 1.031938 0.486174 smallDoses 56 | 8338 4.751212 0.064693 smallDoses 57 | 5176 3.692269 1.655113 smallDoses 58 | 18983 10.448091 0.267652 largeDoses 59 | 68837 10.585786 0.329557 didntLike 60 | 13438 1.604501 0.069064 smallDoses 61 | 48849 3.679497 0.961466 didntLike 62 | 12285 3.795146 0.696694 smallDoses 63 | 7826 2.531885 1.659173 smallDoses 64 | 5565 9.733340 0.977746 smallDoses 65 | 10346 6.093067 1.413798 smallDoses 66 | 1823 7.712960 1.054927 smallDoses 67 | 9744 11.470364 0.760461 largeDoses 68 | 16857 2.886529 0.934416 smallDoses 69 | 39336 10.054373 1.138351 largeDoses 70 | 65230 9.972470 0.881876 didntLike 71 | 2463 2.335785 1.366145 smallDoses 72 | 27353 11.375155 1.528626 
largeDoses 73 | 16191 0.000000 0.605619 smallDoses 74 | 12258 4.126787 0.357501 smallDoses 75 | 42377 6.319522 1.058602 didntLike 76 | 25607 8.680527 0.086955 largeDoses 77 | 77450 14.856391 1.129823 didntLike 78 | 58732 2.454285 0.222380 didntLike 79 | 46426 7.292202 0.548607 largeDoses 80 | 32688 8.745137 0.857348 largeDoses 81 | 64890 8.579001 0.683048 didntLike 82 | 8554 2.507302 0.869177 smallDoses 83 | 28861 11.415476 1.505466 largeDoses 84 | 42050 4.838540 1.680892 didntLike 85 | 32193 10.339507 0.583646 largeDoses 86 | 64895 6.573742 1.151433 didntLike 87 | 2355 6.539397 0.462065 smallDoses 88 | 0 2.209159 0.723567 smallDoses 89 | 70406 11.196378 0.836326 didntLike 90 | 57399 4.229595 0.128253 didntLike 91 | 41732 9.505944 0.005273 largeDoses 92 | 11429 8.652725 1.348934 largeDoses 93 | 75270 17.101108 0.490712 didntLike 94 | 5459 7.871839 0.717662 smallDoses 95 | 73520 8.262131 1.361646 didntLike 96 | 40279 9.015635 1.658555 largeDoses 97 | 21540 9.215351 0.806762 largeDoses 98 | 17694 6.375007 0.033678 smallDoses 99 | 22329 2.262014 1.022169 didntLike 100 | 46570 5.677110 0.709469 didntLike 101 | 42403 11.293017 0.207976 largeDoses 102 | 33654 6.590043 1.353117 didntLike 103 | 9171 4.711960 0.194167 smallDoses 104 | 28122 8.768099 1.108041 largeDoses 105 | 34095 11.502519 0.545097 largeDoses 106 | 1774 4.682812 0.578112 smallDoses 107 | 40131 12.446578 0.300754 largeDoses 108 | 13994 12.908384 1.657722 largeDoses 109 | 77064 12.601108 0.974527 didntLike 110 | 11210 3.929456 0.025466 smallDoses 111 | 6122 9.751503 1.182050 largeDoses 112 | 15341 3.043767 0.888168 smallDoses 113 | 44373 4.391522 0.807100 didntLike 114 | 28454 11.695276 0.679015 largeDoses 115 | 63771 7.879742 0.154263 didntLike 116 | 9217 5.613163 0.933632 smallDoses 117 | 69076 9.140172 0.851300 didntLike 118 | 24489 4.258644 0.206892 didntLike 119 | 16871 6.799831 1.221171 smallDoses 120 | 39776 8.752758 0.484418 largeDoses 121 | 5901 1.123033 1.180352 smallDoses 122 | 40987 10.833248 
1.585426 largeDoses 123 | 7479 3.051618 0.026781 smallDoses 124 | 38768 5.308409 0.030683 largeDoses 125 | 4933 1.841792 0.028099 smallDoses 126 | 32311 2.261978 1.605603 didntLike 127 | 26501 11.573696 1.061347 largeDoses 128 | 37433 8.038764 1.083910 largeDoses 129 | 23503 10.734007 0.103715 largeDoses 130 | 68607 9.661909 0.350772 didntLike 131 | 27742 9.005850 0.548737 largeDoses 132 | 11303 0.000000 0.539131 smallDoses 133 | 0 5.757140 1.062373 smallDoses 134 | 32729 9.164656 1.624565 largeDoses 135 | 24619 1.318340 1.436243 didntLike 136 | 42414 14.075597 0.695934 largeDoses 137 | 20210 10.107550 1.308398 largeDoses 138 | 33225 7.960293 1.219760 largeDoses 139 | 54483 6.317292 0.018209 didntLike 140 | 18475 12.664194 0.595653 largeDoses 141 | 33926 2.906644 0.581657 didntLike 142 | 43865 2.388241 0.913938 didntLike 143 | 26547 6.024471 0.486215 largeDoses 144 | 44404 7.226764 1.255329 largeDoses 145 | 16674 4.183997 1.275290 smallDoses 146 | 8123 11.850211 1.096981 largeDoses 147 | 42747 11.661797 1.167935 largeDoses 148 | 56054 3.574967 0.494666 didntLike 149 | 10933 0.000000 0.107475 smallDoses 150 | 18121 7.937657 0.904799 largeDoses 151 | 11272 3.365027 1.014085 smallDoses 152 | 16297 0.000000 0.367491 smallDoses 153 | 28168 13.860672 1.293270 largeDoses 154 | 40963 10.306714 1.211594 largeDoses 155 | 31685 7.228002 0.670670 largeDoses 156 | 55164 4.508740 1.036192 didntLike 157 | 17595 0.366328 0.163652 smallDoses 158 | 1862 3.299444 0.575152 smallDoses 159 | 57087 0.573287 0.607915 didntLike 160 | 63082 9.183738 0.012280 didntLike 161 | 51213 7.842646 1.060636 largeDoses 162 | 6487 4.750964 0.558240 smallDoses 163 | 4805 11.438702 1.556334 largeDoses 164 | 30302 8.243063 1.122768 largeDoses 165 | 68680 7.949017 0.271865 didntLike 166 | 17591 7.875477 0.227085 smallDoses 167 | 74391 9.569087 0.364856 didntLike 168 | 37217 7.750103 0.869094 largeDoses 169 | 42814 0.000000 1.515293 didntLike 170 | 14738 3.396030 0.633977 smallDoses 171 | 19896 11.916091 
0.025294 largeDoses 172 | 14673 0.460758 0.689586 smallDoses 173 | 32011 13.087566 0.476002 largeDoses 174 | 58736 4.589016 1.672600 didntLike 175 | 54744 8.397217 1.534103 didntLike 176 | 29482 5.562772 1.689388 didntLike 177 | 27698 10.905159 0.619091 largeDoses 178 | 11443 1.311441 1.169887 smallDoses 179 | 56117 10.647170 0.980141 largeDoses 180 | 39514 0.000000 0.481918 didntLike 181 | 26627 8.503025 0.830861 largeDoses 182 | 16525 0.436880 1.395314 smallDoses 183 | 24368 6.127867 1.102179 didntLike 184 | 22160 12.112492 0.359680 largeDoses 185 | 6030 1.264968 1.141582 smallDoses 186 | 6468 6.067568 1.327047 smallDoses 187 | 22945 8.010964 1.681648 largeDoses 188 | 18520 3.791084 0.304072 smallDoses 189 | 34914 11.773195 1.262621 largeDoses 190 | 6121 8.339588 1.443357 smallDoses 191 | 38063 2.563092 1.464013 didntLike 192 | 23410 5.954216 0.953782 didntLike 193 | 35073 9.288374 0.767318 largeDoses 194 | 52914 3.976796 1.043109 didntLike 195 | 16801 8.585227 1.455708 largeDoses 196 | 9533 1.271946 0.796506 smallDoses 197 | 16721 0.000000 0.242778 smallDoses 198 | 5832 0.000000 0.089749 smallDoses 199 | 44591 11.521298 0.300860 largeDoses 200 | 10143 1.139447 0.415373 smallDoses 201 | 21609 5.699090 1.391892 smallDoses 202 | 23817 2.449378 1.322560 didntLike 203 | 15640 0.000000 1.228380 smallDoses 204 | 8847 3.168365 0.053993 smallDoses 205 | 50939 10.428610 1.126257 largeDoses 206 | 28521 2.943070 1.446816 didntLike 207 | 32901 10.441348 0.975283 largeDoses 208 | 42850 12.478764 1.628726 largeDoses 209 | 13499 5.856902 0.363883 smallDoses 210 | 40345 2.476420 0.096075 didntLike 211 | 43547 1.826637 0.811457 didntLike 212 | 70758 4.324451 0.328235 didntLike 213 | 19780 1.376085 1.178359 smallDoses 214 | 44484 5.342462 0.394527 didntLike 215 | 54462 11.835521 0.693301 largeDoses 216 | 20085 12.423687 1.424264 largeDoses 217 | 42291 12.161273 0.071131 largeDoses 218 | 47550 8.148360 1.649194 largeDoses 219 | 11938 1.531067 1.549756 smallDoses 220 | 40699 
3.200912 0.309679 didntLike 221 | 70908 8.862691 0.530506 didntLike 222 | 73989 6.370551 0.369350 didntLike 223 | 11872 2.468841 0.145060 smallDoses 224 | 48463 11.054212 0.141508 largeDoses 225 | 15987 2.037080 0.715243 smallDoses 226 | 70036 13.364030 0.549972 didntLike 227 | 32967 10.249135 0.192735 largeDoses 228 | 63249 10.464252 1.669767 didntLike 229 | 42795 9.424574 0.013725 largeDoses 230 | 14459 4.458902 0.268444 smallDoses 231 | 19973 0.000000 0.575976 smallDoses 232 | 5494 9.686082 1.029808 largeDoses 233 | 67902 13.649402 1.052618 didntLike 234 | 25621 13.181148 0.273014 largeDoses 235 | 27545 3.877472 0.401600 didntLike 236 | 58656 1.413952 0.451380 didntLike 237 | 7327 4.248986 1.430249 smallDoses 238 | 64555 8.779183 0.845947 didntLike 239 | 8998 4.156252 0.097109 smallDoses 240 | 11752 5.580018 0.158401 smallDoses 241 | 76319 15.040440 1.366898 didntLike 242 | 27665 12.793870 1.307323 largeDoses 243 | 67417 3.254877 0.669546 didntLike 244 | 21808 10.725607 0.588588 largeDoses 245 | 15326 8.256473 0.765891 smallDoses 246 | 20057 8.033892 1.618562 largeDoses 247 | 79341 10.702532 0.204792 didntLike 248 | 15636 5.062996 1.132555 smallDoses 249 | 35602 10.772286 0.668721 largeDoses 250 | 28544 1.892354 0.837028 didntLike 251 | 57663 1.019966 0.372320 didntLike 252 | 78727 15.546043 0.729742 didntLike 253 | 68255 11.638205 0.409125 didntLike 254 | 14964 3.427886 0.975616 smallDoses 255 | 21835 11.246174 1.475586 largeDoses 256 | 7487 0.000000 0.645045 smallDoses 257 | 8700 0.000000 1.424017 smallDoses 258 | 26226 8.242553 0.279069 largeDoses 259 | 65899 8.700060 0.101807 didntLike 260 | 6543 0.812344 0.260334 smallDoses 261 | 46556 2.448235 1.176829 didntLike 262 | 71038 13.230078 0.616147 didntLike 263 | 47657 0.236133 0.340840 didntLike 264 | 19600 11.155826 0.335131 largeDoses 265 | 37422 11.029636 0.505769 largeDoses 266 | 1363 2.901181 1.646633 smallDoses 267 | 26535 3.924594 1.143120 didntLike 268 | 47707 2.524806 1.292848 didntLike 269 | 38055 
3.527474 1.449158 didntLike 270 | 6286 3.384281 0.889268 smallDoses 271 | 10747 0.000000 1.107592 smallDoses 272 | 44883 11.898890 0.406441 largeDoses 273 | 56823 3.529892 1.375844 didntLike 274 | 68086 11.442677 0.696919 didntLike 275 | 70242 10.308145 0.422722 didntLike 276 | 11409 8.540529 0.727373 smallDoses 277 | 67671 7.156949 1.691682 didntLike 278 | 61238 0.720675 0.847574 didntLike 279 | 17774 0.229405 1.038603 smallDoses 280 | 53376 3.399331 0.077501 didntLike 281 | 30930 6.157239 0.580133 didntLike 282 | 28987 1.239698 0.719989 didntLike 283 | 13655 6.036854 0.016548 smallDoses 284 | 7227 5.258665 0.933722 smallDoses 285 | 40409 12.393001 1.571281 largeDoses 286 | 13605 9.627613 0.935842 smallDoses 287 | 26400 11.130453 0.597610 largeDoses 288 | 13491 8.842595 0.349768 largeDoses 289 | 30232 10.690010 1.456595 largeDoses 290 | 43253 5.714718 1.674780 largeDoses 291 | 55536 3.052505 1.335804 didntLike 292 | 8807 0.000000 0.059025 smallDoses 293 | 25783 9.945307 1.287952 largeDoses 294 | 22812 2.719723 1.142148 didntLike 295 | 77826 11.154055 1.608486 didntLike 296 | 38172 2.687918 0.660836 didntLike 297 | 31676 10.037847 0.962245 largeDoses 298 | 74038 12.404762 1.112080 didntLike 299 | 44738 10.237305 0.633422 largeDoses 300 | 17410 4.745392 0.662520 smallDoses 301 | 5688 4.639461 1.569431 smallDoses 302 | 36642 3.149310 0.639669 didntLike 303 | 29956 13.406875 1.639194 largeDoses 304 | 60350 6.068668 0.881241 didntLike 305 | 23758 9.477022 0.899002 largeDoses 306 | 25780 3.897620 0.560201 smallDoses 307 | 11342 5.463615 1.203677 smallDoses 308 | 36109 3.369267 1.575043 didntLike 309 | 14292 5.234562 0.825954 smallDoses 310 | 11160 0.000000 0.722170 smallDoses 311 | 23762 12.979069 0.504068 largeDoses 312 | 39567 5.376564 0.557476 didntLike 313 | 25647 13.527910 1.586732 largeDoses 314 | 14814 2.196889 0.784587 smallDoses 315 | 73590 10.691748 0.007509 didntLike 316 | 35187 1.659242 0.447066 didntLike 317 | 49459 8.369667 0.656697 largeDoses 318 | 31657 
13.157197 0.143248 largeDoses 319 | 6259 8.199667 0.908508 smallDoses 320 | 33101 4.441669 0.439381 largeDoses 321 | 27107 9.846492 0.644523 largeDoses 322 | 17824 0.019540 0.977949 smallDoses 323 | 43536 8.253774 0.748700 largeDoses 324 | 67705 6.038620 1.509646 didntLike 325 | 35283 6.091587 1.694641 largeDoses 326 | 71308 8.986820 1.225165 didntLike 327 | 31054 11.508473 1.624296 largeDoses 328 | 52387 8.807734 0.713922 largeDoses 329 | 40328 0.000000 0.816676 didntLike 330 | 34844 8.889202 1.665414 largeDoses 331 | 11607 3.178117 0.542752 smallDoses 332 | 64306 7.013795 0.139909 didntLike 333 | 32721 9.605014 0.065254 largeDoses 334 | 33170 1.230540 1.331674 didntLike 335 | 37192 10.412811 0.890803 largeDoses 336 | 13089 0.000000 0.567161 smallDoses 337 | 66491 9.699991 0.122011 didntLike 338 | 15941 0.000000 0.061191 smallDoses 339 | 4272 4.455293 0.272135 smallDoses 340 | 48812 3.020977 1.502803 didntLike 341 | 28818 8.099278 0.216317 largeDoses 342 | 35394 1.157764 1.603217 didntLike 343 | 71791 10.105396 0.121067 didntLike 344 | 40668 11.230148 0.408603 largeDoses 345 | 39580 9.070058 0.011379 largeDoses 346 | 11786 0.566460 0.478837 smallDoses 347 | 19251 0.000000 0.487300 smallDoses 348 | 56594 8.956369 1.193484 largeDoses 349 | 54495 1.523057 0.620528 didntLike 350 | 11844 2.749006 0.169855 smallDoses 351 | 45465 9.235393 0.188350 largeDoses 352 | 31033 10.555573 0.403927 largeDoses 353 | 16633 6.956372 1.519308 smallDoses 354 | 13887 0.636281 1.273984 smallDoses 355 | 52603 3.574737 0.075163 didntLike 356 | 72000 9.032486 1.461809 didntLike 357 | 68497 5.958993 0.023012 didntLike 358 | 35135 2.435300 1.211744 didntLike 359 | 26397 10.539731 1.638248 largeDoses 360 | 7313 7.646702 0.056513 smallDoses 361 | 91273 20.919349 0.644571 didntLike 362 | 24743 1.424726 0.838447 didntLike 363 | 31690 6.748663 0.890223 largeDoses 364 | 15432 2.289167 0.114881 smallDoses 365 | 58394 5.548377 0.402238 didntLike 366 | 33962 6.057227 0.432666 didntLike 367 | 31442 
10.828595 0.559955 largeDoses 368 | 31044 11.318160 0.271094 largeDoses 369 | 29938 13.265311 0.633903 largeDoses 370 | 9875 0.000000 1.496715 smallDoses 371 | 51542 6.517133 0.402519 largeDoses 372 | 11878 4.934374 1.520028 smallDoses 373 | 69241 10.151738 0.896433 didntLike 374 | 37776 2.425781 1.559467 didntLike 375 | 68997 9.778962 1.195498 didntLike 376 | 67416 12.219950 0.657677 didntLike 377 | 59225 7.394151 0.954434 didntLike 378 | 29138 8.518535 0.742546 largeDoses 379 | 5962 2.798700 0.662632 smallDoses 380 | 10847 0.637930 0.617373 smallDoses 381 | 70527 10.750490 0.097415 didntLike 382 | 9610 0.625382 0.140969 smallDoses 383 | 64734 10.027968 0.282787 didntLike 384 | 25941 9.817347 0.364197 largeDoses 385 | 2763 0.646828 1.266069 smallDoses 386 | 55601 3.347111 0.914294 didntLike 387 | 31128 11.816892 0.193798 largeDoses 388 | 5181 0.000000 1.480198 smallDoses 389 | 69982 10.945666 0.993219 didntLike 390 | 52440 10.244706 0.280539 largeDoses 391 | 57350 2.579801 1.149172 didntLike 392 | 57869 2.630410 0.098869 didntLike 393 | 56557 11.746200 1.695517 largeDoses 394 | 42342 8.104232 1.326277 largeDoses 395 | 15560 12.409743 0.790295 largeDoses 396 | 34826 12.167844 1.328086 largeDoses 397 | 8569 3.198408 0.299287 smallDoses 398 | 77623 16.055513 0.541052 didntLike 399 | 78184 7.138659 0.158481 didntLike 400 | 7036 4.831041 0.761419 smallDoses 401 | 69616 10.082890 1.373611 didntLike 402 | 21546 10.066867 0.788470 largeDoses 403 | 36715 8.129538 0.329913 largeDoses 404 | 20522 3.012463 1.138108 smallDoses 405 | 42349 3.720391 0.845974 didntLike 406 | 9037 0.773493 1.148256 smallDoses 407 | 26728 10.962941 1.037324 largeDoses 408 | 587 0.177621 0.162614 smallDoses 409 | 48915 3.085853 0.967899 didntLike 410 | 9824 8.426781 0.202558 smallDoses 411 | 4135 1.825927 1.128347 smallDoses 412 | 9666 2.185155 1.010173 smallDoses 413 | 59333 7.184595 1.261338 didntLike 414 | 36198 0.000000 0.116525 didntLike 415 | 34909 8.901752 1.033527 largeDoses 416 | 47516 
2.451497 1.358795 didntLike 417 | 55807 3.213631 0.432044 didntLike 418 | 14036 3.974739 0.723929 smallDoses 419 | 42856 9.601306 0.619232 largeDoses 420 | 64007 8.363897 0.445341 didntLike 421 | 59428 6.381484 1.365019 didntLike 422 | 13730 0.000000 1.403914 smallDoses 423 | 41740 9.609836 1.438105 largeDoses 424 | 63546 9.904741 0.985862 didntLike 425 | 30417 7.185807 1.489102 largeDoses 426 | 69636 5.466703 1.216571 didntLike 427 | 64660 0.000000 0.915898 didntLike 428 | 14883 4.575443 0.535671 smallDoses 429 | 7965 3.277076 1.010868 smallDoses 430 | 68620 10.246623 1.239634 didntLike 431 | 8738 2.341735 1.060235 smallDoses 432 | 7544 3.201046 0.498843 smallDoses 433 | 6377 6.066013 0.120927 smallDoses 434 | 36842 8.829379 0.895657 largeDoses 435 | 81046 15.833048 1.568245 didntLike 436 | 67736 13.516711 1.220153 didntLike 437 | 32492 0.664284 1.116755 didntLike 438 | 39299 6.325139 0.605109 largeDoses 439 | 77289 8.677499 0.344373 didntLike 440 | 33835 8.188005 0.964896 largeDoses 441 | 71890 9.414263 0.384030 didntLike 442 | 32054 9.196547 1.138253 largeDoses 443 | 38579 10.202968 0.452363 largeDoses 444 | 55984 2.119439 1.481661 didntLike 445 | 72694 13.635078 0.858314 didntLike 446 | 42299 0.083443 0.701669 didntLike 447 | 26635 9.149096 1.051446 largeDoses 448 | 8579 1.933803 1.374388 smallDoses 449 | 37302 14.115544 0.676198 largeDoses 450 | 22878 8.933736 0.943352 largeDoses 451 | 4364 2.661254 0.946117 smallDoses 452 | 4985 0.988432 1.305027 smallDoses 453 | 37068 2.063741 1.125946 didntLike 454 | 41137 2.220590 0.690754 didntLike 455 | 67759 6.424849 0.806641 didntLike 456 | 11831 1.156153 1.613674 smallDoses 457 | 34502 3.032720 0.601847 didntLike 458 | 4088 3.076828 0.952089 smallDoses 459 | 15199 0.000000 0.318105 smallDoses 460 | 17309 7.750480 0.554015 largeDoses 461 | 42816 10.958135 1.482500 largeDoses 462 | 43751 10.222018 0.488678 largeDoses 463 | 58335 2.367988 0.435741 didntLike 464 | 75039 7.686054 1.381455 didntLike 465 | 42878 11.464879 
1.481589 largeDoses 466 | 42770 11.075735 0.089726 largeDoses 467 | 8848 3.543989 0.345853 smallDoses 468 | 31340 8.123889 1.282880 largeDoses 469 | 41413 4.331769 0.754467 largeDoses 470 | 12731 0.120865 1.211961 smallDoses 471 | 22447 6.116109 0.701523 largeDoses 472 | 33564 7.474534 0.505790 largeDoses 473 | 48907 8.819454 0.649292 largeDoses 474 | 8762 6.802144 0.615284 smallDoses 475 | 46696 12.666325 0.931960 largeDoses 476 | 36851 8.636180 0.399333 largeDoses 477 | 67639 11.730991 1.289833 didntLike 478 | 171 8.132449 0.039062 smallDoses 479 | 26674 10.296589 1.496144 largeDoses 480 | 8739 7.583906 1.005764 smallDoses 481 | 66668 9.777806 0.496377 didntLike 482 | 68732 8.833546 0.513876 didntLike 483 | 69995 4.907899 1.518036 didntLike 484 | 82008 8.362736 1.285939 didntLike 485 | 25054 9.084726 1.606312 largeDoses 486 | 33085 14.164141 0.560970 largeDoses 487 | 41379 9.080683 0.989920 largeDoses 488 | 39417 6.522767 0.038548 largeDoses 489 | 12556 3.690342 0.462281 smallDoses 490 | 39432 3.563706 0.242019 didntLike 491 | 38010 1.065870 1.141569 didntLike 492 | 69306 6.683796 1.456317 didntLike 493 | 38000 1.712874 0.243945 didntLike 494 | 46321 13.109929 1.280111 largeDoses 495 | 66293 11.327910 0.780977 didntLike 496 | 22730 4.545711 1.233254 didntLike 497 | 5952 3.367889 0.468104 smallDoses 498 | 72308 8.326224 0.567347 didntLike 499 | 60338 8.978339 1.442034 didntLike 500 | 13301 5.655826 1.582159 smallDoses 501 | 27884 8.855312 0.570684 largeDoses 502 | 11188 6.649568 0.544233 smallDoses 503 | 56796 3.966325 0.850410 didntLike 504 | 8571 1.924045 1.664782 smallDoses 505 | 4914 6.004812 0.280369 smallDoses 506 | 10784 0.000000 0.375849 smallDoses 507 | 39296 9.923018 0.092192 largeDoses 508 | 13113 2.389084 0.119284 smallDoses 509 | 70204 13.663189 0.133251 didntLike 510 | 46813 11.434976 0.321216 largeDoses 511 | 11697 0.358270 1.292858 smallDoses 512 | 44183 9.598873 0.223524 largeDoses 513 | 2225 6.375275 0.608040 smallDoses 514 | 29066 11.580532 
0.458401 largeDoses 515 | 4245 5.319324 1.598070 smallDoses 516 | 34379 4.324031 1.603481 didntLike 517 | 44441 2.358370 1.273204 didntLike 518 | 2022 0.000000 1.182708 smallDoses 519 | 26866 12.824376 0.890411 largeDoses 520 | 57070 1.587247 1.456982 didntLike 521 | 32932 8.510324 1.520683 largeDoses 522 | 51967 10.428884 1.187734 largeDoses 523 | 44432 8.346618 0.042318 largeDoses 524 | 67066 7.541444 0.809226 didntLike 525 | 17262 2.540946 1.583286 smallDoses 526 | 79728 9.473047 0.692513 didntLike 527 | 14259 0.352284 0.474080 smallDoses 528 | 6122 0.000000 0.589826 smallDoses 529 | 76879 12.405171 0.567201 didntLike 530 | 11426 4.126775 0.871452 smallDoses 531 | 2493 0.034087 0.335848 smallDoses 532 | 19910 1.177634 0.075106 smallDoses 533 | 10939 0.000000 0.479996 smallDoses 534 | 17716 0.994909 0.611135 smallDoses 535 | 31390 11.053664 1.180117 largeDoses 536 | 20375 0.000000 1.679729 smallDoses 537 | 26309 2.495011 1.459589 didntLike 538 | 33484 11.516831 0.001156 largeDoses 539 | 45944 9.213215 0.797743 largeDoses 540 | 4249 5.332865 0.109288 smallDoses 541 | 6089 0.000000 1.689771 smallDoses 542 | 7513 0.000000 1.126053 smallDoses 543 | 27862 12.640062 1.690903 largeDoses 544 | 39038 2.693142 1.317518 didntLike 545 | 19218 3.328969 0.268271 smallDoses 546 | 62911 7.193166 1.117456 didntLike 547 | 77758 6.615512 1.521012 didntLike 548 | 27940 8.000567 0.835341 largeDoses 549 | 2194 4.017541 0.512104 smallDoses 550 | 37072 13.245859 0.927465 largeDoses 551 | 15585 5.970616 0.813624 smallDoses 552 | 25577 11.668719 0.886902 largeDoses 553 | 8777 4.283237 1.272728 smallDoses 554 | 29016 10.742963 0.971401 largeDoses 555 | 21910 12.326672 1.592608 largeDoses 556 | 12916 0.000000 0.344622 smallDoses 557 | 10976 0.000000 0.922846 smallDoses 558 | 79065 10.602095 0.573686 didntLike 559 | 36759 10.861859 1.155054 largeDoses 560 | 50011 1.229094 1.638690 didntLike 561 | 1155 0.410392 1.313401 smallDoses 562 | 71600 14.552711 0.616162 didntLike 563 | 30817 14.178043 
0.616313 largeDoses 564 | 54559 14.136260 0.362388 didntLike 565 | 29764 0.093534 1.207194 didntLike 566 | 69100 10.929021 0.403110 didntLike 567 | 47324 11.432919 0.825959 largeDoses 568 | 73199 9.134527 0.586846 didntLike 569 | 44461 5.071432 1.421420 didntLike 570 | 45617 11.460254 1.541749 largeDoses 571 | 28221 11.620039 1.103553 largeDoses 572 | 7091 4.022079 0.207307 smallDoses 573 | 6110 3.057842 1.631262 smallDoses 574 | 79016 7.782169 0.404385 didntLike 575 | 18289 7.981741 0.929789 largeDoses 576 | 43679 4.601363 0.268326 didntLike 577 | 22075 2.595564 1.115375 didntLike 578 | 23535 10.049077 0.391045 largeDoses 579 | 25301 3.265444 1.572970 smallDoses 580 | 32256 11.780282 1.511014 largeDoses 581 | 36951 3.075975 0.286284 didntLike 582 | 31290 1.795307 0.194343 didntLike 583 | 38953 11.106979 0.202415 largeDoses 584 | 35257 5.994413 0.800021 didntLike 585 | 25847 9.706062 1.012182 largeDoses 586 | 32680 10.582992 0.836025 largeDoses 587 | 62018 7.038266 1.458979 didntLike 588 | 9074 0.023771 0.015314 smallDoses 589 | 33004 12.823982 0.676371 largeDoses 590 | 44588 3.617770 0.493483 didntLike 591 | 32565 8.346684 0.253317 largeDoses 592 | 38563 6.104317 0.099207 didntLike 593 | 75668 16.207776 0.584973 didntLike 594 | 9069 6.401969 1.691873 smallDoses 595 | 53395 2.298696 0.559757 didntLike 596 | 28631 7.661515 0.055981 largeDoses 597 | 71036 6.353608 1.645301 didntLike 598 | 71142 10.442780 0.335870 didntLike 599 | 37653 3.834509 1.346121 didntLike 600 | 76839 10.998587 0.584555 didntLike 601 | 9916 2.695935 1.512111 smallDoses 602 | 38889 3.356646 0.324230 didntLike 603 | 39075 14.677836 0.793183 largeDoses 604 | 48071 1.551934 0.130902 didntLike 605 | 7275 2.464739 0.223502 smallDoses 606 | 41804 1.533216 1.007481 didntLike 607 | 35665 12.473921 0.162910 largeDoses 608 | 67956 6.491596 0.032576 didntLike 609 | 41892 10.506276 1.510747 largeDoses 610 | 38844 4.380388 0.748506 didntLike 611 | 74197 13.670988 1.687944 didntLike 612 | 14201 8.317599 
0.390409 smallDoses 613 | 3908 0.000000 0.556245 smallDoses 614 | 2459 0.000000 0.290218 smallDoses 615 | 32027 10.095799 1.188148 largeDoses 616 | 12870 0.860695 1.482632 smallDoses 617 | 9880 1.557564 0.711278 smallDoses 618 | 72784 10.072779 0.756030 didntLike 619 | 17521 0.000000 0.431468 smallDoses 620 | 50283 7.140817 0.883813 largeDoses 621 | 33536 11.384548 1.438307 largeDoses 622 | 9452 3.214568 1.083536 smallDoses 623 | 37457 11.720655 0.301636 largeDoses 624 | 17724 6.374475 1.475925 largeDoses 625 | 43869 5.749684 0.198875 largeDoses 626 | 264 3.871808 0.552602 smallDoses 627 | 25736 8.336309 0.636238 largeDoses 628 | 39584 9.710442 1.503735 largeDoses 629 | 31246 1.532611 1.433898 didntLike 630 | 49567 9.785785 0.984614 largeDoses 631 | 7052 2.633627 1.097866 smallDoses 632 | 35493 9.238935 0.494701 largeDoses 633 | 10986 1.205656 1.398803 smallDoses 634 | 49508 3.124909 1.670121 didntLike 635 | 5734 7.935489 1.585044 smallDoses 636 | 65479 12.746636 1.560352 didntLike 637 | 77268 10.732563 0.545321 didntLike 638 | 28490 3.977403 0.766103 didntLike 639 | 13546 4.194426 0.450663 smallDoses 640 | 37166 9.610286 0.142912 largeDoses 641 | 16381 4.797555 1.260455 smallDoses 642 | 10848 1.615279 0.093002 smallDoses 643 | 35405 4.614771 1.027105 didntLike 644 | 15917 0.000000 1.369726 smallDoses 645 | 6131 0.608457 0.512220 smallDoses 646 | 67432 6.558239 0.667579 didntLike 647 | 30354 12.315116 0.197068 largeDoses 648 | 69696 7.014973 1.494616 didntLike 649 | 33481 8.822304 1.194177 largeDoses 650 | 43075 10.086796 0.570455 largeDoses 651 | 38343 7.241614 1.661627 largeDoses 652 | 14318 4.602395 1.511768 smallDoses 653 | 5367 7.434921 0.079792 smallDoses 654 | 37894 10.467570 1.595418 largeDoses 655 | 36172 9.948127 0.003663 largeDoses 656 | 40123 2.478529 1.568987 didntLike 657 | 10976 5.938545 0.878540 smallDoses 658 | 12705 0.000000 0.948004 smallDoses 659 | 12495 5.559181 1.357926 smallDoses 660 | 35681 9.776654 0.535966 largeDoses 661 | 46202 3.092056 
0.490906 didntLike 662 | 11505 0.000000 1.623311 smallDoses 663 | 22834 4.459495 0.538867 didntLike 664 | 49901 8.334306 1.646600 largeDoses 665 | 71932 11.226654 0.384686 didntLike 666 | 13279 3.904737 1.597294 smallDoses 667 | 49112 7.038205 1.211329 largeDoses 668 | 77129 9.836120 1.054340 didntLike 669 | 37447 1.990976 0.378081 didntLike 670 | 62397 9.005302 0.485385 didntLike 671 | 0 1.772510 1.039873 smallDoses 672 | 15476 0.458674 0.819560 smallDoses 673 | 40625 10.003919 0.231658 largeDoses 674 | 36706 0.520807 1.476008 didntLike 675 | 28580 10.678214 1.431837 largeDoses 676 | 25862 4.425992 1.363842 didntLike 677 | 63488 12.035355 0.831222 didntLike 678 | 33944 10.606732 1.253858 largeDoses 679 | 30099 1.568653 0.684264 didntLike 680 | 13725 2.545434 0.024271 smallDoses 681 | 36768 10.264062 0.982593 largeDoses 682 | 64656 9.866276 0.685218 didntLike 683 | 14927 0.142704 0.057455 smallDoses 684 | 43231 9.853270 1.521432 largeDoses 685 | 66087 6.596604 1.653574 didntLike 686 | 19806 2.602287 1.321481 smallDoses 687 | 41081 10.411776 0.664168 largeDoses 688 | 10277 7.083449 0.622589 smallDoses 689 | 7014 2.080068 1.254441 smallDoses 690 | 17275 0.522844 1.622458 smallDoses 691 | 31600 10.362000 1.544827 largeDoses 692 | 59956 3.412967 1.035410 didntLike 693 | 42181 6.796548 1.112153 largeDoses 694 | 51743 4.092035 0.075804 didntLike 695 | 5194 2.763811 1.564325 smallDoses 696 | 30832 12.547439 1.402443 largeDoses 697 | 7976 5.708052 1.596152 smallDoses 698 | 14602 4.558025 0.375806 smallDoses 699 | 41571 11.642307 0.438553 largeDoses 700 | 55028 3.222443 0.121399 didntLike 701 | 5837 4.736156 0.029871 smallDoses 702 | 39808 10.839526 0.836323 largeDoses 703 | 20944 4.194791 0.235483 smallDoses 704 | 22146 14.936259 0.888582 largeDoses 705 | 42169 3.310699 1.521855 didntLike 706 | 7010 2.971931 0.034321 smallDoses 707 | 3807 9.261667 0.537807 smallDoses 708 | 29241 7.791833 1.111416 largeDoses 709 | 52696 1.480470 1.028750 didntLike 710 | 42545 3.677287 
0.244167 didntLike 711 | 24437 2.202967 1.370399 didntLike 712 | 16037 5.796735 0.935893 smallDoses 713 | 8493 3.063333 0.144089 smallDoses 714 | 68080 11.233094 0.492487 didntLike 715 | 59016 1.965570 0.005697 didntLike 716 | 11810 8.616719 0.137419 smallDoses 717 | 68630 6.609989 1.083505 didntLike 718 | 7629 1.712639 1.086297 smallDoses 719 | 71992 10.117445 1.299319 didntLike 720 | 13398 0.000000 1.104178 smallDoses 721 | 26241 9.824777 1.346821 largeDoses 722 | 11160 1.653089 0.980949 smallDoses 723 | 76701 18.178822 1.473671 didntLike 724 | 32174 6.781126 0.885340 largeDoses 725 | 45043 8.206750 1.549223 largeDoses 726 | 42173 10.081853 1.376745 largeDoses 727 | 69801 6.288742 0.112799 didntLike 728 | 41737 3.695937 1.543589 didntLike 729 | 46979 6.726151 1.069380 largeDoses 730 | 79267 12.969999 1.568223 didntLike 731 | 4615 2.661390 1.531933 smallDoses 732 | 32907 7.072764 1.117386 largeDoses 733 | 37444 9.123366 1.318988 largeDoses 734 | 569 3.743946 1.039546 smallDoses 735 | 8723 2.341300 0.219361 smallDoses 736 | 6024 0.541913 0.592348 smallDoses 737 | 52252 2.310828 1.436753 didntLike 738 | 8358 6.226597 1.427316 smallDoses 739 | 26166 7.277876 0.489252 largeDoses 740 | 18471 0.000000 0.389459 smallDoses 741 | 3386 7.218221 1.098828 smallDoses 742 | 41544 8.777129 1.111464 largeDoses 743 | 10480 2.813428 0.819419 smallDoses 744 | 5894 2.268766 1.412130 smallDoses 745 | 7273 6.283627 0.571292 smallDoses 746 | 22272 7.520081 1.626868 largeDoses 747 | 31369 11.739225 0.027138 largeDoses 748 | 10708 3.746883 0.877350 smallDoses 749 | 69364 12.089835 0.521631 didntLike 750 | 37760 12.310404 0.259339 largeDoses 751 | 13004 0.000000 0.671355 smallDoses 752 | 37885 2.728800 0.331502 didntLike 753 | 52555 10.814342 0.607652 largeDoses 754 | 38997 12.170268 0.844205 largeDoses 755 | 69698 6.698371 0.240084 didntLike 756 | 11783 3.632672 1.643479 smallDoses 757 | 47636 10.059991 0.892361 largeDoses 758 | 15744 1.887674 0.756162 smallDoses 759 | 69058 8.229125 
0.195886 didntLike 760 | 33057 7.817082 0.476102 largeDoses 761 | 28681 12.277230 0.076805 largeDoses 762 | 34042 10.055337 1.115778 largeDoses 763 | 29928 3.596002 1.485952 didntLike 764 | 9734 2.755530 1.420655 smallDoses 765 | 7344 7.780991 0.513048 smallDoses 766 | 7387 0.093705 0.391834 smallDoses 767 | 33957 8.481567 0.520078 largeDoses 768 | 9936 3.865584 0.110062 smallDoses 769 | 36094 9.683709 0.779984 largeDoses 770 | 39835 10.617255 1.359970 largeDoses 771 | 64486 7.203216 1.624762 didntLike 772 | 0 7.601414 1.215605 smallDoses 773 | 39539 1.386107 1.417070 didntLike 774 | 66972 9.129253 0.594089 didntLike 775 | 15029 1.363447 0.620841 smallDoses 776 | 44909 3.181399 0.359329 didntLike 777 | 38183 13.365414 0.217011 largeDoses 778 | 37372 4.207717 1.289767 didntLike 779 | 0 4.088395 0.870075 smallDoses 780 | 17786 3.327371 1.142505 smallDoses 781 | 39055 1.303323 1.235650 didntLike 782 | 37045 7.999279 1.581763 largeDoses 783 | 6435 2.217488 0.864536 smallDoses 784 | 72265 7.751808 0.192451 didntLike 785 | 28152 14.149305 1.591532 largeDoses 786 | 25931 8.765721 0.152808 largeDoses 787 | 7538 3.408996 0.184896 smallDoses 788 | 1315 1.251021 0.112340 smallDoses 789 | 12292 6.160619 1.537165 smallDoses 790 | 49248 1.034538 1.585162 didntLike 791 | 9025 0.000000 1.034635 smallDoses 792 | 13438 2.355051 0.542603 smallDoses 793 | 69683 6.614543 0.153771 didntLike 794 | 25374 10.245062 1.450903 largeDoses 795 | 55264 3.467074 1.231019 didntLike 796 | 38324 7.487678 1.572293 largeDoses 797 | 69643 4.624115 1.185192 didntLike 798 | 44058 8.995957 1.436479 largeDoses 799 | 41316 11.564476 0.007195 largeDoses 800 | 29119 3.440948 0.078331 didntLike 801 | 51656 1.673603 0.732746 didntLike 802 | 3030 4.719341 0.699755 smallDoses 803 | 35695 10.304798 1.576488 largeDoses 804 | 1537 2.086915 1.199312 smallDoses 805 | 9083 6.338220 1.131305 smallDoses 806 | 47744 8.254926 0.710694 largeDoses 807 | 71372 16.067108 0.974142 didntLike 808 | 37980 1.723201 0.310488 
didntLike 809 | 42385 3.785045 0.876904 didntLike 810 | 22687 2.557561 0.123738 didntLike 811 | 39512 9.852220 1.095171 largeDoses 812 | 11885 3.679147 1.557205 smallDoses 813 | 4944 9.789681 0.852971 smallDoses 814 | 73230 14.958998 0.526707 didntLike 815 | 17585 11.182148 1.288459 largeDoses 816 | 68737 7.528533 1.657487 didntLike 817 | 13818 5.253802 1.378603 smallDoses 818 | 31662 13.946752 1.426657 largeDoses 819 | 86686 15.557263 1.430029 didntLike 820 | 43214 12.483550 0.688513 largeDoses 821 | 24091 2.317302 1.411137 didntLike 822 | 52544 10.069724 0.766119 largeDoses 823 | 61861 5.792231 1.615483 didntLike 824 | 47903 4.138435 0.475994 didntLike 825 | 37190 12.929517 0.304378 largeDoses 826 | 6013 9.378238 0.307392 smallDoses 827 | 27223 8.361362 1.643204 largeDoses 828 | 69027 7.939406 1.325042 didntLike 829 | 78642 10.735384 0.705788 didntLike 830 | 30254 11.592723 0.286188 largeDoses 831 | 21704 10.098356 0.704748 largeDoses 832 | 34985 9.299025 0.545337 largeDoses 833 | 31316 11.158297 0.218067 largeDoses 834 | 76368 16.143900 0.558388 didntLike 835 | 27953 10.971700 1.221787 largeDoses 836 | 152 0.000000 0.681478 smallDoses 837 | 9146 3.178961 1.292692 smallDoses 838 | 75346 17.625350 0.339926 didntLike 839 | 26376 1.995833 0.267826 didntLike 840 | 35255 10.640467 0.416181 largeDoses 841 | 19198 9.628339 0.985462 largeDoses 842 | 12518 4.662664 0.495403 smallDoses 843 | 25453 5.754047 1.382742 smallDoses 844 | 12530 0.000000 0.037146 smallDoses 845 | 62230 9.334332 0.198118 didntLike 846 | 9517 3.846162 0.619968 smallDoses 847 | 71161 10.685084 0.678179 didntLike 848 | 1593 4.752134 0.359205 smallDoses 849 | 33794 0.697630 0.966786 didntLike 850 | 39710 10.365836 0.505898 largeDoses 851 | 16941 0.461478 0.352865 smallDoses 852 | 69209 11.339537 1.068740 didntLike 853 | 4446 5.420280 0.127310 smallDoses 854 | 9347 3.469955 1.619947 smallDoses 855 | 55635 8.517067 0.994858 largeDoses 856 | 65889 8.306512 0.413690 didntLike 857 | 10753 2.628690 0.444320 
smallDoses 858 | 7055 0.000000 0.802985 smallDoses 859 | 7905 0.000000 1.170397 smallDoses 860 | 53447 7.298767 1.582346 largeDoses 861 | 9194 7.331319 1.277988 smallDoses 862 | 61914 9.392269 0.151617 didntLike 863 | 15630 5.541201 1.180596 smallDoses 864 | 79194 15.149460 0.537540 didntLike 865 | 12268 5.515189 0.250562 smallDoses 866 | 33682 7.728898 0.920494 largeDoses 867 | 26080 11.318785 1.510979 largeDoses 868 | 19119 3.574709 1.531514 smallDoses 869 | 30902 7.350965 0.026332 largeDoses 870 | 63039 7.122363 1.630177 didntLike 871 | 51136 1.828412 1.013702 didntLike 872 | 35262 10.117989 1.156862 largeDoses 873 | 42776 11.309897 0.086291 largeDoses 874 | 64191 8.342034 1.388569 didntLike 875 | 15436 0.241714 0.715577 smallDoses 876 | 14402 10.482619 1.694972 smallDoses 877 | 6341 9.289510 1.428879 smallDoses 878 | 14113 4.269419 0.134181 smallDoses 879 | 6390 0.000000 0.189456 smallDoses 880 | 8794 0.817119 0.143668 smallDoses 881 | 43432 1.508394 0.652651 didntLike 882 | 38334 9.359918 0.052262 largeDoses 883 | 34068 10.052333 0.550423 largeDoses 884 | 30819 11.111660 0.989159 largeDoses 885 | 22239 11.265971 0.724054 largeDoses 886 | 28725 10.383830 0.254836 largeDoses 887 | 57071 3.878569 1.377983 didntLike 888 | 72420 13.679237 0.025346 didntLike 889 | 28294 10.526846 0.781569 largeDoses 890 | 9896 0.000000 0.924198 smallDoses 891 | 65821 4.106727 1.085669 didntLike 892 | 7645 8.118856 1.470686 smallDoses 893 | 71289 7.796874 0.052336 didntLike 894 | 5128 2.789669 1.093070 smallDoses 895 | 13711 6.226962 0.287251 smallDoses 896 | 22240 10.169548 1.660104 largeDoses 897 | 15092 0.000000 1.370549 smallDoses 898 | 5017 7.513353 0.137348 smallDoses 899 | 10141 8.240793 0.099735 smallDoses 900 | 35570 14.612797 1.247390 largeDoses 901 | 46893 3.562976 0.445386 didntLike 902 | 8178 3.230482 1.331698 smallDoses 903 | 55783 3.612548 1.551911 didntLike 904 | 1148 0.000000 0.332365 smallDoses 905 | 10062 3.931299 0.487577 smallDoses 906 | 74124 14.752342 1.155160 
didntLike 907 | 66603 10.261887 1.628085 didntLike 908 | 11893 2.787266 1.570402 smallDoses 909 | 50908 15.112319 1.324132 largeDoses 910 | 39891 5.184553 0.223382 largeDoses 911 | 65915 3.868359 0.128078 didntLike 912 | 65678 3.507965 0.028904 didntLike 913 | 62996 11.019254 0.427554 didntLike 914 | 36851 3.812387 0.655245 didntLike 915 | 36669 11.056784 0.378725 largeDoses 916 | 38876 8.826880 1.002328 largeDoses 917 | 26878 11.173861 1.478244 largeDoses 918 | 46246 11.506465 0.421993 largeDoses 919 | 12761 7.798138 0.147917 largeDoses 920 | 35282 10.155081 1.370039 largeDoses 921 | 68306 10.645275 0.693453 didntLike 922 | 31262 9.663200 1.521541 largeDoses 923 | 34754 10.790404 1.312679 largeDoses 924 | 13408 2.810534 0.219962 smallDoses 925 | 30365 9.825999 1.388500 largeDoses 926 | 10709 1.421316 0.677603 smallDoses 927 | 24332 11.123219 0.809107 largeDoses 928 | 45517 13.402206 0.661524 largeDoses 929 | 6178 1.212255 0.836807 smallDoses 930 | 10639 1.568446 1.297469 smallDoses 931 | 29613 3.343473 1.312266 didntLike 932 | 22392 5.400155 0.193494 didntLike 933 | 51126 3.818754 0.590905 didntLike 934 | 53644 7.973845 0.307364 largeDoses 935 | 51417 9.078824 0.734876 largeDoses 936 | 24859 0.153467 0.766619 didntLike 937 | 61732 8.325167 0.028479 didntLike 938 | 71128 7.092089 1.216733 didntLike 939 | 27276 5.192485 1.094409 largeDoses 940 | 30453 10.340791 1.087721 largeDoses 941 | 18670 2.077169 1.019775 smallDoses 942 | 70600 10.151966 0.993105 didntLike 943 | 12683 0.046826 0.809614 smallDoses 944 | 81597 11.221874 1.395015 didntLike 945 | 69959 14.497963 1.019254 didntLike 946 | 8124 3.554508 0.533462 smallDoses 947 | 18867 3.522673 0.086725 smallDoses 948 | 80886 14.531655 0.380172 didntLike 949 | 55895 3.027528 0.885457 didntLike 950 | 31587 1.845967 0.488985 didntLike 951 | 10591 10.226164 0.804403 largeDoses 952 | 70096 10.965926 1.212328 didntLike 953 | 53151 2.129921 1.477378 didntLike 954 | 11992 0.000000 1.606849 smallDoses 955 | 33114 9.489005 
0.827814 largeDoses 956 | 7413 0.000000 1.020797 smallDoses 957 | 10583 0.000000 1.270167 smallDoses 958 | 58668 6.556676 0.055183 didntLike 959 | 35018 9.959588 0.060020 largeDoses 960 | 70843 7.436056 1.479856 didntLike 961 | 14011 0.404888 0.459517 smallDoses 962 | 35015 9.952942 1.650279 largeDoses 963 | 70839 15.600252 0.021935 didntLike 964 | 3024 2.723846 0.387455 smallDoses 965 | 5526 0.513866 1.323448 smallDoses 966 | 5113 0.000000 0.861859 smallDoses 967 | 20851 7.280602 1.438470 smallDoses 968 | 40999 9.161978 1.110180 largeDoses 969 | 15823 0.991725 0.730979 smallDoses 970 | 35432 7.398380 0.684218 largeDoses 971 | 53711 12.149747 1.389088 largeDoses 972 | 64371 9.149678 0.874905 didntLike 973 | 9289 9.666576 1.370330 smallDoses 974 | 60613 3.620110 0.287767 didntLike 975 | 18338 5.238800 1.253646 smallDoses 976 | 22845 14.715782 1.503758 largeDoses 977 | 74676 14.445740 1.211160 didntLike 978 | 34143 13.609528 0.364240 largeDoses 979 | 14153 3.141585 0.424280 smallDoses 980 | 9327 0.000000 0.120947 smallDoses 981 | 18991 0.454750 1.033280 smallDoses 982 | 9193 0.510310 0.016395 smallDoses 983 | 2285 3.864171 0.616349 smallDoses 984 | 9493 6.724021 0.563044 smallDoses 985 | 2371 4.289375 0.012563 smallDoses 986 | 13963 0.000000 1.437030 smallDoses 987 | 2299 3.733617 0.698269 smallDoses 988 | 5262 2.002589 1.380184 smallDoses 989 | 4659 2.502627 0.184223 smallDoses 990 | 17582 6.382129 0.876581 smallDoses 991 | 27750 8.546741 0.128706 largeDoses 992 | 9868 2.694977 0.432818 smallDoses 993 | 18333 3.951256 0.333300 smallDoses 994 | 3780 9.856183 0.329181 smallDoses 995 | 18190 2.068962 0.429927 smallDoses 996 | 11145 3.410627 0.631838 smallDoses 997 | 68846 9.974715 0.669787 didntLike 998 | 26575 10.650102 0.866627 largeDoses 999 | 48111 9.134528 0.728045 largeDoses 1000 | 43757 7.882601 1.332446 largeDoses 1001 | -------------------------------------------------------------------------------- /KNN/2.KNN应用/kNN_example1_machine_learning_in_action.py: 
def classify0(inX, dataSet, labels, k):
    """kNN classifier: majority vote among the k nearest training samples.

    Parameters:
        inX - feature vector to classify (the test sample)
        dataSet - training feature matrix, shape (n_samples, n_features)
        labels - training labels, length n_samples
        k - number of nearest neighbours that vote
    Returns:
        the majority label among the k nearest neighbours
    """
    # Euclidean distance from inX to every training row.  Broadcasting
    # replaces the original np.tile copy of inX.
    distances = np.sqrt(((dataSet - inX) ** 2).sum(axis=1))
    # Indices of training rows sorted by increasing distance.
    sortedDistIndices = distances.argsort()
    # Count label frequencies among the k closest rows.
    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndices[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1
    # Most frequent label wins (stable sort keeps the original tie order).
    # The leftover debug print of the sorted counts has been removed.
    sortedClassCount = sorted(classCount.items(), key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]
def file2matrix(filename):
    """Parse the dating data set into a feature matrix and a label vector.

    Each line holds three tab-separated numeric features followed by a
    textual label: didntLike -> 1, smallDoses -> 2, largeDoses -> 3.

    Parameters:
        filename - path to the tab-separated data file
    Returns:
        returnMat - (n_lines, 3) NumPy feature matrix
        classLabelVector - list of int class labels
    """
    label_map = {'didntLike': 1, 'smallDoses': 2, 'largeDoses': 3}
    # Context manager guarantees the handle is closed (the original
    # opened the file and never closed it).
    with open(filename) as fr:
        arrayOLines = fr.readlines()
    returnMat = np.zeros((len(arrayOLines), 3))
    classLabelVector = []
    for index, line in enumerate(arrayOLines):
        listFromLine = line.strip().split('\t')
        # First three columns are the numeric features; NumPy converts
        # the strings on assignment.
        returnMat[index, :] = listFromLine[0:3]
        # Unknown labels are skipped, matching the original behaviour.
        label = label_map.get(listFromLine[-1])
        if label is not None:
            classLabelVector.append(label)
    return returnMat, classLabelVector
def showdatas(datingDataMat, datingLabels):
    """Draw three scatter plots of the dating features, with legends.

    Parameters:
        datingDataMat - feature matrix (columns: miles, games, ice cream)
        datingLabels - class labels (1/2/3)
    Returns:
        None (shows the figure)
    """
    # Chinese font for titles and axis labels.
    font = FontProperties(fname=r"c:\windows\fonts\simsun.ttc", size=14)
    # 2x2 grid of panels; only three are used.
    fig, axs = plt.subplots(nrows=2, ncols=2, sharex=False, sharey=False, figsize=(13, 8))

    # One colour per sample: class 1 -> black, 2 -> orange, 3 -> red.
    colour_of = {1: 'black', 2: 'orange', 3: 'red'}
    LabelsColors = []
    for lab in datingLabels:
        if lab in colour_of:
            LabelsColors.append(colour_of[lab])

    # (axes, x column, y column, title, x label, y label) per panel.
    panels = [
        (axs[0][0], 0, 1,
         u'每年获得的飞行常客里程数与玩视频游戏所消耗时间占比',
         u'每年获得的飞行常客里程数', u'玩视频游戏所消耗时间占比'),
        (axs[0][1], 0, 2,
         u'每年获得的飞行常客里程数与每周消费的冰激淋公升数',
         u'每年获得的飞行常客里程数', u'每周消费的冰激淋公升数'),
        (axs[1][0], 1, 2,
         u'玩视频游戏所消耗时间占比与每周消费的冰激淋公升数',
         u'玩视频游戏所消耗时间占比', u'每周消费的冰激淋公升数'),
    ]
    for ax, xcol, ycol, title, xlab, ylab in panels:
        # Point size 15, half transparent, coloured by class.
        ax.scatter(x=datingDataMat[:, xcol], y=datingDataMat[:, ycol],
                   color=LabelsColors, s=15, alpha=.5)
        title_text = ax.set_title(title, FontProperties=font)
        xlabel_text = ax.set_xlabel(xlab, FontProperties=font)
        ylabel_text = ax.set_ylabel(ylab, FontProperties=font)
        plt.setp(title_text, size=9, weight='bold', color='red')
        plt.setp(xlabel_text, size=7, weight='bold', color='black')
        plt.setp(ylabel_text, size=7, weight='bold', color='black')

    # Proxy legend handles, one per class.
    didntLike = mlines.Line2D([], [], color='black', marker='.',
                              markersize=6, label='didntLike')
    smallDoses = mlines.Line2D([], [], color='orange', marker='.',
                               markersize=6, label='smallDoses')
    largeDoses = mlines.Line2D([], [], color='red', marker='.',
                               markersize=6, label='largeDoses')
    for ax in (axs[0][0], axs[0][1], axs[1][0]):
        ax.legend(handles=[didntLike, smallDoses, largeDoses])
    plt.show()
def autoNorm(dataSet):
    """Min-max normalise every feature column into [0, 1].

    Parameters:
        dataSet - feature matrix, shape (n_samples, n_features)
    Returns:
        normDataSet - normalised matrix, same shape as dataSet
        ranges - per-column (max - min) vector
        minVals - per-column minimum vector
    """
    # Column-wise extrema.
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals
    # Broadcasting replaces the two np.tile copies and removes the dead
    # np.zeros allocation that the original overwrote immediately.
    normDataSet = (dataSet - minVals) / ranges
    return normDataSet, ranges, minVals
def classifyPerson():
    """Interactively classify one person from three typed-in features.

    Reads the three features from the console, rebuilds the training set
    from datingTestSet.txt, and prints the predicted liking level.

    Parameters:
        none
    Returns:
        none
    """
    # Index 0/1/2 maps to predicted class 1/2/3.
    verdicts = ['讨厌', '有些喜欢', '非常喜欢']
    # Console input for the three features.
    games_pct = float(input("玩视频游戏所耗时间百分比:"))
    miles = float(input("每年获得的飞行常客里程数:"))
    ice_cream = float(input("每周消费的冰激淋公升数:"))
    # Training data and its normalisation statistics.
    feats, labels = file2matrix("datingTestSet.txt")
    norm_feats, spans, floors = autoNorm(feats)
    # Normalise the query with the training-set statistics, then vote
    # with k = 3 neighbours.
    query = (np.array([miles, games_pct, ice_cream]) - floors) / spans
    predicted = classify0(query, norm_feats, labels, 3)
    print("你可能%s这个人" % (verdicts[predicted - 1]))
预测值是类别 23 | auc_score2 = roc_auc_score(y_test,y_pred2) 24 | print(auc_score2) --------------------------------------------------------------------------------