├── 展示图片 └── all_trees.png ├── .gitignore ├── README.md ├── example.py ├── GBDT ├── loss_function.py ├── decision_tree.py ├── gbdt.py └── tree_plot.py └── LICENSE /展示图片/all_trees.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Freemanzxp/GBDT_Simple_Tutorial/HEAD/展示图片/all_trees.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | .idea/ 4 | *.py[cod] 5 | *$py.class 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GBDT_Simple_Tutorial（梯度提升树简易教程） 2 | ## 简介 3 | 利用python实现GBDT算法的回归、二分类以及多分类，将算法流程详情进行展示解读并可视化，便于读者庖丁解牛地理解GBDT。 4 | 5 | ## 项目进度： 6 | - [x] 回归 7 | - [x] 二分类 8 | - [x] 多分类 9 | - [x] 可视化 10 | *** 11 | **算法原理以及公式推导请前往blog：**[GBDT算法原理以及实例理解](https://blog.csdn.net/zpalyq110/article/details/79527653) 12 | *** 13 | ## 依赖环境 14 | - 操作系统：Windows/Linux 15 | - 编程语言：Python3 16 | - Python库：pandas、PIL、pydotplus、matplotlib， 17 | 其中pydotplus库会自动调用Graphviz，所以需要去[Graphviz官网](https://graphviz.gitlab.io/_pages/Download/Download_windows.html)下载`graphviz-2.38.msi` 18 | ，先安装，再将安装目录下的`bin`添加到系统环境变量，此时如果再报错可以重启计算机。详细过程不再描述，网上很多解答。 19 | 20 | ## 文件结构 21 | - | - GBDT 主模块文件夹 22 | - | --- gbdt.py 梯度提升算法主框架 23 | - | --- decision_tree.py 单棵树生成，包括节点划分和叶子结点生成 24 | - | --- loss_function.py 损失函数 25 | - | --- tree_plot.py 树的可视化 26 | - | - example.py 回归/二分类/多分类测试文件 27 | 28 | 29 | ## 运行指南 30 | - 回归测试： 31 | 32 | `python example.py --model regression` 33 | - 二分类测试： 34 | 35 | `python example.py --model binary_cf` 36 | - 多分类测试： 37 | 38 | `python example.py --model multi_cf` 39 | - 其他可配置参数：`lr` -- 学习率, `trees` -- 构建的决策树数量即迭代次数, 40 | `depth` -- 决策树的深度, `count` -- 决策树节点分裂的最小数据数量, 41 | `log` -- 是否打印树的生成过程, `plot` 
-- 是否可视化树的结构. 42 | - 结果文件：运行后会生成`results`文件夹,里面包含了每棵树的内部结构和生成日志 43 | 44 | 45 | ## 结果展示 46 | 仅展示最后所有树的集合，具体每棵树的详细信息望读者自行运行代码~ 47 |

# --------------------------------------------------------------------------------
# /example.py:
# --------------------------------------------------------------------------------
import os
import shutil
import logging
import argparse
import pandas as pd
from GBDT.gbdt import GradientBoostingRegressor
from GBDT.gbdt import GradientBoostingBinaryClassifier
from GBDT.gbdt import GradientBoostingMultiClassifier


logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
logger.removeHandler(logger.handlers[0])
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
logger.addHandler(ch)

# Toy training rows per task; columns are id, age, weight, label.
_TRAIN_ROWS = {
    'regression': [[1, 5, 20, 1.1],
                   [2, 7, 30, 1.3],
                   [3, 21, 70, 1.7],
                   [4, 30, 60, 1.8]],
    'binary_cf': [[1, 5, 20, 0],
                  [2, 7, 30, 0],
                  [3, 21, 70, 1],
                  [4, 30, 60, 1]],
    'multi_cf': [[1, 5, 20, 0],
                 [2, 7, 30, 0],
                 [3, 21, 70, 1],
                 [4, 30, 60, 1],
                 [5, 30, 60, 2],
                 [6, 30, 70, 2]],
}

_MODEL_CLASSES = {
    'regression': GradientBoostingRegressor,
    'binary_cf': GradientBoostingBinaryClassifier,
    'multi_cf': GradientBoostingMultiClassifier,
}

# Columns written by each model's predict() that are worth logging.
_PREDICTION_COLUMNS = {
    'regression': ['predict_value'],
    'binary_cf': ['predict_proba', 'predict_label'],
    'multi_cf': ['predict_label'],
}


def get_data(model):
    """Return ``[train_frame, test_frame]`` for the task named by *model*.

    Fresh DataFrames are built on every call because fit()/predict() add
    columns to the frames they receive. Raises KeyError for an unknown task.
    """
    train = pd.DataFrame(data=_TRAIN_ROWS[model], columns=['id', 'age', 'weight', 'label'])
    test = pd.DataFrame(data=[[5, 25, 65]], columns=['id', 'age', 'weight'])
    return [train, test]


def run(args):
    """Train the model selected by ``args.model`` and log its predictions.

    Recreates an empty ``results`` directory, fits the model (which writes one
    log file per tree there), then writes the training frame and the test
    predictions to ``results/result.log``.
    """
    # Load the training and test data once.
    data, test_data = get_data(args.model)
    # Start from an empty results directory.
    if os.path.isdir('results'):
        shutil.rmtree('results')
    os.makedirs('results')
    # Build the model; every model honours --count as min_samples_split.
    model = _MODEL_CLASSES[args.model](learning_rate=args.lr, n_trees=args.trees, max_depth=args.depth,
                                       min_samples_split=args.count, is_log=args.log, is_plot=args.plot)
    # Train.
    model.fit(data)
    # Swap the last handler left by fit() for the summary log file, closing it
    # so its file descriptor is released.
    stale_handler = logger.handlers[-1]
    logger.removeHandler(stale_handler)
    stale_handler.close()
    logger.addHandler(logging.FileHandler('results/result.log', mode='w', encoding='utf-8'))
    logger.info(data)
    # Predict.
    model.predict(test_data)
    # Log the predictions.
    logger.setLevel(logging.INFO)
    for column in _PREDICTION_COLUMNS[args.model]:
        logger.info(test_data[column])


def _str2bool(value):
    """Parse a command-line boolean.

    ``type=bool`` cannot be used with argparse: ``bool('False')`` is True
    because any non-empty string is truthy.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    if text in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got %r' % (value,))


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='GBDT-Simple-Tutorial')
    parser.add_argument('--model', default='regression', help='the model you want to use',
                        choices=['regression', 'binary_cf', 'multi_cf'])
    parser.add_argument('--lr', default=0.1, type=float, help='learning rate')
    parser.add_argument('--trees', default=5, type=int, help='the number of decision trees')
    parser.add_argument('--depth', default=3, type=int, help='the max depth of decision trees')
    # Minimum number of samples a node needs in order to be split; a node
    # holding a single sample is therefore always a leaf.
    parser.add_argument('--count', default=2, type=int, help='the min data count of a node')
    parser.add_argument('--log', default=False, type=_str2bool, help='whether to print the log on the console')
    parser.add_argument('--plot', default=True, type=_str2bool, help='whether to plot the decision trees')
    args = parser.parse_args()
    run(args)
# --------------------------------------------------------------------------------
# /GBDT/loss_function.py:
# --------------------------------------------------------------------------------
"""
Created on: 2019/03/30
@author: Freeman, foreverfc1994
"""
import math
import abc


def _apply_leaf_updates(data, leaf_nodes, f_prev_name, f_m_name, learning_rate):
    """Write column *f_m_name* = *f_prev_name* + learning_rate * leaf value."""
    data[f_m_name] = data[f_prev_name]
    for leaf_node in leaf_nodes:
        # data_index is a boolean mask over the rows of `data`.
        data.loc[leaf_node.data_index, f_m_name] += learning_rate * leaf_node.predict_value


def _log1p_exp(x):
    """Return log(1 + exp(x)) without overflowing for large |x|."""
    return max(x, 0.0) + math.log1p(math.exp(-abs(x)))


def _binomial_deviance(y, f):
    """Return -2 * mean(y * f - log(1 + exp(f))) for label/score Series."""
    return -2.0 * ((y * f) - f.apply(_log1p_exp)).mean()


class LossFunction(metaclass=abc.ABCMeta):
    """Interface every single-output loss must implement."""

    @abc.abstractmethod
    def initialize_f_0(self, data):
        """Initialise F_0."""

    @abc.abstractmethod
    def calculate_residual(self, data, iter):
        """Compute the negative gradient."""

    @abc.abstractmethod
    def update_f_m(self, data, trees, iter, learning_rate, logger):
        """Compute F_m."""

    @abc.abstractmethod
    def update_leaf_values(self, targets, y):
        """Compute the prediction value of a leaf node."""

    @abc.abstractmethod
    def get_train_loss(self, y, f, iter, logger):
        """Compute and log the training loss."""


class SquaresError(LossFunction):
    """Squared-error loss for regression."""

    def initialize_f_0(self, data):
        """Store the label mean in column ``f_0`` and return it."""
        f_0 = data['label'].mean()
        data['f_0'] = f_0
        return f_0

    def calculate_residual(self, data, iter):
        """Write ``res_<iter>`` = label - ``f_<iter-1>``."""
        res_name = 'res_' + str(iter)
        f_prev_name = 'f_' + str(iter - 1)
        data[res_name] = data['label'] - data[f_prev_name]

    def update_f_m(self, data, trees, iter, learning_rate, logger):
        """Write ``f_<iter>`` from tree *iter*'s leaves and log the loss."""
        f_m_name = 'f_' + str(iter)
        _apply_leaf_updates(data, trees[iter].leaf_nodes, 'f_' + str(iter - 1), f_m_name, learning_rate)
        # Log the training loss after this tree.
        self.get_train_loss(data['label'], data[f_m_name], iter, logger)

    def update_leaf_values(self, targets, y):
        """Return the mean residual of the samples in the leaf."""
        return targets.mean()

    def get_train_loss(self, y, f, iter, logger):
        """Log the mean squared error between *y* and *f*."""
        loss = ((y - f) ** 2).mean()
        logger.info(('第%d棵树: mse_loss:%.4f' % (iter, loss)))


class BinomialDeviance(LossFunction):
    """Binomial deviance (logistic loss) for 0/1 labels."""

    def initialize_f_0(self, data):
        """Store the log-odds of the positive class in ``f_0`` and return it.

        NOTE(review): needs both classes present; with no positives math.log
        raises, and with no negatives the ratio has a zero denominator.
        """
        pos = data['label'].sum()
        neg = data.shape[0] - pos
        # math.log is the natural logarithm.
        f_0 = math.log(pos / neg)
        data['f_0'] = f_0
        return f_0

    def calculate_residual(self, data, iter):
        """Write ``res_<iter>`` = label - sigmoid(``f_<iter-1>``)."""
        res_name = 'res_' + str(iter)
        f_prev_name = 'f_' + str(iter - 1)
        data[res_name] = data['label'] - 1 / (1 + data[f_prev_name].apply(lambda x: math.exp(-x)))

    def update_f_m(self, data, trees, iter, learning_rate, logger):
        """Write ``f_<iter>`` from tree *iter*'s leaves and log the loss."""
        f_m_name = 'f_' + str(iter)
        _apply_leaf_updates(data, trees[iter].leaf_nodes, 'f_' + str(iter - 1), f_m_name, learning_rate)
        # Log the training loss after this tree.
        self.get_train_loss(data['label'], data[f_m_name], iter, logger)

    def update_leaf_values(self, targets, y):
        """Return sum(res) / sum((y - res) * (1 - y + res)), or 0.0 if degenerate."""
        numerator = targets.sum()
        if numerator == 0:
            return 0.0
        denominator = ((y - targets) * (1 - y + targets)).sum()
        if abs(denominator) < 1e-150:
            return 0.0
        return numerator / denominator

    def get_train_loss(self, y, f, iter, logger):
        """Log the binomial deviance of scores *f* against labels *y*."""
        loss = _binomial_deviance(y, f)
        logger.info(('第%d棵树: log-likelihood:%.4f' % (iter, loss)))


class MultinomialDeviance:
    """Softmax loss that keeps one score column per class."""

    def init_classes(self, classes):
        """Remember the class names (an array of str)."""
        self.classes = classes

    def initialize_f_0(self, data, class_name):
        """Store the class frequency in ``f_<class>_0`` and return it."""
        label_name = 'label_' + class_name
        f_name = 'f_' + class_name + '_0'
        class_counts = data[label_name].sum()
        f_0 = class_counts / len(data)
        data[f_name] = f_0
        return f_0

    def calculate_residual(self, data, iter):
        """Write ``res_<class>_<iter>`` = one-hot label - softmax probability."""
        data['sum_exp'] = data.apply(lambda x:
                                     sum([math.exp(x['f_' + i + '_' + str(iter - 1)]) for i in self.classes]),
                                     axis=1)
        for class_name in self.classes:
            label_name = 'label_' + class_name
            res_name = 'res_' + class_name + '_' + str(iter)
            f_prev_name = 'f_' + class_name + '_' + str(iter - 1)
            data[res_name] = data[label_name] - math.e ** data[f_prev_name] / data['sum_exp']

    def update_f_m(self, data, trees, iter, class_name, learning_rate, logger):
        """Write ``f_<class>_<iter>`` from that class's tree and log the loss."""
        f_prev_name = 'f_' + class_name + '_' + str(iter - 1)
        f_m_name = 'f_' + class_name + '_' + str(iter)
        _apply_leaf_updates(data, trees[iter][class_name].leaf_nodes, f_prev_name, f_m_name, learning_rate)
        # Score this class against its own one-hot column; the raw 'label'
        # column holds class ids, not 0/1 indicators.
        self.get_train_loss(data['label_' + class_name], data[f_m_name], iter, logger)

    def update_leaf_values(self, targets, y):
        """Return the (K-1)/K-scaled leaf value, or 0.0 if degenerate."""
        numerator = targets.sum()
        if numerator == 0:
            return 0.0
        numerator *= (self.classes.size - 1) / self.classes.size
        denominator = ((y - targets) * (1 - y + targets)).sum()
        if abs(denominator) < 1e-150:
            return 0.0
        return numerator / denominator

    def get_train_loss(self, y, f, iter, logger):
        """Log a per-class binomial deviance of scores *f* against 0/1 *y*."""
        loss = _binomial_deviance(y, f)
        logger.info(('第%d棵树: log-likelihood:%.4f' % (iter, loss)))
# --------------------------------------------------------------------------------
# /GBDT/decision_tree.py:
# --------------------------------------------------------------------------------
"""
Created on: 2019/03/30
@author: Freeman, feverfc1994
"""


class Node:
    """One tree node; ``data_index`` is a boolean mask over the training rows."""

    def __init__(self, data_index, logger=None, split_feature=None, split_value=None, is_leaf=False, loss=None,
                 deep=None):
        self.loss = loss
        self.split_feature = split_feature
        self.split_value = split_value
        self.data_index = data_index
        self.is_leaf = is_leaf
        self.predict_value = None
        self.left_child = None
        self.right_child = None
        self.logger = logger
        self.deep = deep

    def update_predict_value(self, targets, y):
        """Set and log this leaf's value using the loss's leaf formula."""
        self.predict_value = self.loss.update_leaf_values(targets, y)
        self.logger.info(('叶子节点预测值：', self.predict_value))

    def get_predict_value(self, instance):
        """Route *instance* down the tree and return the reached leaf's value."""
        if self.is_leaf:
            self.logger.info(('predict:', self.predict_value))
            return self.predict_value
        if instance[self.split_feature] < self.split_value:
            return self.left_child.get_predict_value(instance)
        return self.right_child.get_predict_value(instance)


def _expand_mask(parent_mask, local_mask):
    """Lift *local_mask* (over the parent's selected rows) to a full-length mask.

    Rows the parent does not hold stay False; the others take consecutive
    values from *local_mask*. Runs in one pass.
    """
    local_flags = iter(local_mask)
    return [bool(next(local_flags)) if selected else False for selected in parent_mask]


class Tree:
    """Regression tree fitted to column *target_name*; built on construction."""

    def __init__(self, data, max_depth, min_samples_split, features, loss, target_name, logger):
        self.loss = loss
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.features = features
        self.logger = logger
        self.target_name = target_name
        self.remain_index = [True] * len(data)
        self.leaf_nodes = []
        self.root_node = self.build_tree(data, self.remain_index, depth=0)

    def build_tree(self, data, remain_index, depth=0):
        """Recursively build the subtree for the rows selected by *remain_index*.

        A node is split only when all three conditions hold:
        1. ``depth < max_depth - 1`` (a tree of depth 3 splits at depths 0 and 1);
        2. it holds at least ``min_samples_split`` samples;
        3. its target values are not all identical.
        Otherwise, or when no split candidate exists, it becomes a leaf.
        """
        now_data = data[remain_index]
        splittable = (depth < self.max_depth - 1
                      and len(now_data) >= self.min_samples_split
                      and len(now_data[self.target_name].unique()) > 1)
        if not splittable:
            return self._make_leaf(now_data, remain_index, depth)

        self.logger.info(('--树的深度：%d' % depth))
        best = self._find_best_split(now_data)
        if best is None:
            # No feature to split on; fall back to a leaf instead of crashing.
            return self._make_leaf(now_data, remain_index, depth)
        split_feature, split_value, left_local, right_local = best
        self.logger.info(('--最佳划分特征：', split_feature))
        self.logger.info(('--最佳划分值：', split_value))

        node = Node(remain_index, self.logger, split_feature, split_value, deep=depth)
        # The local masks index now_data; children need masks over all of data.
        node.left_child = self.build_tree(data, _expand_mask(remain_index, left_local), depth + 1)
        node.right_child = self.build_tree(data, _expand_mask(remain_index, right_local), depth + 1)
        return node

    def _find_best_split(self, now_data):
        """Return (feature, value, left_mask, right_mask) minimising total SE.

        Ties keep the first candidate found. Returns None without candidates.
        """
        best = None
        best_se = None
        for feature in self.features:
            self.logger.info(('----划分特征：', feature))
            for fea_val in now_data[feature].unique():
                # Try the split "feature < fea_val".
                left_index = list(now_data[feature] < fea_val)
                right_index = list(now_data[feature] >= fea_val)
                left_se = calculate_se(now_data[left_index][self.target_name])
                right_se = calculate_se(now_data[right_index][self.target_name])
                sum_se = left_se + right_se
                self.logger.info(('------划分值:%.3f,左节点损失:%.3f,右节点损失:%.3f,总损失:%.3f' %
                                  (fea_val, left_se, right_se, sum_se)))
                if best_se is None or sum_se < best_se:
                    best_se = sum_se
                    best = (feature, fea_val, left_index, right_index)
        return best

    def _label_column(self):
        """Return the label column matching ``target_name``.

        ``res_<class>_<iter>`` maps to ``label_<class>`` (the class name may
        itself contain underscores); ``res_<iter>`` maps to ``label``.
        """
        parts = self.target_name.split('_')
        if len(parts) >= 3:
            return 'label_' + '_'.join(parts[1:-1])
        return 'label'

    def _make_leaf(self, now_data, remain_index, depth):
        """Create a leaf, compute its value and register it in ``leaf_nodes``."""
        node = Node(remain_index, self.logger, is_leaf=True, loss=self.loss, deep=depth)
        node.update_predict_value(now_data[self.target_name], now_data[self._label_column()])
        self.leaf_nodes.append(node)
        return node


def calculate_se(label):
    """Return the sum of squared deviations of *label* from its mean."""
    mean = label.mean()
    se = 0
    # Plain sequential accumulation: keeps floating-point results, and hence
    # split tie-breaking, independent of any compensated summation.
    for y in label:
        se += (y - mean) * (y - mean)
    return se
# --------------------------------------------------------------------------------
# /GBDT/gbdt.py:
# --------------------------------------------------------------------------------
"""
Created on: 2019/03/28
@author: Freeman, feverfc1994
"""

import abc
import math
import logging
import pandas as pd
from GBDT.decision_tree import Tree
from GBDT.loss_function import SquaresError, BinomialDeviance, MultinomialDeviance
from GBDT.tree_plot import plot_tree, plot_all_trees,plot_multi
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)


def _start_tree_log(tree_no):
    """Send log records to ``results/NO.<tree_no>_tree.log``.

    If more than one handler is attached, the last one (the previous tree's
    file) is detached and closed first so file descriptors do not accumulate.
    """
    if len(logger.handlers) > 1:
        previous = logger.handlers[-1]
        logger.removeHandler(previous)
        previous.close()
    fh = logging.FileHandler('results/NO.{}_tree.log'.format(tree_no), mode='w', encoding='utf-8')
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)


class AbstractBaseGradientBoosting(metaclass=abc.ABCMeta):
    """Common fit/predict interface of the boosting models."""

    def __init__(self):
        pass

    def fit(self, data):
        pass

    def predict(self, data):
        pass


class BaseGradientBoosting(AbstractBaseGradientBoosting):
    """Boosting loop shared by the regressor and the binary classifier."""

    def __init__(self, loss, learning_rate, n_trees, max_depth,
                 min_samples_split=2, is_log=False, is_plot=False):
        super().__init__()
        self.loss = loss
        self.learning_rate = learning_rate
        self.n_trees = n_trees
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.features = None
        self.trees = {}
        self.f_0 = {}
        self.is_log = is_log
        self.is_plot = is_plot

    def fit(self, data):
        """Fit ``n_trees`` trees on *data*, adding res_/f_ columns in place.

        :param data: pandas.DataFrame whose first column is an id, whose last
            column is ``label`` and whose remaining columns are features
        """
        # Drop the first (id) and last (label) columns to get the features.
        self.features = list(data.columns)[1: -1]
        # Initialise f_0(x); for squared error this is the mean of y.
        self.f_0 = self.loss.initialize_f_0(data)
        # The first handler only shows tree-building output when is_log is set.
        logger.handlers[0].setLevel(logging.INFO if self.is_log else logging.CRITICAL)
        # For m = 1, 2, ..., M
        for iter in range(1, self.n_trees+1):
            _start_tree_log(iter)
            # Negative gradient; for squared error this is the residual.
            logger.info(('-----------------------------构建第%d颗树-----------------------------' % iter))
            self.loss.calculate_residual(data, iter)
            target_name = 'res_' + str(iter)
            self.trees[iter] = Tree(data, self.max_depth, self.min_samples_split,
                                    self.features, self.loss, target_name, logger)
            self.loss.update_f_m(data, self.trees, iter, self.learning_rate, logger)
            if self.is_plot:
                plot_tree(self.trees[iter], max_depth=self.max_depth, iter=iter)
        if self.is_plot:
            plot_all_trees(self.n_trees)

    def _accumulate_scores(self, data):
        """Add columns ``f_0`` .. ``f_<n_trees>`` to *data*.

        Returns the name of the final score column (``f_0`` when there are no
        trees).
        """
        data['f_0'] = self.f_0
        f_m_name = 'f_0'
        for iter in range(1, self.n_trees + 1):
            f_prev_name = f_m_name
            f_m_name = 'f_' + str(iter)
            root_node = self.trees[iter].root_node
            data[f_m_name] = data[f_prev_name] + \
                self.learning_rate * data.apply(root_node.get_predict_value, axis=1)
        return f_m_name


class GradientBoostingRegressor(BaseGradientBoosting):
    """Gradient boosting with squared-error loss."""

    def __init__(self, learning_rate, n_trees, max_depth,
                 min_samples_split=2, is_log=False, is_plot=False):
        super().__init__(SquaresError(), learning_rate, n_trees, max_depth,
                         min_samples_split, is_log, is_plot)

    def predict(self, data):
        """Add the f_ columns and ``predict_value`` to *data* in place."""
        data['predict_value'] = data[self._accumulate_scores(data)]


class GradientBoostingBinaryClassifier(BaseGradientBoosting):
    """Gradient boosting with binomial deviance for 0/1 labels."""

    def __init__(self, learning_rate, n_trees, max_depth,
                 min_samples_split=2, is_log=False, is_plot=False):
        super().__init__(BinomialDeviance(), learning_rate, n_trees, max_depth,
                         min_samples_split, is_log, is_plot)

    def predict(self, data):
        """Add the f_ columns, ``predict_proba`` and ``predict_label`` in place."""
        f_m_name = self._accumulate_scores(data)
        data['predict_proba'] = data[f_m_name].apply(lambda x: 1 / (1 + math.exp(-x)))
        data['predict_label'] = data['predict_proba'].apply(lambda x: 1 if x >= 0.5 else 0)


class GradientBoostingMultiClassifier(BaseGradientBoosting):
    """Gradient boosting with one tree per class per iteration."""

    def __init__(self, learning_rate, n_trees, max_depth,
                 min_samples_split=2, is_log=False, is_plot=False):
        super().__init__(MultinomialDeviance(), learning_rate, n_trees, max_depth,
                         min_samples_split, is_log, is_plot)

    def fit(self, data):
        """Fit ``n_trees`` rounds of per-class trees, adding columns in place."""
        # Drop the first (id) and last (label) columns to get the features.
        self.features = list(data.columns)[1: -1]
        # Collect the class names as strings.
        self.classes = data['label'].unique().astype(str)
        # Tell the loss how many classes (K) there are.
        self.loss.init_classes(self.classes)
        # One-hot encode 'label' into one column per class.
        for class_name in self.classes:
            label_name = 'label_' + class_name
            data[label_name] = data['label'].apply(lambda x: 1 if str(x) == class_name else 0)
            # Initialise f_0(x) for this class.
            self.f_0[class_name] = self.loss.initialize_f_0(data, class_name)
        logger.handlers[0].setLevel(logging.INFO if self.is_log else logging.CRITICAL)
        # For m = 1, 2, ..., M
        for iter in range(1, self.n_trees + 1):
            _start_tree_log(iter)
            logger.info(('-----------------------------构建第%d颗树-----------------------------' % iter))
            # Residuals for all classes are computed together so that they
            # share the same softmax denominator.
            self.loss.calculate_residual(data, iter)
            self.trees[iter] = {}
            for class_name in self.classes:
                target_name = 'res_' + class_name + '_' + str(iter)
                self.trees[iter][class_name] = Tree(data, self.max_depth, self.min_samples_split,
                                                    self.features, self.loss, target_name, logger)
                self.loss.update_f_m(data, self.trees, iter, class_name, self.learning_rate, logger)
            if self.is_plot:
                plot_multi(self.trees[iter], max_depth=self.max_depth, iter=iter)
        if self.is_plot:
            plot_all_trees(self.n_trees)

    def predict(self, data):
        """Add per-class scores, probabilities and ``predict_label`` in place.

        Training must iterate rounds in the outer loop, but here the trees
        already exist, so each class is scored through all rounds in turn.
        """
        last = str(self.n_trees)
        for class_name in self.classes:
            f_m_name = 'f_' + class_name + '_0'
            data[f_m_name] = self.f_0[class_name]
            for iter in range(1, self.n_trees + 1):
                f_prev_name = f_m_name
                f_m_name = 'f_' + class_name + '_' + str(iter)
                root_node = self.trees[iter][class_name].root_node
                data[f_m_name] = \
                    data[f_prev_name] + \
                    self.learning_rate * data.apply(root_node.get_predict_value, axis=1)

        # Softmax over the final-round scores; n_trees is used explicitly
        # rather than whatever value the loop variable happened to end on.
        data['sum_exp'] = data.apply(lambda x:
                                     sum([math.exp(x['f_' + i + '_' + last]) for i in self.classes]), axis=1)

        for class_name in self.classes:
            proba_name = 'predict_proba_' + class_name
            f_m_name = 'f_' + class_name + '_' + last
            data[proba_name] = data.apply(lambda x: math.exp(x[f_m_name]) / x['sum_exp'], axis=1)
        # TODO: log the probability of every class
        data['predict_label'] = data.apply(lambda x: self._get_multi_label(x), axis=1)

    def _get_multi_label(self, x):
        """Return the class with the highest probability in row *x* (first wins ties)."""
        label = None
        max_proba = -1
        for class_name in self.classes:
            proba = x['predict_proba_' + class_name]
            if proba > max_proba:
                max_proba = proba
                label = class_name
        return label
# --------------------------------------------------------------------------------
# /GBDT/tree_plot.py:
# --------------------------------------------------------------------------------
"""
Created on: 2019/04/7
@author: Freeman, feverfc1994
"""

from collections import deque
from PIL import Image
import pydotplus as pdp
from GBDT.decision_tree import Node, Tree
import os
import matplotlib.pyplot as plt

# Image.ANTIALIAS was an alias of LANCZOS and no longer exists in Pillow 10+.
_RESAMPLE = Image.LANCZOS
_GRID_COLUMNS = 3
_NODE_STYLE = 'width=1,height=0.5,color=lemonchiffon,style=filled,shape=ellipse'
_LEAF_STYLE = 'width=1,height=0.5,color=lightskyblue,style=filled,shape=box'


def _grid_rows(count, columns=_GRID_COLUMNS):
    """Return how many rows are needed to lay out *count* items."""
    return (count + columns - 1) // columns


def _sample_ids(node):
    """Return the row positions whose ``data_index`` flag is exactly True."""
    return [i for i, flag in enumerate(node.data_index) if flag is True]


def _name_nodes(pairs):
    """Map id(node) -> sequential DOT name, in first-seen order over *pairs*."""
    names = {}
    for parent, child in pairs:
        for node in (parent, child):
            if id(node) not in names:
                names[id(node)] = str(len(names))
    return names


def _dot_fragments(pairs, names, depth):
    """Return (edges, nodes) DOT text for pairs whose parent sits at *depth*.

    A child that holds no samples is neither drawn nor linked.
    """
    edges = []
    nodes = []
    for parent, child in pairs:
        if parent.deep != depth:
            continue
        pname = names[id(parent)]
        cname = names[id(child)]
        child_ids = _sample_ids(child)
        nodes.append(pname + '[' + _NODE_STYLE + ',label="id:' + str(_sample_ids(parent)) + '"];\n')
        if not child_ids:
            continue
        relation = '<' if parent.left_child is child else '>='
        edges.append(pname + '->' + cname + '[label="' + str(parent.split_feature) + relation +
                     str(parent.split_value) + '"];\n')
        nodes.append(cname + '[' + _NODE_STYLE + ',label="id:' + str(child_ids) + '"];\n')
        if child.is_leaf:
            # Hang a box with the leaf's predicted value under the leaf.
            edges.append(cname + '->' + cname + 'p[style=dotted];\n')
            nodes.append(cname + 'p[' + _LEAF_STYLE + ',label="' +
                         "{:.4f}".format(child.predict_value) + '"];\n')
    return ''.join(edges), ''.join(nodes)


def _load_resized(path, size):
    """Open *path*, return a resized copy and close the source file."""
    with Image.open(path) as source:
        return source.resize(size, _RESAMPLE)


def plot_multi(trees: dict, max_depth: int, iter: int):
    """Render the per-class trees of one boosting round.

    Writes ``results/NO.<iter>_<class>_tree.png`` for every class, then a
    three-column overview to ``results/NO.<iter>_tree.png``.

    :param trees: mapping class name -> Tree for this round
    :param max_depth: maximum depth of the trees
    :param iter: index of the boosting round
    """
    # NOTE(review): the dump lost the indentation, so it is unclear whether
    # the original re-rendered the PNGs after every depth level; they are
    # rendered once here, which yields the same final files.
    for class_index, tree in trees.items():
        pairs = []
        traversal(tree.root_node, pairs)
        names = _name_nodes(pairs)
        edges = ''
        nodes = ''
        for depth in range(max_depth):
            level_edges, level_nodes = _dot_fragments(pairs, names, depth)
            edges += level_edges
            nodes += level_nodes
        dot = '''digraph g {\n''' + edges + nodes + '''}'''
        graph = pdp.graph_from_dot_data(dot)
        graph.write_png('results/NO.{}_{}_tree.png'.format(iter, class_index))

    # Show all classes of this round in one figure.
    plt.ion()
    plt.figure(1, figsize=(30, 20))
    plt.axis('off')
    plt.title('NO.{} iter '.format(iter))
    rows = _grid_rows(len(trees))
    for position, class_index in enumerate(trees, start=1):
        plt.subplot(rows, _GRID_COLUMNS, position)
        img = _load_resized('results/NO.{}_{}_tree.png'.format(iter, class_index), (1024, 700))
        plt.axis('off')
        plt.title('NO.{}_class {}'.format(iter, class_index))
        plt.rcParams['figure.figsize'] = (30.0, 20.0)
        plt.imshow(img)
    plt.savefig('results/NO.{}_tree.png'.format(iter))
    plt.pause(0.01)


def plot_tree(tree: Tree, max_depth: int, iter: int):
    """Render a single decision tree to ``results/NO.<iter>_tree.png``.

    :param tree: the fitted decision tree
    :param max_depth: maximum depth of the tree
    :param iter: index of the tree
    :return: None
    """
    pairs = []
    # Collect (parent, child) pairs; `traversal` walks level by level,
    # `traversal_preorder` is the depth-first alternative.
    traversal(tree.root_node, pairs)
    names = _name_nodes(pairs)

    edges = ''
    nodes = ''
    # Show the tree growing one level at a time.
    # NOTE(review): assumes the original re-rendered after each depth level
    # (indentation was lost in this dump) - TODO confirm.
    for depth in range(max_depth):
        level_edges, level_nodes = _dot_fragments(pairs, names, depth)
        edges += level_edges
        nodes += level_nodes
        dot = '''digraph g {\n''' + edges + nodes + '''}'''
        graph = pdp.graph_from_dot_data(dot)
        # Save the image, then display it with pyplot.
        graph.write_png('results/NO.{}_tree.png'.format(iter))
        img = _load_resized('results/NO.{}_tree.png'.format(iter), (1024, 700))
        plt.ion()
        plt.figure(1, figsize=(30, 20))
        plt.axis('off')
        plt.title('NO.{} tree'.format(iter))
        plt.rcParams['figure.figsize'] = (30.0, 20.0)
        plt.imshow(img)
        plt.pause(0.01)


def plot_all_trees(numberOfTrees: int):
    '''
    Show every generated tree image in one figure, three per row, and save it
    as results/all_trees.png.
    :param numberOfTrees: number of decision trees
    :return: None
    '''
    rows = _grid_rows(numberOfTrees)
    # Use subplots to place all trees in a single figure.
    plt.figure(1, figsize=(30,20))
    plt.axis('off')
    for index in range(1, numberOfTrees + 1):
        path = os.path.join('results', 'NO.{}_tree.png'.format(index))
        plt.subplot(rows, _GRID_COLUMNS, index)
        img = _load_resized(path, (1000, 800))
        plt.axis('off')
        plt.title('NO.{} tree'.format(index))
        plt.imshow(img)
    plt.savefig('results/all_trees.png', dpi=300)
    plt.show()
    # The pyplot output is fairly low resolution, so also paste the original
    # images into one high-quality picture.
    image_compose(numberOfTrees)


def image_compose(numberOfTrees: int):
    '''
    Paste the images of numberOfTrees trees into
    results/all_trees_high_quality.png, three per row. Does nothing when
    there are no trees.
    :param numberOfTrees: number of decision trees
    :return: None
    '''
    if numberOfTrees < 1:
        return
    paths = [os.path.join('results', 'NO.{}_tree.png'.format(index))
             for index in range(1, numberOfTrees + 1)]
    # Every tile is placed on a grid sized after the first image.
    with Image.open(paths[0]) as first_image:
        tile_width, tile_height = first_image.size
    rows = _grid_rows(len(paths))
    # Blank white canvas to paste onto.
    to_image = Image.new('RGB', (_GRID_COLUMNS * tile_width, rows * tile_height), '#FFFFFF')
    for position, path in enumerate(paths):
        row, column = divmod(position, _GRID_COLUMNS)
        with Image.open(path) as from_image:
            to_image.paste(from_image, (column * tile_width, row * tile_height))

    to_image.save('results/all_trees_high_quality.png')


def traversal_preorder(root: Node, res: list):
    '''
    Walk the tree in pre-order, appending [parent, child] pairs to res.
    :param root: root node of the decision tree
    :param res: list that receives the [parent, child] pairs
    :return: None (res is filled in place)
    '''
    if root is None:
        return
    if root.left_child is not None:
        res.append([root, root.left_child])
        traversal_preorder(root.left_child, res)
    if root.right_child is not None:
        res.append([root, root.right_child])
        traversal_preorder(root.right_child, res)


def traversal(root: Node, res: list):
    '''
    Walk the tree level by level, appending [parent, child] pairs to res.
    :param root: root node of the decision tree
    :param res: list that receives the [parent, child] pairs
    :return: None (res is filled in place)
    '''
    if not root:
        return
    queue = deque([root])
    while queue:
        current = queue.popleft()
        for child in (current.left_child, current.right_child):
            if child is not None:
                queue.append(child)
                res.append([current, child])


if __name__ =="__main__":
    plot_all_trees(10)
    # image_compose(10)

# --------------------------------------------------------------------------------
# /LICENSE:
# --------------------------------------------------------------------------------
# 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document.
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [2019] [Freemanzxp] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | --------------------------------------------------------------------------------